summaryrefslogtreecommitdiff
path: root/parser
diff options
context:
space:
mode:
authorYale AI Dept <ai@nebula.cs.yale.edu>1993-07-14 13:08:00 -0500
committerDuncan McGreggor <duncan.mcgreggor@rackspace.com>1993-07-14 13:08:00 -0500
commit4e987026148fe65c323afbc93cd560c07bf06b3f (patch)
tree26ae54177389edcbe453d25a00c38c2774e8b7d4 /parser
Import to github.
Diffstat (limited to 'parser')
-rw-r--r--parser/README1
-rw-r--r--parser/annotation-parser.scm184
-rw-r--r--parser/decl-parser.scm175
-rw-r--r--parser/exp-parser.scm230
-rw-r--r--parser/interface-parser.scm98
-rw-r--r--parser/lexer.scm651
-rw-r--r--parser/module-parser.scm312
-rw-r--r--parser/parser-debugger.scm81
-rw-r--r--parser/parser-driver.scm48
-rw-r--r--parser/parser-errors.scm74
-rw-r--r--parser/parser-globals.scm27
-rw-r--r--parser/parser-macros.scm327
-rw-r--r--parser/parser.scm54
-rw-r--r--parser/pattern-parser.scm220
-rw-r--r--parser/token.scm364
-rw-r--r--parser/type-parser.scm116
-rw-r--r--parser/typedecl-parser.scm163
17 files changed, 3125 insertions, 0 deletions
diff --git a/parser/README b/parser/README
new file mode 100644
index 0000000..a2facd7
--- /dev/null
+++ b/parser/README
@@ -0,0 +1 @@
+This directory contains the lexer and parser.
diff --git a/parser/annotation-parser.scm b/parser/annotation-parser.scm
new file mode 100644
index 0000000..4ae33cf
--- /dev/null
+++ b/parser/annotation-parser.scm
@@ -0,0 +1,184 @@
+
+(define *annotation-escape* '())
+
+(define (parse-annotations)
+ (let ((save-layout (dynamic *layout-stack*)))
+ (setf (dynamic *layout-stack*) '())
+ (advance-token)
+ (let/cc annotation-escape
+ (setf *annotation-escape* (lambda ()
+ (setf (dynamic *layout-stack*) save-layout)
+ (advance-to-annotation-end)
+ (funcall annotation-escape '())))
+ (let ((res (start-layout (function parse-annotation-list-1))))
+ (setf (dynamic *layout-stack*) save-layout)
+ (token-case
+ (end-annotation res)
+ (else (signal-annotation-error)))))))
+
+(define (parse-annotation-list-1 in-layout?)
+ (let ((kind (get-annotation-kind)))
+ (cond ((eq? kind 'decl)
+ (let ((d (parse-annotation-decl)))
+ (token-case
+ (\; (cons d (parse-annotation-list-1 in-layout?)))
+ (else (close-layout in-layout?)
+ (list d)))))
+ ((eq? kind 'value)
+ (let ((d (parse-annotation-value)))
+ (token-case
+ (\; (cons d (parse-annotation-list-1 in-layout?)))
+ (else (close-layout in-layout?)
+ (list d)))))
+ (else
+ (close-layout in-layout?)
+ '()))))
+
+(define (get-annotation-kind)
+ (token-case
+ ((no-advance end-annotation) 'end)
+ ((no-advance \() 'decl)
+ ((var con)
+ (let ((next (peek-1-type)))
+ (cond ((eq? next '|,|)
+ 'decl)
+ ((eq? next '|::|)
+ 'decl)
+ (else
+ 'value))))
+ (else 'error)))
+
+(define (parse-annotation-decl)
+ (let* ((names (parse-aname-list))
+ (decls (parse-aval-list)))
+ (make annotation-decl (names names) (annotations decls))))
+
+(define (parse-aname-list)
+ (let ((name 'foo))
+ (token-case
+ (var
+ (setf name (var->symbol)))
+ (con
+ (setf name (con->symbol)))
+ (else (signal-annotation-error)))
+ (token-case (\, (cons name (parse-aname-list)))
+ (|::| (list name))
+ (else (signal-annotation-error)))))
+
+
+(define (parse-aval-list)
+ (let ((ann (parse-annotation-value)))
+ (token-case (\, (cons ann (parse-aval-list)))
+ (else (list ann)))))
+
+(define (parse-annotation-value)
+ (token-case
+ (name (let* ((name (token->symbol))
+ (args (parse-annotation-args name)))
+ (make annotation-value (name name) (args args))))))
+
+(define (parse-annotation-args name)
+ (token-case
+ (\( (parse-annotation-args-1 name 0))
+ (else '())))
+
+;;; This routine can invoke special parsers for the arguments
+
+(define (parse-annotation-args-1 name i)
+ (let* ((argtype (get-annotation-arg-description name i))
+ (arg (parse-annotation-arg argtype)))
+ (token-case
+ (\) (list arg))
+ (\, (cons arg (parse-annotation-args-1 name (1+ i))))
+ (else (signal-annotation-error)))))
+
+(define (parse-annotation-arg type)
+ (cond ((eq? type 'string)
+ (token-case
+ ((string no-advance)
+ (let ((res (car *token-args*)))
+ (advance-token)
+ res))
+ (else (signal-annotation-error))))
+ ;; The following is for a datatype import/export. It is
+ ;; Type(Con1(strs),Con2(strs),...)
+ ((eq? type 'integer)
+ (token-case
+ ((integer no-advance) (token->integer))
+ (else (signal-annotation-error))))
+ ((eq? type 'constr-list)
+ (parse-annotation-constr-list))
+ (else
+ (signal-annotation-error))))
+
+(define (signal-annotation-error)
+ (parser-error/recoverable 'annotation-error "Error in annotation syntax")
+ (funcall *annotation-escape*))
+
+(define (parse-annotation-constr-list)
+ (token-case
+ (tycon (let ((type-name (token->symbol)))
+ (token-case (\( (let* ((args (parse-acl1))
+ (res (tuple type-name args)))
+ (token-case ; leave the ) to end the args
+ ((no-advance \)) (list res))
+ (\, (cons res (parse-annotation-constr-list)))
+ (else (signal-annotation-error)))))
+ (else (signal-annotation-error)))))
+ (else (signal-annotation-error))))
+
+(define (parse-acl1)
+ (token-case
+ (con (let ((con-name (con->symbol)))
+ (token-case (\( (let ((str-args (parse-string-list)))
+ (token-case
+ (\, (cons (tuple con-name str-args)
+ (parse-acl1)))
+ (\) (list (tuple con-name str-args)))
+ (else (signal-annotation-error)))))
+ (else (signal-annotation-error)))))
+ (else (signal-annotation-error))))
+
+(define (parse-string-list)
+ (token-case
+ ((string no-advance)
+ (let ((res (read-lisp-object (car *token-args*))))
+ (advance-token)
+ (token-case
+ (\) (list res))
+ (\, (cons res (parse-string-list)))
+ (else (signal-annotation-error)))))
+ (else (signal-annotation-error))))
+
+(define (advance-to-annotation-end)
+ (token-case
+ (eof '())
+ (end-annotation
+ (advance-token))
+ (else
+ (advance-token)
+ (advance-to-annotation-end))))
+
+(define *known-annotations* '(
+ (|LispName| string)
+ (|Prelude|)
+ (|Strictness| string)
+ (|Strict|)
+ (|NoConversion|)
+ (|Inline|)
+ (|STRICT|)
+ (|ImportLispType| constr-list)
+ (|ExportLispType| constr-list)
+ (|Complexity| integer)
+ ))
+
+(define (get-annotation-arg-description annotation i)
+ (let ((s (assq annotation *known-annotations*)))
+ (cond ((eq? s '#f)
+ (parser-error/recoverable 'unknown-annotation
+ "Annotation ~A is not defined in this system - ignored."
+ annotation)
+ 'unknown)
+ ((>= i (length s))
+ 'error)
+ (else (list-ref s (1+ i))))))
diff --git a/parser/decl-parser.scm b/parser/decl-parser.scm
new file mode 100644
index 0000000..bf924b5
--- /dev/null
+++ b/parser/decl-parser.scm
@@ -0,0 +1,175 @@
+;;; File: decl-parser Author: John
+
+(define (parse-decl)
+ (let ((decl-type (find-decl-type)))
+ (cond ((eq? decl-type 'signdecl)
+ (parse-signdecl))
+ ((eq? decl-type 'pat-or-op)
+ (parse-pat-or-op))
+ ((eq? decl-type 'fundef)
+ (parse-fundef))
+ ((eq? decl-type 'plus-def)
+ (parse-plus-def))
+ ((eq? decl-type 'annotation)
+ (make annotation-decls (annotations (parse-annotations)))))))
+
+;;; This looks at the first tokens in a definition to determine it's type.
+;;; var (:: | ,) - signdecl
+;;; var apat-start - function definition
+;;; (var | _) + - definition of infix +
+;;; anything alse - pattern binding or infix definition
+
+(define (find-decl-type)
+ (let* ((saved-excursion (save-scanner-state))
+ (decl-type
+ (token-case
+ (var (scan-var)
+ (token-case
+ ((\, \:\:) 'signdecl)
+ (apat-start 'fundef)
+ (+ 'plus-def)
+ (else 'pat-or-op)))
+ (_ (token-case
+ (+ 'plus-def)
+ (else 'pat-or-op)))
+ (begin-annotation 'annotation)
+ (else 'pat-or-op))))
+ (restore-excursion saved-excursion)
+ decl-type))
+
+;;; These are the different flavors of decl parsers
+
+(define (parse-signdecl)
+ (save-parser-context
+ (trace-parser signdecl
+ (let ((vars (parse-signdecl-vars)))
+ (require-token \:\:
+ (signal-missing-token "`::'" "signature declaration"))
+ (let ((signature (parse-signature)))
+ (make signdecl (vars vars) (signature signature)))))))
+
+(define (parse-signdecl-vars)
+ (token-case
+ (var (let ((var (var->ast)))
+ (token-case (\, (cons var (parse-signdecl-vars)))
+ (else (list var)))))
+ (else (signal-missing-token "<var>" "signature declaration"))))
+
+(define (parse-pat-or-op)
+ (trace-parser patdef
+ (let* ((line-number (capture-current-line))
+ (pat (parse-pat)))
+ (token-case
+ (varop (parse-infix-def pat line-number))
+ (else (add-rhs pat '() '#f line-number))))))
+
+(define (parse-infix-def pat1 line-number)
+ (let* ((op (make var-pat (var (varop->ast))))
+ (pat2 (parse-pat)))
+ (add-rhs op (list pat1 pat2) '#t line-number)))
+
+(define (parse-fundef)
+ (trace-parser fundef
+ (let* ((start-line (capture-current-line))
+ (fn (parse-apat)) ; must be a single variable
+ (args (parse-apat-list)))
+ (add-rhs fn args '#f start-line))))
+
+(define (parse-plus-def)
+ (trace-parser plus-def
+ (let* ((start-line (capture-current-line))
+ (var (parse-apat)))
+ (parse-infix-def var start-line))))
+
+(define (add-rhs pat args infix? start-line)
+ (let* ((rhs (parse-rhs))
+ (decls (parse-where-decls))
+ (single (make single-fun-def
+ (args args)
+ (rhs-list rhs)
+ (where-decls decls)
+ (infix? infix?)))
+ (valdef (make valdef (lhs pat) (definitions (list single)))))
+ (setf (ast-node-line-number single) start-line)
+ (setf (ast-node-line-number valdef) start-line)
+ valdef))
+
+(define (parse-rhs)
+ (token-case
+ (= (let ((rhs (parse-exp)))
+ (list (make guarded-rhs (guard (make omitted-guard)) (rhs rhs)))))
+ (\| (parse-guarded-rhs))
+ (else
+ (signal-missing-token "`=' or `|'" "rhs of valdef"))))
+
+(define (parse-guarded-rhs) ; assume just past |
+ (trace-parser guard
+ (let ((guard (parse-exp-i))) ; 1.2 change
+ (require-token = (signal-missing-token "`='" "guarded rhs"))
+ (let* ((exp (parse-exp))
+ (res (make guarded-rhs (guard guard) (rhs exp))))
+ (token-case
+ (\| (cons res (parse-guarded-rhs)))
+ (else (list res)))))))
+
+(define (parse-where-decls)
+ (token-case
+ (|where|
+ (parse-decl-list))
+ (else '())))
+
+(define (parse-decl-list)
+ (start-layout (function parse-decl-list-1)))
+
+(define (parse-decl-list-1 in-layout?)
+ (token-case
+ ((apat-start begin-annotation)
+ (let ((decl (parse-decl)))
+ (token-case
+ (\; (decl-cons decl (parse-decl-list-1 in-layout?)))
+ (else (close-layout in-layout?)
+ (list decl)))))
+ (else
+ (close-layout in-layout?)
+ '())))
+
+;;; This adds a new decl to a decl list. Successive decls for the same fn
+;;; are combined.
+
+(define (decl-cons decl decl-list)
+ (cond ((null? decl-list)
+ (list decl))
+ (else (nconc (combine-decls decl (car decl-list)) (cdr decl-list)))))
+
+(define (decl-push decl decl-stack)
+ (cond ((null? decl-stack)
+ (list decl))
+ (else (nconc (nreverse (combine-decls (car decl-stack) decl))
+ (cdr decl-stack)))))
+
+(define (combine-decls decl1 decl2)
+ (if (and (is-type? 'valdef decl1)
+ (is-type? 'valdef decl2)
+ (same-decl-var? (valdef-lhs decl1) (valdef-lhs decl2)))
+ (if (eqv? (length (single-fun-def-args (car (valdef-definitions decl1))))
+ (length (single-fun-def-args (car (valdef-definitions decl2)))))
+ (begin
+ (setf (valdef-definitions decl1)
+ (nconc (valdef-definitions decl1)
+ (valdef-definitions decl2)))
+ (list decl1))
+ (signal-multiple-definitions-arity-mismatch (valdef-lhs decl1)))
+ (list decl1 decl2)))
+
+(define (same-decl-var? pat1 pat2)
+ (and (is-type? 'var-pat pat1)
+ (is-type? 'var-pat pat2)
+ (eq? (var-ref-name (var-pat-var pat1))
+ (var-ref-name (var-pat-var pat2)))))
+
+(define (signal-multiple-definitions-arity-mismatch pat)
+ (parser-error 'multiple-definitions-arity-mismatch
+ "Definition of ~a does not match arity of previous definition."
+ pat))
+
+
diff --git a/parser/exp-parser.scm b/parser/exp-parser.scm
new file mode 100644
index 0000000..6f941ae
--- /dev/null
+++ b/parser/exp-parser.scm
@@ -0,0 +1,230 @@
+;;; File: expr-parser Author: John
+
+(define (parse-exp)
+ (trace-parser exp
+ (parse-exp-0)))
+
+(define (parse-exp-0) ;; This picks up expr type signatures
+ (let ((exp (parse-exp-i)))
+ (token-case
+ (\:\: (let ((signature (parse-signature)))
+ (make exp-sign (exp exp) (signature signature))))
+ (else exp))))
+
+(define (parse-exp-i) ;; This collects a list of exps for later prec parsing
+ (let ((exps (parse-infix-exps)))
+ (if (null? (cdr exps))
+ (car exps)
+ (make pp-exp-list (exps exps)))))
+
+(define (parse-infix-exps)
+ (token-case
+ (- (cons (make negate) (parse-infix-exps)))
+ (\\ (list (parse-lambda)))
+ (|let| (list (parse-let)))
+ (|if| (list (parse-if)))
+ (|case| (parse-possible-app (parse-case)))
+ (else (let ((aexp (parse-aexp)))
+ (parse-possible-app aexp)))))
+
+(define (parse-possible-app exp)
+ (token-case
+ (aexp-start
+ (let ((exp2 (parse-aexp)))
+ (parse-possible-app (make app (fn exp) (arg exp2)))))
+ (varop
+ (let ((varop (varop->ast)))
+ (if (eq-token? '\))
+ (list exp varop)
+ `(,exp ,varop ,@(parse-infix-exps)))))
+ (conop
+ (let ((conop (conop->ast)))
+ (if (eq-token? '\))
+ (list exp conop)
+ `(,exp ,conop ,@(parse-infix-exps)))))
+ (else (list exp))))
+
+(define (parse-lambda)
+ (trace-parser lambda
+ (save-parser-context
+ (let ((pats (parse-apat-list)))
+ (require-token -> (signal-missing-token "`->'" "lambda expression"))
+ (let ((exp (parse-exp)))
+ (make lambda (pats pats) (body exp)))))))
+
+(define (parse-let)
+ (trace-parser let
+ (save-parser-context
+ (let ((decls (parse-decl-list)))
+ (require-token |in| (signal-missing-token "`in'" "let expression"))
+ (let ((exp (parse-exp)))
+ (make let (decls decls) (body exp)))))))
+
+(define (parse-if)
+ (trace-parser if
+ (save-parser-context
+ (let ((test-exp (parse-exp)))
+ (require-token |then| (signal-missing-token "`then'" "if expression"))
+ (let ((then-exp (parse-exp)))
+ (require-token |else| (signal-missing-token "`else'" "if expression"))
+ (let ((else-exp (parse-exp)))
+ (make if (test-exp test-exp)
+ (then-exp then-exp)
+ (else-exp else-exp))))))))
+
+(define (parse-case)
+ (trace-parser case
+ (save-parser-context
+ (let ((exp (parse-exp)))
+ (require-token |of| (signal-missing-token "`of'" "case expression"))
+ (let ((alts (start-layout (function parse-alts))))
+ (make case (exp exp) (alts alts)))))))
+
+(define (parse-alts in-layout?)
+ (token-case
+ (pat-start
+ (let ((alt (parse-alt)))
+ (token-case
+ (\; (cons alt (parse-alts in-layout?)))
+ (else (close-layout in-layout?)
+ (list alt)))))
+ (else
+ (close-layout in-layout?)
+ '())))
+
+(define (parse-alt)
+ (trace-parser alt
+ (let* ((pat (parse-pat))
+ (rhs-list (token-case
+ (-> (let ((exp (parse-exp)))
+ (list (make guarded-rhs (guard (make omitted-guard))
+ (rhs exp)))))
+ (\| (parse-guarded-alt-rhs))
+ (else (signal-missing-token "`->' or `|'" "rhs of alt"))))
+ (decls (parse-where-decls)))
+ (make alt (pat pat) (rhs-list rhs-list) (where-decls decls)))))
+
+(define (parse-guarded-alt-rhs)
+ (let ((guard (parse-exp)))
+ (require-token -> (signal-missing-token "`->'" "alt"))
+ (let* ((exp (parse-exp))
+ (res (make guarded-rhs (guard guard) (rhs exp))))
+ (token-case
+ (\| (cons res (parse-guarded-alt-rhs)))
+ (else (list res))))))
+
+(define (parse-aexp)
+ (trace-parser aexp
+ (token-case
+ (var (save-parser-context (var->ast)))
+ (con (save-parser-context (con->ast)))
+ (literal (literal->ast))
+ (\(
+ (token-case
+ (\) (**con/def (core-symbol "UnitConstructor")))
+ ((no-advance -) (parse-exp-or-tuple))
+ (varop
+ (let ((varop (varop->ast)))
+ (make-right-section varop)))
+ (conop
+ (let ((conop (conop->ast)))
+ (make-right-section conop)))
+ (else
+ (parse-exp-or-tuple))))
+ (\[
+ (token-case
+ (\] (make list-exp (exps '())))
+ (else
+ (let ((exp (parse-exp)))
+ (token-case
+ (\, (let ((exp2 (parse-exp)))
+ (token-case
+ (\] (make list-exp (exps (list exp exp2))))
+ (\.\. (token-case
+ (\] (make sequence-then (from exp) (then exp2)))
+ (else
+ (let ((exp3 (parse-exp)))
+ (require-token
+ \]
+ (signal-missing-token
+ "`]'" "sequence expression"))
+ (make sequence-then-to (from exp) (then exp2)
+ (to exp3))))))
+ (else
+ (make list-exp
+ (exps `(,exp ,exp2 ,@(parse-exp-list))))))))
+ (\.\. (token-case
+ (\] (make sequence (from exp)))
+ (else
+ (let ((exp2 (parse-exp)))
+ (require-token
+ \]
+ (signal-missing-token "`]'" "sequence expression"))
+ (make sequence-to (from exp) (to exp2))))))
+ (\] (make list-exp (exps (list exp))))
+ (\| (parse-list-comp exp))
+ (else
+ (signal-invalid-syntax
+ "a list, sequence, or list comprehension")))))))
+ (else
+ (signal-invalid-syntax "an aexp")))))
+
+(define (make-right-section op)
+ (let ((exps (parse-infix-exps)))
+ (token-case
+ (\) (make pp-exp-list (exps (cons op exps))))
+ (else (signal-missing-token "`)'" "right section expression")))))
+
+(define (parse-exp-list)
+ (token-case
+ (\] '())
+ (\, (let ((exp (parse-exp))) (cons exp (parse-exp-list))))
+ (else (signal-missing-token "`]' or `,'" "list expression"))))
+
+(define (parse-exp-or-tuple)
+ (let ((exp (parse-exp)))
+ (token-case
+ (\) exp) ; Note - sections ending in an op are parsed elsewhere
+ (else (make-tuple-cons (cons exp (parse-tuple-exp)))))))
+
+(define (parse-tuple-exp)
+ (token-case
+ (\) '())
+ (\, (let ((exp (parse-exp))) (cons exp (parse-tuple-exp))))
+ (else (signal-missing-token
+ "`)' or `,'" "tuple or parenthesized expression"))))
+
+;;; List comprehensions
+
+;;; Assume | has been consumed
+
+(define (parse-list-comp exp)
+ (save-parser-context
+ (let ((quals (parse-qual-list)))
+ (make list-comp (exp exp) (quals quals)))))
+
+(define (parse-qual-list)
+ (let ((qual (parse-qual)))
+ (token-case
+ (\, (cons qual (parse-qual-list)))
+ (\] (list qual))
+ (else (signal-missing-token "`]' or `,'" "list comprehension")))))
+
+(define (parse-qual)
+ (trace-parser qual
+ (save-parser-context
+ (let* ((saved-excursion (save-scanner-state))
+ (is-gen? (and (scan-pat) (eq-token? '<-))))
+ (restore-excursion saved-excursion)
+ (cond (is-gen?
+ (let ((pat (parse-pat)))
+ (advance-token) ; past the <-
+ (let ((exp (parse-exp)))
+ (make qual-generator (pat pat) (exp exp)))))
+ (else
+ (let ((exp (parse-exp)))
+ (make qual-filter (exp exp)))))))))
+
+(define (make-tuple-cons args)
+ (let ((tuple-con (**con/def (tuple-constructor (length args)))))
+ (**app/l tuple-con args)))
diff --git a/parser/interface-parser.scm b/parser/interface-parser.scm
new file mode 100644
index 0000000..184fdb0
--- /dev/null
+++ b/parser/interface-parser.scm
@@ -0,0 +1,98 @@
+;;; This is the parser for interface files.
+
+(define (parse-tokens/interface tokens)
+ (init-token-stream tokens)
+ (let ((interface (token-case
+ (|interface| (parse-interface))
+ (|module| (interface-required-error))
+ (else (crud-in-interface-error)))))
+ (cons interface (parse-interface-list))))
+
+(define (interface-required-error)
+ (parser-error 'interface-required "Expecting `interface' keyword"))
+
+(define (crud-in-interface-error)
+ (parser-error 'unexpected-interface-crud "Junk after interface"))
+
+(define (parse-interface-list)
+ (token-case
+ (|interface|
+ (let ((interface (parse-interface)))
+ (cons interface (parse-interface-list))))
+ (|module| (interface-required-error))
+ (eof '())
+ (else (crud-in-interface-error))))
+
+(define (parse-interface)
+ (token-case
+ (modid
+ (let ((module-name (token->symbol)))
+ (require-token |where|
+ (signal-missing-token "`where'" "interface definition"))
+ (let ((mod-ast (make module (name module-name)
+ (type 'interface)
+ (exports '()))))
+ (start-layout (lambda (in-layout?)
+ (parse-interface-decls mod-ast in-layout? 'import))))))))
+
+(define (parse-interface-decls mod-ast in-layout? state)
+ (token-case
+ (|import| (let ((import (parse-import)))
+ (when (not (eq? (import-decl-mode import) 'by-name))
+ (phase-error 'illegal-import
+ "Imports in interfaces must specify specific entities"))
+ (if (eq? state 'import)
+ (push-decl-list import (module-imports mod-ast))
+ (signal-misplaced-import)))
+ (terminate-interface-topdecl mod-ast in-layout? state))
+ (|infix| (terminate-interface-topdecl mod-ast in-layout?
+ (parse-fixity 'n mod-ast state)))
+ (|infixl| (terminate-interface-topdecl mod-ast in-layout?
+ (parse-fixity 'l mod-ast state)))
+ (|infixr| (terminate-interface-topdecl mod-ast in-layout?
+ (parse-fixity 'r mod-ast state)))
+ (|data| (let ((data-decl (parse-type-decl '#t)))
+ (push-decl-list data-decl (module-algdatas mod-ast)))
+ (terminate-interface-topdecl mod-ast in-layout? 'topdecl))
+ (|type| (let ((synonym-decl (parse-synonym-decl)))
+ (push-decl-list synonym-decl (module-synonyms mod-ast)))
+ (terminate-interface-topdecl mod-ast in-layout? 'topdecl))
+ (|class| (let ((class-decl (parse-class-decl)))
+ (check-class-default-decls class-decl)
+ (push-decl-list class-decl (module-classes mod-ast)))
+ (terminate-interface-topdecl mod-ast in-layout? 'topdecl))
+ (|instance| (let ((instance-decl (parse-instance-decl '#t)))
+ (push-decl-list instance-decl (module-instances mod-ast)))
+ (terminate-interface-topdecl mod-ast in-layout? 'topdecl))
+ (var (let ((decl (parse-signdecl)))
+ (setf (module-decls mod-ast)
+ (decl-push decl (module-decls mod-ast))))
+ (terminate-interface-topdecl mod-ast in-layout? 'topdecl))
+ ((begin-annotation no-advance)
+ (let ((annotations (parse-annotations)))
+ (setf (module-annotations mod-ast)
+ (append (module-annotations mod-ast) annotations)))
+ (terminate-interface-topdecl mod-ast in-layout? state))
+ (else
+ (maybe-end-interface mod-ast in-layout?))))
+
+(define (maybe-end-interface mod-ast in-layout?)
+ (cond ((or (eq-token? '|interface|) (eq-token? 'eof) (eq-token? '\})
+ (eq-token? '$\}))
+ (close-layout in-layout?)
+ (wrapup-module mod-ast)
+ mod-ast)
+ (else
+ (signal-invalid-syntax "a topdecl"))))
+
+(define (terminate-interface-topdecl mod-ast in-layout? state)
+ (token-case
+ (\; (parse-interface-decls mod-ast in-layout? state))
+ (else (maybe-end-interface mod-ast in-layout?))))
+
+(define (check-class-default-decls class-decl)
+ (dolist (d (class-decl-decls class-decl))
+ (when (valdef? d)
+ (remember-context d
+ (recoverable-error 'no-defaults-in-interface
+ "Class defaults should not be put in interface files")))))
diff --git a/parser/lexer.scm b/parser/lexer.scm
new file mode 100644
index 0000000..7230613
--- /dev/null
+++ b/parser/lexer.scm
@@ -0,0 +1,651 @@
+;;; File: parser/lexer Author: John
+
+;;; token data structure: a list with the token type in the
+;;; car and other information in the rest of the list. Symbols
+;;; designate the token type.
+
+;;; Reserved tokens use the name as the type and have no args.
+;;; Reserved tokens:
+;;; case class data default deriving else hiding if import in infix
+;;; infixl infixr instance interface let module of renaming then to
+;;; type where .. :: => = @ \ | ~ <- -> `
+;;; Other tokens:
+;;; (file string)
+;;; (newline line indent-column)
+;;; (conid string)
+;;; (varid string)
+;;; (consym string)
+;;; (varsym string)
+;;; (comment string) ;;; not used at the moment
+;;; (integer integer)
+;;; (float integer fraction exponent)
+;;; (string string)
+;;; (eof)
+
+
+;;; *** All of the stuff for lexing character and string literals is
+;;; *** broken because it assumes that the host Lisp uses the ASCII
+;;; *** encoding for characters and supports at least 255 characters.
+;;; *** I have marked the specific places in the code where these
+;;; *** assumptions are made, but fixing the problem will probably
+;;; *** require more drastic changes anyway -- such as using integers
+;;; *** instead of characters and vectors of integers instead of characters
+;;; *** throughout the compiler.
+
+(define *max-char* 255) ; highest char-code allowed.
+
+;;; This defines the long names of the control chars. Note that some of
+;;; this duplicates the table above & the reader.
+
+(define *control-char-names* '(
+ ("NUL" . 0) ("SOH" . 1) ("STX" . 2) ("ETX" . 3)
+ ("EOT" . 4) ("ENQ" . 5) ("ACK" . 6) ("BEL" . 7)
+ ("BS" . 8) ("HT" . 9) ("LF" . 10) ("VT" . 11)
+ ("FF" . 12) ("CR" . 13) ("SO" . 14) ("SI" . 15)
+ ("DLE" . 16) ("DC1" . 17) ("DC2" . 18) ("DC3" . 19)
+ ("DC4" . 20) ("NAK" . 21) ("SYN" . 22) ("ETB" . 23)
+ ("CAN" . 24) ("EM" . 25) ("SUB" . 26) ("ESC" . 27)
+ ("FS" . 28) ("GS" . 29) ("RS" . 30) ("US" . 31)
+ ("SP" . 32) ("DEL" . 127)))
+
+;;; This defines the short names for a few control chars. This
+;;; is keyed off the previous table
+
+(define *short-control-char-names* '(
+ (#\a . "BEL") (#\b . "BS") (#\f . "FF") (#\n . "LF")
+ (#\r . "CR") (#\t . "HT") (#\v . "VT")))
+
+;;; This is used in the ^X construct. Assume that ^X = code for ^A + X-A
+;;; *** This is an invalid assumption.
+
+(define *control-A* 1)
+
+;;; This function is the interface between the lexer and the rest
+;;; of the system. Note that the `file' reported in error messages
+;;; must be bound in an outer context.
+
+
+;;; *** I think this function should be binding these variables and not
+;;; *** just assigning them.
+
+(define (lex-port port literate?)
+ (setf *lex-literate?* literate?)
+ (setf *current-line* 1)
+ (setf *current-col* 0)
+ (setf *on-new-line?* '#t)
+ (setf *save-col?* '#f)
+ (setf *port* port)
+ (setf *tokens* '())
+ (setf *char* (read-char *port*))
+ (setf *peek-char* (read-char *port*))
+ (when (eof-object? *char*)
+ (setf *char* '#\space))
+ (when (eof-object? *peek-char*)
+ (setf *peek-char* '#\space))
+ (setf *at-eof/p?* '#f)
+ (setf *at-eof?* '#f)
+ (when *lex-literate?*
+ (process-literate-comments '#t))
+ (parse-till-eof)
+ (nreverse *tokens*))
+
+(define (parse-till-eof)
+ (cond (*at-eof?*
+ (emit-token 'eof)
+ '())
+ (else
+ (lex-one-token)
+ (parse-till-eof))))
+
+;;; There is an assumption that the scanner never peeks beyond a newline.
+;;; In literate mode, this may reveal the wrong thing.
+
+(define (advance-char)
+ (if (and *lex-literate?* (eqv? *char* #\newline))
+ (process-literate-comments '#f)
+ (advance-char-1)))
+
+(define (advance-char-1)
+ (cond ((eqv? *char* #\newline)
+ (setf *on-new-line?* '#t)
+ (incf (the fixnum *current-line*))
+ (setf *current-col* 0))
+ ((eqv? *char* #\tab)
+ (incf (the fixnum *current-col*) (- 8 (modulo *current-col* 8))))
+ (else
+ (incf (the fixnum *current-col*))))
+ (setf *char* *peek-char*)
+ (setf *at-eof?* *at-eof/p?*)
+ (setf *peek-char* (read-char *port*))
+ (when (eof-object? *peek-char*)
+ (setf *at-eof/p?* '#t)
+ (setf *peek-char* '#\space))
+ *char*)
+
+(define (peek-char-2)
+ (let ((ch (peek-char *port*)))
+ (if (eof-object? ch)
+ '#\space
+ ch)))
+
+(define (lex-one-token)
+ (setf *start-line* *current-line*) ; capture the loc at the start of the token
+ (setf *start-col* *current-col*)
+ (unless *at-eof?*
+ (char-case *char*
+ (whitechar
+ (advance-char)
+ (lex-one-token))
+ (#\- (char-case *peek-char*
+ (#\- (lex-comment))
+ (#\> (advance-char)
+ (advance-char)
+ (emit-token '\-\>))
+ (#\} (signal-missing-begin-comment)
+ (advance-char)
+ (advance-char)
+ (lex-one-token))
+ (else
+ (lex-varsym))))
+ (#\{ (cond ((char=? *peek-char* '#\-)
+ (advance-char)
+ (advance-char)
+ (cond ((char=? *char* '#\#)
+ (advance-char)
+ (emit-token 'begin-annotation))
+ (else
+ (lex-ncomment)
+ (lex-one-token))))
+ (else
+ (advance-char)
+ (emit-token '\{ ))))
+ (small (lex-varid))
+ (large (lex-conid))
+ (#\( (advance-char)
+ (emit-token '\())
+ (#\: (lex-consym))
+ (#\` (advance-char)
+ (emit-token '\`))
+ ((symbol presymbol) (lex-varsym))
+ (digit (lex-numeric))
+ (#\' (lex-char))
+ (#\" (lex-string))
+ (#\) (advance-char)
+ (emit-token '\)))
+ (#\, (advance-char)
+ (emit-token '\,))
+ (#\; (advance-char)
+ (emit-token '\;))
+ (#\[ (advance-char)
+ (emit-token '\[))
+ (#\] (advance-char)
+ (emit-token '\]))
+ (#\_ (advance-char)
+ (emit-token '\_))
+ (#\} (advance-char)
+ (emit-token '\}))
+ (else
+ (signal-invalid-character *char*)
+ (advance-char)
+ (lex-one-token)))))
+
+(define (signal-missing-begin-comment)
+ (lexer-error 'missing-begin-comment
+ "`-}' appears outside of a nested comment."))
+
+(define (signal-invalid-character ch)
+ (lexer-error 'invalid-character
+ "Invalid character `~a' appears in source program." ch))
+
+(define (advance-past-white)
+ (unless *at-eof?*
+ (char-case *char*
+ (whitechar
+ (advance-char)
+ (advance-past-white))
+ (else
+ '()))))
+
+(define (process-literate-comments at-start?)
+ (unless at-start? (advance-char-1))
+ (let ((l (classify-line)))
+ (cond ((or *at-eof?* (eq? l 'program))
+ '())
+ ((eq? l 'blank)
+ (skip-literate-comment '#t))
+ (else
+ (when (not at-start?)
+ (lexer-error 'blank-line-needed
+ "Literate comments must be preceeded by a blank line"))
+ (skip-literate-comment '#f)))))
+
+(define (skip-literate-comment prev-blank)
+ (skip-past-line)
+ (let ((l (classify-line)))
+ (cond (*at-eof?*
+ '())
+ ((eq? l 'comment)
+ (skip-literate-comment '#f))
+ ((eq? l 'blank)
+ (skip-literate-comment '#t))
+ (else
+ (when (not prev-blank)
+ (lexer-error 'blank-line-needed
+ "Literate comments must be followed by a blank line"))))))
+
+(define (classify-line)
+ (if *at-eof?*
+ 'blank
+ (char-case *char*
+ (#\>
+ (advance-char-1)
+ 'program)
+ (#\newline 'blank)
+ (whitechar
+ (classify-line-1))
+ (else 'comment))))
+
+(define (classify-line-1)
+ (advance-char-1)
+ (char-case *char*
+ (#\newline 'blank)
+ (whitechar (classify-line-1))
+ (else 'comment)))
+
+(define (skip-past-line)
+ (when (not *at-eof?*)
+ (char-case *char*
+ (#\newline
+ (advance-char-1))
+ (else
+ (advance-char-1)
+ (skip-past-line)))))
+
+(define (lex-comment) ;; a -- style comment
+ (advance-char)
+ (cond (*at-eof?* (lexer-eof-in-comment *current-line*))
+ ((char=? *char* #\newline)
+ (lex-one-token))
+ (else
+ (lex-comment))))
+
+(define (lexer-eof-in-comment start-line)
+ (signal-eof-in-comment start-line)
+ (lex-one-token)) ; will return the eof token
+
+(define (signal-eof-in-comment start-line)
+ (lexer-error 'eof-in-comment
+ "End of file in comment starting at line ~A." start-line))
+
+;;; Here *char* and *peek-char* are the first two chars on a line.
+
+(define (scan-symbol)
+ (scan-list-of (symbol #\:)))
+
+(define (scan-var-con)
+ (scan-list-of (large small digit #\' #\_)))
+
+(define (lex-ncomment)
+ (lex-ncomment-1 *current-line*))
+
+(define (lex-ncomment-1 start-line)
+ (if *at-eof?*
+ (lexer-eof-in-comment start-line)
+ (char-case *char*
+ (#\- (cond ((char=? *peek-char* #\})
+ (advance-char)
+ (advance-char))
+ (else
+ (advance-char)
+ (lex-ncomment-1 start-line))))
+ (#\{ (cond ((char=? *peek-char* #\-)
+ (advance-char)
+ (advance-char)
+ (lex-ncomment)
+ (lex-ncomment-1 start-line))
+ (else
+ (advance-char)
+ (lex-ncomment-1 start-line))))
+ (else
+ (advance-char)
+ (lex-ncomment-1 start-line)))))
+
+(define (lex-varid)
+ (let ((sym (scan-var-con)))
+ (parse-reserved sym varid
+ "case" "class"
+ "data" "default" "deriving"
+ "else"
+ "hiding"
+ "if" "import" "in" "infix" "infixl" "infixr" "instance" "interface"
+ "let"
+ "module"
+ "of"
+ "renaming"
+ "then" "to" "type"
+ "where")))
+
+(define (lex-conid)
+ (let ((sym (scan-var-con)))
+ (emit-token/string 'conid sym)))
+
+(define (lex-consym)
+ (let ((sym (scan-symbol)))
+ (cond ((string=/list? (cdr sym) ":")
+ (emit-token '\:\:))
+ (else
+ (emit-token/string 'consym sym)))))
+
+(define (lex-varsym)
+ (let ((sym (scan-symbol)))
+ (cond ((and (string=/list? sym "<") (char=? *char* #\-))
+ (advance-char)
+ (emit-token '\<\-))
+ ((and (string=/list? sym "#")
+ (char=? *char* #\-)
+ (char=? *peek-char* #\}))
+ (advance-char)
+ (advance-char)
+ (emit-token 'end-annotation))
+ (else
+ (parse-reserved sym varsym
+ ".."
+ "=>" "="
+ "@"
+ "\\"
+ "|"
+ "~")))))
+
+(define (lex-integer radix)
+ (lex-integer-1 radix 0))
+
+(define (lex-integer-1 radix psum)
+ (declare (type fixnum radix)
+ (type integer psum))
+ (let ((d (char->digit *char* radix)))
+ (if d
+ (begin
+ (advance-char)
+ (lex-integer-1 radix (+ (* psum radix) (the fixnum d))))
+ psum)))
+
+(define (lex-fraction int-part denominator)
+ (declare (type integer int-part denominator))
+ (let ((d (char->digit *char* 10)))
+ (if d
+ (begin
+ (advance-char)
+ (lex-fraction
+ (+ (* int-part 10) (the fixnum d)) (* denominator 10)))
+ (values int-part denominator))))
+
+(define (lex-numeric)
+ (let ((int-part (lex-integer 10)))
+ (if (and (char=? *char* #\.)
+ (char->digit *peek-char* 10))
+ (lex-float int-part)
+ (emit-token 'integer int-part))))
+
+(define (lex-float int-part)
+ (advance-char)
+ (multiple-value-bind (numerator denominator) (lex-fraction int-part 1)
+ (let ((no-exponent
+ (lambda () (emit-token 'float numerator denominator 0))))
+ (char-case *char*
+ (exponent
+ (char-case *peek-char*
+ (digit
+ (advance-char)
+ (lex-float/exp numerator denominator 1))
+ ((#\+ #\-)
+ (cond ((char->digit (peek-char-2) 10)
+ (let ((sign (if (char=? *peek-char* '#\+) 1 -1)))
+ (advance-char)
+ (advance-char)
+ (lex-float/exp numerator denominator sign)))
+ (else
+ (funcall no-exponent))))
+ (else
+ (funcall no-exponent))))
+ (else
+ (emit-token 'float numerator denominator 0))))))
+
+(define (lex-float/exp numerator denominator sign)
+ (let ((exponent (lex-integer 10)))
+ (emit-token 'float numerator denominator (* sign exponent))))
+
+(define (lex-char)
+ (advance-char)
+ (let ((c
+ (char-case *char*
+ (#\' (signal-null-character)
+ '#\?)
+ (#\\ (lex-escaped-char '#f))
+ ((#\space graphic)
+ (let ((ch *char*))
+ (advance-char)
+ ch))
+ (else
+ (signal-bad-character-constant *char*)
+ (advance-char)
+ `#\?))))
+ (cond ((char=? *char* '#\')
+ (advance-char)
+ (emit-token 'char c))
+ (else
+ (signal-missing-char-quote)
+ (skip-to-quote-or-eol)))))
+
+(define (signal-null-character)
+ (lexer-error 'null-character
+ "Null character '' is illegal - use '\\'' for a quote."))
+
+(define (signal-bad-character-constant ch)
+ (lexer-error 'bad-character-constant
+ "The character `~a' may not appear in a character literal." ch))
+
+(define (signal-missing-char-quote)
+ (lexer-error 'missing-char-quote
+ "Character constant has more than one character."))
+
+
+(define (skip-to-quote-or-eol)
+ (if *at-eof?*
+ (lex-one-token)
+ (char-case *char*
+ (#\' (advance-char)
+ (lex-one-token))
+ (#\newline (advance-char)
+ (lex-one-token))
+ (else
+ (advance-char)
+ (skip-to-quote-or-eol)))))
+
+(define (lex-string)
+ (advance-char)
+ (emit-token 'string (list->string (gather-string-chars))))
+
+(define (gather-string-chars)
+ (char-case *char*
+ (#\\
+ (let ((ch (lex-escaped-char '#t)))
+ (if (eq? ch 'null)
+ (gather-string-chars)
+ (cons ch (gather-string-chars)))))
+ (#\"
+ (advance-char)
+ '())
+ ((graphic #\space)
+ (let ((ch *char*))
+ (advance-char)
+ (cons ch (gather-string-chars))))
+ (#\newline
+ (signal-missing-string-quote)
+ '())
+ (else
+ (signal-bad-string-constant *char*)
+ (advance-char)
+ (gather-string-chars))))
+
+(define (signal-missing-string-quote)
+ (lexer-error 'missing-string-quote
+ "String continued over end of line."))
+
+(define (signal-bad-string-constant ch)
+ (lexer-error 'bad-string-constant
+ "The character `~a' may not appear in a string literal." ch))
+
+
+(define (convert-stupid-control-character-names)
+ (let ((c1 *char*)
+ (c2 *peek-char*))
+ (advance-char)
+ (advance-char)
+ (let ((s2 (string c1 c2))
+ (s3 (string c1 c2 *char*)))
+ (let ((srch3 (assoc s3 *control-char-names*)))
+ (cond (srch3
+ (advance-char)
+ (integer->char (cdr srch3)))
+ (else
+ (let ((srch2 (assoc s2 *control-char-names*)))
+ (cond (srch2
+ (integer->char (cdr srch2)))
+ (else
+ (signal-bad-control-char s3)
+ `#\?)))))))))
+
+(define (signal-bad-control-char name)
+ (lexer-error 'invalid-control-char
+ "`~a' is not a recognized control character name." name))
+
+
+(define (lex-escaped-char in-string?)
+ (advance-char)
+ (char-case *char*
+ ((#\a #\b #\f #\n #\r #\t #\v)
+ (let* ((ccode (cdr (assoc *char* *short-control-char-names*)))
+ (ccode1 (cdr (assoc ccode *control-char-names*))))
+ (advance-char)
+ (integer->char ccode1)))
+ ((#\\ #\' #\")
+ (let ((ch *char*))
+ (advance-char)
+ ch))
+ (#\&
+ (advance-char)
+ (cond (in-string? 'null)
+ (else
+ (signal-bad-&-escape)
+ '#\?)))
+ (#\^
+ ;; *** This code is problematic because it assumes
+ ;; *** (1) that you can do the arithmetic on the character codes
+ ;; *** (2) that the resulting integer can actually be coerced to
+ ;; *** the right character object in the host Lisp.
+ (advance-char)
+ (char-case *char*
+ ((large #\@ #\[ #\\ #\] #\^ #\_)
+ (let ((code (+ (- (char->integer *char*)
+ (char->integer '#\A))
+ *control-A*)))
+ (advance-char)
+ (integer->char code)))
+ (else
+ (signal-bad-^-escape *char*)
+ '#\?)))
+ (large
+ (convert-stupid-control-character-names))
+ (digit
+ (convert-num-to-char (lex-integer 10)))
+ (#\o
+ (advance-char)
+ (cond ((char->digit *char* 8)
+ (convert-num-to-char (lex-integer 8)))
+ (else
+ (signal-missing-octal-digits)
+ '#\?)))
+ (#\x
+ (advance-char)
+ (cond ((char->digit *char* 16)
+ (convert-num-to-char (lex-integer 16)))
+ (else
+ (signal-missing-hex-digits)
+ `#\?)))
+ (whitechar
+ (cond (in-string?
+ (lex-gap))
+ (else
+ (signal-bad-gap)
+ `#\?)))
+ (else
+ (signal-bad-escape *char*)
+ `#\?)))
+
+(define (signal-bad-&-escape)
+ (lexer-error 'bad-&-escape
+ "The escape `\\&' is not allowed inside a character literal."))
+
+(define (signal-bad-^-escape ch)
+ (lexer-error 'bad-^-escape
+ "The escape `\\^~a' is not recognized." ch))
+
+(define (signal-missing-octal-digits)
+ (lexer-error 'missing-octal-digits
+ "No digits provided for `\\o' escape."))
+
+(define (signal-missing-hex-digits)
+ (lexer-error 'missing-hex-digits
+ "No digits provided for `\\x' escape."))
+
+(define (signal-bad-gap)
+ (lexer-error 'invalid-gap
+ "Gaps are not allowed inside character literals."))
+
+(define (signal-bad-escape ch)
+ (lexer-error 'bad-escape
+ "The escape `\\~a' is not recognized." ch))
+
+
+
+;;; *** This code is problematic because it assumes that integers
+;;; *** between 0 and 255 map on to characters with the corresponding
+;;; *** ASCII encoding in the host Lisp, and that the host Lisp actually
+;;; *** supports 255 characters.
+
+(define (convert-num-to-char num)
+ (cond ((and (>= num 0) (>= *max-char* num))
+ (integer->char num))
+ (else
+ (signal-char-out-of-range num)
+ '#\?)))
+
+(define (signal-char-out-of-range num)
+ (lexer-error 'char-out-of-range
+ "There is no character corresponding to code ~s." num))
+
+
+(define (lex-gap)
+ (cond (*at-eof?*
+ (signal-eof-in-gap)
+ 'null)
+ (else
+ (char-case *char*
+ (whitechar
+ (advance-char)
+ (lex-gap))
+ (#\\
+ (advance-char)
+ 'null)
+ (else
+ (signal-missing-gap)
+ 'null)))))
+
+
+(define (signal-eof-in-gap)
+ (lexer-error 'eof-in-gap
+ "End of file encountered inside gap."))
+
+(define (signal-missing-gap)
+ (lexer-error 'missing-gap
+ "Missing gap delimiter, or junk inside gap."))
diff --git a/parser/module-parser.scm b/parser/module-parser.scm
new file mode 100644
index 0000000..2ffa391
--- /dev/null
+++ b/parser/module-parser.scm
@@ -0,0 +1,312 @@
+;;; File: module-parser Author: John
+
+;;; This is for using the parser to parse strings.
+
+(define (parse-from-string string parse-proc filename)
+ (dynamic-let ((*current-file* filename))
+ (call-with-input-string string
+ (lambda (port)
+ (let ((tokens (lex-port port '#f)))
+ (init-token-stream tokens)
+ (let ((res (funcall parse-proc)))
+ (if (not (eq-token? 'eof))
+ (signal-leftover-tokens)
+ res)))))))
+
+(define (signal-leftover-tokens)
+ (fatal-error 'leftover-tokens
+ "Leftover tokens after parsing."))
+
+
+;;; This file deals with the basic structure of a module. It also adds
+;;; the `module Main where' required by abbreviated modules.
+
+(define (parse-tokens tokens)
+ (init-token-stream tokens)
+ (let ((mod (token-case
+ (|module| (parse-module))
+ (else (parse-modules/named '|Main| '())))))
+ (cons mod (parse-module-list))))
+
+(define (parse-module)
+ (token-case
+ (modid (let* ((mod-name (token->symbol))
+ (exports (parse-exports)))
+ (require-token
+ |where|
+ (signal-missing-token "`where'" "module definition"))
+ (parse-modules/named mod-name exports)))
+ (else (signal-missing-token "<modid>" "module definition"))))
+
+(define (parse-module-list)
+ (token-case
+ (|module|
+ (let ((mod (parse-module)))
+ (cons mod (parse-module-list))))
+ (eof '())
+ (else (signal-missing-module))))
+
+(define (signal-missing-module)
+ (parser-error 'missing-module
+ "Missing `module', or leftover junk after module definition."))
+
+(define (parse-exports)
+ (token-case
+ (\( (parse-export-list))
+ (else '())))
+
+(define (parse-export-list)
+ (let ((entity (parse-entity 'export)))
+ (token-case
+ (\) (list entity))
+ (\, (cons entity (parse-export-list)))
+ (else (signal-missing-token "`)' or ','" "export list")))))
+
+(define (parse-modules/named mod-name exports)
+ (trace-parser module
+ (let ((mod-ast (make module
+ (name mod-name)
+ (type 'standard)
+ (exports exports)
+ (default *standard-module-default*))))
+ (start-layout (lambda (in-layout?)
+ (parse-module-decls mod-ast in-layout? 'import))))))
+
+;;; The mod-ast fields are kept in non-reversed order by appending
+;;; each decl to the end of the appropriate list. This loses for
+;;; value decls, so these are in reversed order!!
+
+(define (parse-module-decls mod-ast in-layout? state)
+ (token-case
+ (|import| (let ((import (parse-import)))
+ (if (eq? state 'import)
+ (push-decl-list import (module-imports mod-ast))
+ (signal-misplaced-import)))
+ (terminate-topdecl mod-ast in-layout? state))
+ (|infix| (terminate-topdecl mod-ast in-layout?
+ (parse-fixity 'n mod-ast state)))
+ (|infixl| (terminate-topdecl mod-ast in-layout?
+ (parse-fixity 'l mod-ast state)))
+ (|infixr| (terminate-topdecl mod-ast in-layout?
+ (parse-fixity 'r mod-ast state)))
+ (|data| (let ((data-decl (parse-type-decl '#f)))
+ (push-decl-list data-decl (module-algdatas mod-ast)))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ (|type| (let ((synonym-decl (parse-synonym-decl)))
+ (push-decl-list synonym-decl (module-synonyms mod-ast)))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ (|class| (let ((class-decl (parse-class-decl)))
+ (push-decl-list class-decl (module-classes mod-ast)))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ (|instance| (let ((instance-decl (parse-instance-decl '#f)))
+ (push-decl-list instance-decl (module-instances mod-ast)))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ (|default| (let ((types
+ (token-case
+ (\( (token-case (\) '())
+ (else (parse-type-list))))
+ (else (list (parse-type))))))
+ (if (eq? (module-default mod-ast) *standard-module-default*)
+ (setf (module-default mod-ast)
+ (make default-decl (types types)))
+ (signal-multiple-defaults)))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ ((begin-annotation no-advance)
+ (let ((annotations (parse-annotations)))
+ (setf (module-annotations mod-ast)
+ (append (module-annotations mod-ast) annotations)))
+ (terminate-topdecl mod-ast in-layout? state))
+ (pat-start (let ((decl (parse-decl)))
+ (setf (module-decls mod-ast)
+ (decl-push decl (module-decls mod-ast))))
+ (terminate-topdecl mod-ast in-layout? 'topdecl))
+ (else
+ (maybe-end-module mod-ast in-layout? state))))
+
+(define (signal-misplaced-import)
+ (parser-error 'misplaced-import
+ "The import declaration is misplaced."))
+
+(define (signal-multiple-defaults)
+ (parser-error 'multiple-defaults
+ "There are multiple default declarations."))
+
+(define (terminate-topdecl mod-ast in-layout? state)
+ (token-case
+ (\; (parse-module-decls mod-ast in-layout? state))
+ (else (maybe-end-module mod-ast in-layout? state))))
+
+(define (maybe-end-module mod-ast in-layout? state)
+ (declare (ignore state))
+ (cond ((or (eq-token? '|module|) (eq-token? 'eof) (eq-token? '\})
+ (eq-token? '$\}))
+ (close-layout in-layout?)
+ (wrapup-module mod-ast)
+ mod-ast)
+ (else
+ (signal-invalid-syntax "a topdecl"))))
+
+(define (wrapup-module mod-ast)
+ (setf (module-decls mod-ast)
+ (nreverse (module-decls mod-ast)))
+ (when (and (null? (module-imports mod-ast))
+ (null? (module-decls mod-ast))
+ (null? (module-algdatas mod-ast))
+ (null? (module-synonyms mod-ast))
+ (null? (module-instances mod-ast))
+ (null? (module-classes mod-ast)))
+ (signal-empty-module)))
+
+(define (signal-empty-module)
+ (parser-error 'empty-module "Module definition is empty."))
+
+(define (parse-import)
+ (save-parser-context
+ (token-case
+ (modid (let ((mod (token->symbol))
+ (mode 'all)
+ (specs '()))
+ (token-case
+ (\( (setf mode 'by-name)
+ (token-case
+ (\) (setf specs '()))
+ (else (setf specs (parse-import-list)))))
+ (|hiding| (require-token
+ \(
+ (signal-missing-token "`('" "hiding clause"))
+ (setf specs (parse-import-list)))
+ (else '()))
+ (let ((renamings (token-case (|renaming|
+ (require-token
+ \(
+ (signal-missing-token
+ "`('" "renaming clause"))
+ (parse-renamings))
+ (else '()))))
+ (make import-decl (module-name mod) (mode mode) (specs specs)
+ (renamings renamings)))))
+ (else
+ (signal-missing-token "<modid>" "import declaration")))))
+
+(define (parse-import-list)
+ (let ((import (parse-entity 'import)))
+ (token-case
+ (\, (cons import (parse-import-list)))
+ (\) (list import))
+ (else (signal-missing-token "`)' or `,'" "import list")))))
+
+(define (parse-renamings)
+ (let ((renaming
+ (save-parser-context
+ (token-case
+ (var (let ((name1 (var->symbol)))
+ (require-token
+ |to|
+ (signal-missing-token "`to'" "import renaming clause"))
+ (token-case
+ (var (let ((name2 (var->symbol)))
+ (make renaming (from name1) (to name2)
+ (referenced? '#f))))
+ (else (signal-invalid-syntax "import renaming clause")))))
+ (con (let ((name1 (con->symbol)))
+ (require-token
+ |to|
+ (signal-missing-token "`to'" "import renaming clause"))
+ (token-case
+ (con (let ((name2 (con->symbol)))
+ (make renaming (from name1) (to name2)
+ (referenced? '#f))))
+ (else (signal-invalid-syntax "import renaming clause")))))
+ (else (signal-invalid-syntax "import renaming clause"))))))
+ (token-case (\, (cons renaming (parse-renamings)))
+ (\) (list renaming)))))
+
+(define (parse-fixity associativity mod-ast state)
+ (let ((fixity-decl
+ (save-parser-context
+ (let* ((prec (token-case
+ (k (let ((p (token->integer)))
+ (cond ((<= p 9)
+ p)
+ (else
+ (signal-bad-fixity)
+ 9))))
+ (else 9)))
+ (ops (parse-op-list))
+ (fixity (make fixity (associativity associativity)
+ (precedence prec))))
+ (make fixity-decl (fixity fixity) (names ops))))))
+ (push-decl-list fixity-decl (module-fixities mod-ast))
+ (cond ((or (eq? state 'import)
+ (eq? state 'fixity))
+ 'fixity)
+ (else
+ (signal-misplaced-fixity)
+ state))))
+
+
+(define (signal-bad-fixity)
+ (parser-error 'bad-fixity
+ "Expecting fixity value of 0 - 9."))
+
+(define (signal-misplaced-fixity)
+ (parser-error 'misplaced-fixity "The fixity declaration is misplaced."))
+
+(define (parse-op-list)
+ (let ((name (token-case
+ (op (op->symbol))
+ (else (signal-missing-token "<op>" "fixity declaration")))))
+ (token-case
+ (\, (cons name (parse-op-list)))
+ (else (list name)))))
+
+(define (parse-entity context)
+ (trace-parser entity
+ (save-parser-context
+ (token-case
+ (var (var->entity))
+ (tycon
+ (let ((name (token->symbol)))
+ (token-case
+ (\( (token-case
+ (\.\. (require-token
+ '\)
+ (signal-missing-token "`)'" "class or datatype entity"))
+ (make entity-abbreviated (name name)))
+ (var (parse-entity-class name))
+ (con (parse-entity-datatype name))
+ (\) (make entity-class (name name) (methods '())))
+ (else (signal-invalid-syntax "an entity"))))
+ (\.\. (if (eq? context 'export)
+ (make entity-module (name name))
+ (signal-invalid-syntax "an entity")))
+ (else
+ (make entity-con (name name))))))
+ (else (signal-invalid-syntax "an entity"))))))
+
+(define (parse-entity-class class-name)
+ (let ((vars (parse-var-list)))
+ (make entity-class (name class-name) (methods vars))))
+
+(define (parse-entity-datatype type-name)
+ (let ((constrs (parse-con-list)))
+ (make entity-datatype (name type-name) (constructors constrs))))
+
+(define (parse-var-list)
+ (token-case
+ (var (let ((name (var->symbol)))
+ (token-case
+ (\) (list name))
+ (\, (cons name (parse-var-list)))
+ (else
+ (signal-missing-token "`)' or `,'" "class entity")))))
+ (else (signal-missing-token "<var>" "class entity"))))
+
+(define (parse-con-list)
+ (token-case
+ (con (let ((name (con->symbol)))
+ (token-case
+ (\) (list name))
+ (\, (cons name (parse-con-list)))
+ (else (signal-missing-token "`)' or `,'" "datatype entity")))))
+ (else (signal-missing-token "<con>" "datatype entity"))))
diff --git a/parser/parser-debugger.scm b/parser/parser-debugger.scm
new file mode 100644
index 0000000..40d9382
--- /dev/null
+++ b/parser/parser-debugger.scm
@@ -0,0 +1,81 @@
+;;; These routines are strictly for debugging the parser. They could
+;;; be removed from the system later.
+
+;;; define some debugging stuff
+;;; Here's the debugging control:
+;;; Capabilities:
+;;; record start (line,token,production,k)
+;;; record end (line,token,prodection,k)
+;;; print end (line,token,prodection,k,value)
+;;; break start
+;;; break end
+
+(define *parser-debug-options* '())
+(define *parser-debug-lines* '())
+(define *parser-debug-id* 0)
+
+(define (watch-lines . lines)
+ (setf *parser-debug-lines* lines))
+
+(define (watching-this-line?)
+ (and (not (eq? *parser-debug-lines* 'none))
+ (or (null? *parser-debug-lines*)
+ (and (>= *current-line* (car *parser-debug-lines*))
+ (or (null? (cdr *parser-debug-lines*))
+ (<= *current-line* (cadr *parser-debug-lines*)))))))
+
+(define (ptrace-print-obj x)
+ (pprint x))
+
+(define (ptrace-breakpoint)
+ (error "Breakpoint~%"))
+
+(define (parser-show-context id tag msg)
+ (format '#t "~A parse of ~A(~A) Line: ~A Token: ~A"
+ msg tag id *current-line* *token*)
+ (when (not (null? *token-args*))
+ (format '#t " ~A" *token-args*))
+ (format '#t "~%"))
+
+(define (ptrace-clear)
+ (setf *parser-debug-options* '()))
+
+(define (ptrace-pop)
+ (pop *parser-debug-options*))
+
+(define (ptrace-watch . things)
+ (dolist (x things)
+ (push (cons x 'watch) *parser-debug-options*)))
+
+(define (ptrace-show . things)
+ (dolist (x things)
+ (push (cons x 'show) *parser-debug-options*)))
+
+(define (ptrace-break . things)
+ (dolist (x things)
+ (push (cons x 'break) *parser-debug-options*)))
+
+;;; Routines called by the trace-parser macro
+
+(define (tracing-parse/entry tag)
+ (let ((all? (assq 'all *parser-debug-options*))
+ (this? (assq tag *parser-debug-options*)))
+ (cond ((or all? this?)
+ (incf *parser-debug-id*)
+ (parser-show-context *parser-debug-id* tag "Entering")
+ (when (and this? (eq? (cdr this?) 'break))
+ (ptrace-breakpoint))
+ *parser-debug-id*)
+ (else 0))))
+
+(define (tracing-parse/exit tag id res)
+ (let ((all? (assq 'all *parser-debug-options*))
+ (this? (assq tag *parser-debug-options*)))
+ (when (and (or all? this?) (not (eq? tag 0)))
+ (setf (dynamic *returned-obj*) res)
+ (parser-show-context id tag "Exiting")
+ (when (and this? (eq? (cdr this?) 'show))
+ (ptrace-print-obj res))
+ (when (and this? (eq? (cdr this?) 'break))
+ (ptrace-breakpoint)))))
+
diff --git a/parser/parser-driver.scm b/parser/parser-driver.scm
new file mode 100644
index 0000000..cd42d3d
--- /dev/null
+++ b/parser/parser-driver.scm
@@ -0,0 +1,48 @@
+
+;;; This is the top level entry to the parse. The input is a list of file
+;;; names to be parsed and the output is a list of modules. Interface files
+;;; generate modules similar to ordinary files.
+
+(define (parse-files filenames)
+ (let ((all-mods '()))
+ (dolist (file filenames)
+ (let* ((ext (filename-type file))
+ (mods (cond ((string=? ext ".hs")
+ (parse-single-file file))
+ ((string=? ext ".lhs")
+ (parse-single-file/literate file))
+ ((string=? ext ".hi")
+ (parse-single-file/interface file)))))
+ (setf all-mods (append all-mods mods))))
+ all-mods))
+
+(define (parse-single-file filename)
+ (parse-single-file-1 filename '#f '#f))
+
+(define (parse-single-file/literate filename)
+ (parse-single-file-1 filename '#t '#f))
+
+(define (parse-single-file/interface filename)
+ (parse-single-file-1 filename '#f '#t))
+
+(define (parse-single-file-1 filename literate? interface?)
+ (when (memq 'reading *printers*)
+ (format '#t "Reading Haskell source file ~s.~%" filename))
+ (when (not (file-exists? filename))
+ (signal-file-not-found filename))
+ (dynamic-let ((*current-file* filename))
+ (let ((mods '()))
+ (call-with-input-file filename
+ (lambda (port)
+ (let* ((tokens (lex-port port literate?))
+ (module-asts (if interface?
+ (parse-tokens/interface tokens)
+ (parse-tokens tokens))))
+ (setf mods module-asts))))
+ (when (memq 'parse *printers*)
+ (dolist (m mods)
+ (format '#t "~%")
+ (print-full-module m)))
+ mods)))
+
+
diff --git a/parser/parser-errors.scm b/parser/parser-errors.scm
new file mode 100644
index 0000000..ae4d097
--- /dev/null
+++ b/parser/parser-errors.scm
@@ -0,0 +1,74 @@
+;;; This contains parser error handlers. They, in turn, call the
+;;; system error handlers.
+
+(define (lexer-error id . msgs)
+ (parser-error/common id 'recoverable msgs '#t)
+ `#\?)
+
+(define (parser-error id . msgs)
+ (parser-error/common id 'phase msgs '#f)
+ (if (null? *layout-stack*)
+ (abort-compilation)
+ (recover-to-next-decl *token-stream*)))
+
+(define (parser-error/recoverable id . args)
+ (parser-error/common id 'recoverable args '#f))
+
+(define (parser-error/common id type msgs in-lexer?)
+ (let ((place
+ (if in-lexer?
+ (list "Error occured at in file ~A at line ~A, column ~A."
+ *current-file* *current-line* *current-col*)
+ (list "Error occured at in file ~A at line ~A, token ~A."
+ *current-file* *current-line*
+ (cond ((null? *token-args*)
+ *token*)
+ ((null? (cdr *token-args*))
+ (car *token-args*))
+ (else *token-args*)))))) ; could be better
+ (haskell-error id type (list place msgs))))
+
+(define (recover-to-next-decl tokens)
+ (cond ((null? tokens)
+ (abort-compilation))
+ ((eq? (car (car tokens)) 'line)
+ (search-layout-stack *layout-stack* tokens (caddr (car tokens))))
+ (else (recover-to-next-decl (cdr tokens)))))
+
+(define (search-layout-stack layouts tokens column)
+ (cond ((null? layouts)
+ (abort-compilation))
+ ((> column (layout-col (car layouts)))
+ (recover-to-next-decl (cdr tokens)))
+ ((= column (layout-col (car layouts)))
+ (setf *current-col* column)
+ (setf *current-line* (cadr (car tokens)))
+ (setf *token-stream* (cdr tokens))
+ (advance-token) ; loads up *token*
+ ;; *** layout-recovery-fn is not defined anywhere!
+ (funcall (layout-recovery-fn (car layouts))))
+ (else
+ (setf *layout-stack* (cdr *layout-stack*))
+ (search-layout-stack (cdr layouts) tokens column))))
+
+
+;;; Here are some very commonly used signalling functions.
+;;; Other (more specific) signalling functions are defined near
+;;; the places where they are called.
+
+
+;;; This is used when a particular token isn't found.
+
+(define (signal-missing-token what where)
+ (parser-error 'missing-token
+ "Missing ~a in ~a." what where))
+
+
+;;; This is used to signal more complicated parse failures involving
+;;; failure to match a nonterminal.
+
+(define (signal-invalid-syntax where)
+ (parser-error 'invalid-syntax
+ "Invalid syntax appears where ~a is expected." where))
+
+
diff --git a/parser/parser-globals.scm b/parser/parser-globals.scm
new file mode 100644
index 0000000..528e582
--- /dev/null
+++ b/parser/parser-globals.scm
@@ -0,0 +1,27 @@
+;;; Global vars used in the parser
+
+(define *current-line* '()) ; current line the scanner is on
+(define *current-col* '()) ; current col; valid at start of line &
+ ; after where,let,of
+
+;;; Lexer
+
+(define *lex-literate?* '#f)
+(define *start-line* 0)
+(define *start-col* 0)
+(define *on-new-line?* '#t)
+(define *save-col?* '#f)
+(define *port* '())
+(define *tokens* '())
+(define *char* 0)
+(define *peek-char* 0)
+(define *at-eof/p?* 0)
+(define *at-eof?* 0)
+(define *on-new-line? '#f)
+
+;;; Parser
+
+(define *token-stream* '()) ; remaining tokens to be parsed
+(define *token* '()) ; current token type
+(define *token-args* '()) ; current token arguments
+(define *layout-stack* '()) ; columns at which layout is being done
diff --git a/parser/parser-macros.scm b/parser/parser-macros.scm
new file mode 100644
index 0000000..c4f5a63
--- /dev/null
+++ b/parser/parser-macros.scm
@@ -0,0 +1,327 @@
+;;; Macro definitions for the parser & lexer.
+
+
+;;; This macro allows debugging of the lexer. Before releasing, this can
+;;; be replaced by (begin ,@body) for faster code.
+
+(define-syntax (trace-parser tag . body)
+; `(begin
+; (let* ((k (tracing-parse/entry ',tag))
+; (res (begin ,@body)))
+; (tracing-parse/exit ',tag k res)
+; res))
+ (declare (ignore tag))
+ `(begin ,@body)
+ )
+
+;;; Macros used by the lexer.
+
+;;; The lexer used a macro, char-case, to dispatch on the syntactic catagory of
+;;; a character. These catagories (processed at compile time) are defined
+;;; here. Note that some of these definitions use the char-code
+;;; directly and would need updating for different character sets.
+
+(define *lex-definitions*
+ '((vtab 11) ; define by ascii code to avoid relying of the reader
+ (formfeed 12)
+ (whitechar #\newline #\space #\tab formfeed vtab)
+ (small #\a - #\z)
+ (large #\A - #\Z)
+ (digit #\0 - #\9)
+ (symbol #\! #\# #\$ #\% #\& #\* #\+ #\. #\/ #\< #\= #\> #\? #\@
+ #\\ #\^ #\|)
+ (presymbol #\- #\~)
+ (exponent #\e #\E)
+ (graphic large small digit
+ #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+
+ #\, #\- #\. #\/ #\: #\; #\< #\= #\> #\? #\@
+ #\[ #\\ #\] #\^ #\_ #\` #\{ #\| #\} #\~)
+ (charesc #\a #\b #\f #\n #\r #\t #\v #\\ #\" #\' #\&)
+ (cntrl large #\@ #\[ #\\ #\] #\^ #\_)))
+
+;;; The char-case macro is similar to case using characters to select.
+;;; The following capabilities are added by char-case:
+;;; pre-defined constants are denoted by symbols (defined above)
+;;; ranges of characters are represented using -. For example,
+;;; (#\a - #\z #\A - #\Z) denotes all alphabetics.
+;;; numbers refer to the char code of a character.
+;;; The generated code is optimized somewhat to take advantage of
+;;; consecutive character ranges. With a little work, this could be
+;;; implemented using jump tables someday.
+
+(define-syntax (char-case exp . alts)
+ (expand-char-case exp alts))
+
+(define (expand-char-case exp alts)
+ (let ((temp (gensym)))
+ `(let ((,temp ,exp))
+ ,(expand-char-case1 temp alts))))
+
+(define (expand-char-case1 temp alts)
+ (if (null? alts)
+ '()
+ (let* ((alt (car alts))
+ (test (car alt))
+ (body (cons 'begin (cdr alt)))
+ (rest (expand-char-case1 temp (cdr alts))))
+ (cond ((eq? test 'else)
+ body)
+ (else
+ `(if (or ,@(gen-char-tests temp
+ (if (pair? test) test (list test))))
+ ,body
+ ,rest))))))
+
+(define (gen-char-tests temp tests)
+ (gen-char-tests-1 temp
+ (sort-list (gather-char-tests tests) (function char<?))))
+
+(define (gen-char-tests-1 temp chars)
+ (cond ((null? chars)
+ '())
+ ((long-enough-run? chars 3)
+ (gen-range-check temp (car chars) (car chars) (cdr chars)))
+ (else
+ `((char=? ,temp ',(car chars))
+ ,@(gen-char-tests-1 temp (cdr chars))))))
+
+(define (gen-range-check temp first current chars)
+ (if (and (pair? chars) (consec-chars? current (car chars)))
+ (gen-range-check temp first (car chars) (cdr chars))
+ `((and (char>=? ,temp ',first)
+ (char<=? ,temp ',current))
+ ,@(gen-char-tests-1 temp chars))))
+
+(define (consec-chars? c1 c2)
+ (eqv? (+ 1 (char->integer c1)) (char->integer c2)))
+
+(define (long-enough-run? l n)
+ (or (eqv? n 1)
+ (and (pair? (cdr l))
+ (consec-chars? (car l) (cadr l))
+ (long-enough-run? (cdr l) (1- n)))))
+
+(define (gather-char-tests tests)
+ (cond ((null? tests)
+ '())
+ ((symbol? (car tests))
+ (let ((new-test (assq (car tests) *lex-definitions*)))
+ (if new-test
+ (gather-char-tests (append (cdr new-test) (cdr tests)))
+ (error "Unknown character class: ~A~%" (car tests)))))
+ ((integer? (car tests))
+ (cons (integer->char (car tests))
+ (gather-char-tests (cdr tests))))
+ ((and (pair? (cdr tests)) (eq? '- (cadr tests)))
+ (letrec ((fn (lambda (a z)
+ (if (char>? a z)
+ (gather-char-tests (cdddr tests))
+ (cons a (funcall
+ fn (integer->char
+ (+ 1 (char->integer a))) z))))))
+ (funcall fn (car tests) (caddr tests))))
+ ((char? (car tests))
+ (cons (car tests) (gather-char-tests (cdr tests))))
+ (else
+ (error "Invalid selector in char-case: ~A~%" (car tests)))))
+
+;;; This macro scans a list of characters on a given syntaxtic catagory.
+;;; The current character is always included in the resulting list.
+
+(define-syntax (scan-list-of char-type)
+ `(letrec ((test-next (lambda ()
+ (char-case *char*
+ (,char-type
+ (let ((c *char*))
+ (advance-char)
+ (cons c (funcall test-next))))
+ (else '())))))
+ (let ((c *char*))
+ (advance-char)
+ (cons c (funcall test-next)))))
+
+;;; This macro tests for string equality in which the strings are
+;;; represented by lists of characters. The comparisons are expanded
+;;; inline (really just a little partial evaluation going on here!) for
+;;; fast execution. The tok argument evaluate to a list of chars. The string
+;;; argument must be a string constant, which is converted to characters
+;;; as the macro expands.
+
+(define-syntax (string=/list? tok string)
+ (let ((temp (gensym)))
+ `(let ((,temp ,tok))
+ ,(expand-string=/list? temp (string->list string)))))
+
+(define (expand-string=/list? var chars)
+ (if (null? chars)
+ `(null? ,var)
+ (let ((new-temp (gensym)))
+ `(and (pair? ,var)
+ (char=? (car ,var) ',(car chars))
+ (let ((,new-temp (cdr ,var)))
+ ,(expand-string=/list? new-temp (cdr chars)))))))
+
+;;; This macro extends the string equality defined above to search a
+;;; list of reserved words quickly for keywords. It does this by a case
+;;; dispatch on the first character of the string and then processing
+;;; the remaining characters wirh string=/list. This would go a little
+;;; faster with recursive char-case statements, but I'm a little too
+;;; lazy at for this at the moment. If a keyword is found is emitted
+;;; as a symbol. If not, the token string is emitted with the token
+;;; type indicated. Assume the string being scanned is a list of
+;;; chars assigned to a var. (Yeah - I know - I should add a gensym
+;;; var for this argument!!).
+
+(define-syntax (parse-reserved var token-type . reserved-words)
+ (let ((sorted-rws (sort-list reserved-words (function string<?))))
+ `(let ((thunk (lambda () (emit-token/string ',token-type ,var))))
+ (char-case (car ,var)
+ ,@(expand-parse-reserved var
+ (group-by-first-char (list (car sorted-rws)) (cdr sorted-rws)))
+ (else (funcall thunk))))))
+
+(define (group-by-first-char group rest)
+ (cond ((null? rest)
+ (list group))
+ ((char=? (string-ref (car group) 0)
+ (string-ref (car rest) 0))
+ (group-by-first-char (append group (list (car rest))) (cdr rest)))
+ (else
+ (cons group (group-by-first-char (list (car rest)) (cdr rest))))))
+
+(define (expand-parse-reserved var groups)
+ (if (null? groups)
+ '()
+ `((,(string-ref (caar groups) 0)
+ (cond ,@(expand-parse-reserved/group var (car groups))
+ (else (funcall thunk))))
+ ,@(expand-parse-reserved var (cdr groups)))))
+
+(define (expand-parse-reserved/group var group)
+ (if (null? group)
+ '()
+ `(((string=/list? (cdr ,var)
+ ,(substring (car group) 1 (string-length (car group))))
+ (emit-token ',(string->symbol (car group))))
+ ,@(expand-parse-reserved/group var (cdr group)))))
+
+
+;;; The following macros are used by the parser.
+
+;;; The primary macro used by the parser is token-case, which dispatches
+;;; on the type of the current token (this is always *token* - unlike the
+;;; lexer, no lookahead is provided; however, some of these dispatches are
+;;; procedures that do a limited lookahead. The problem with lookahead is that
+;;; the layout rule adds tokens which are not visible looking into the
+;;; token stream directly.
+
+;;; Unlike char-case, the token is normally advanced unless the selector
+;;; includes `no-advance'. The final else also avoids advancing the token.
+
+;;; In addition to raw token types, more complex types can be used. These
+;;; are defined here. The construct `satisfies fn' calls the indicated
+;;; function to determine whether the current token matches.
+
+;;; If the token type to be matched is not a constant, the construct
+;;; `unquote var' matches the current token against the type in the var.
+
+(define *predefined-syntactic-catagories* '(
+ (+ satisfies at-varsym/+?)
+ (- satisfies at-varsym/-?)
+ (tycon no-advance conid)
+ (tyvar no-advance varid)
+ (var no-advance varid satisfies at-varsym/paren?)
+ (con no-advance conid satisfies at-consym/paren?)
+ (name no-advance var con)
+ (consym/paren no-advance satisfies at-consym/paren?)
+ (varsym? no-advance varsym)
+ (consym? no-advance consym)
+ (varid? no-advance varid)
+ (conid? no-advance conid)
+ (op no-advance varsym consym \`)
+ (varop no-advance varsym satisfies at-varid/quoted?)
+ (conop no-advance consym satisfies at-conid/quoted?)
+ (modid no-advance conid)
+ (literal no-advance integer float char string)
+ (numeric no-advance integer float)
+ (k no-advance integer)
+ (+k no-advance satisfies at-+k?)
+ (-n no-advance satisfies at--n?)
+ (apat-start no-advance varid conid literal _ \( \[ \~)
+ (pat-start no-advance - apat-start)
+ (atype-start no-advance tycon tyvar \( \[)
+ (aexp-start no-advance varid conid \( \[ literal)
+ ))
+
+;;; The format of token-case is
+;;; (token-case
+;;; (sel1 . e1) (sel2 . e2) ... [(else . en)])
+;;; If the sel is a symbol it is the same as a singleton list: (@ x) = ((@) x)
+
+;;; Warning: this generates rather poor code! Should be fixed up someday.
+
+(define-syntax (token-case . alts)
+ `(cond ,@(map (function gen-token-case-alt) alts)))
+
+(define (gen-token-case-alt alt)
+ (let ((test (car alt))
+ (code (cdr alt)))
+ (cond ((eq? test 'else)
+ `(else ,@code))
+ ((symbol? test)
+ (gen-token-case-alt-1 (expand-catagories (list test)) code))
+ (else
+ (gen-token-case-alt-1 (expand-catagories test) code)))))
+
+(define (expand-catagories terms)
+ (if (null? terms)
+ terms
+ (let ((a (assq (car terms) *predefined-syntactic-catagories*))
+ (r (expand-catagories (cdr terms))))
+ (if (null? a)
+ (cons (car terms) r)
+ (expand-catagories (append (cdr a) r))))))
+
+(define (gen-token-case-alt-1 test code)
+ `((or ,@(gen-token-test test))
+ ,@(if (memq 'no-advance test) '() '((advance-token)))
+ ,@code))
+
+(define (gen-token-test test)
+ (cond ((null? test)
+ '())
+ ((eq? (car test) 'no-advance)
+ (gen-token-test (cdr test)))
+ ((eq? (car test) 'unquote)
+ (cons `(eq? *token* ,(cadr test)) (gen-token-test (cddr test))))
+ ((eq? (car test) 'satisfies)
+ (cons (list (cadr test)) (gen-token-test (cddr test))))
+ (else
+ (cons `(eq? *token* ',(car test)) (gen-token-test (cdr test))))))
+
+;;; require-tok requires a specific token to be at the scanner. If it
+;;; is found, the token is advanced over. Otherwise, the error
+;;; routine is called.
+
+(define-syntax (require-token tok error-handler)
+ `(token-case
+ (,tok '())
+ (else ,error-handler)))
+
+;;; The save-parser-context macro captures the current line & file and
+;;; attaches it to the ast node generated.
+
+(define-syntax (save-parser-context . body)
+ (let ((temp1 (gensym))
+ (temp2 (gensym)))
+ `(let ((,temp1 (capture-current-line))
+ (,temp2 (begin ,@body)))
+ (setf (ast-node-line-number ,temp2) ,temp1)
+ ,temp2)))
+
+(define (capture-current-line)
+ (make source-pointer (line *current-line*) (file *current-file*)))
+
+(define-syntax (push-decl-list decl place)
+ `(setf ,place (nconc ,place (list ,decl))))
+
diff --git a/parser/parser.scm b/parser/parser.scm
new file mode 100644
index 0000000..7a91930
--- /dev/null
+++ b/parser/parser.scm
@@ -0,0 +1,54 @@
+;;; parser.scm -- compilation unit definition for the lexer and parser
+;;;
+;;; author : John
+;;; date : 10 Dec 1991
+;;;
+
+(define-compilation-unit parser
+ (source-filename "$Y2/parser/")
+ (require global)
+ (unit parser-globals
+ (source-filename "parser-globals.scm"))
+ (unit parser-macros
+ (source-filename "parser-macros.scm")
+ (require parser-globals))
+ (unit parser-errors
+ (source-filename "parser-errors.scm")
+ (require parser-macros))
+ (unit lexer
+ (source-filename "lexer.scm")
+ (require parser-macros))
+ (unit token
+ (source-filename "token.scm")
+ (require parser-macros))
+ (unit parser-driver
+ (source-filename "parser-driver.scm")
+ (require parser-macros))
+ (unit module-parser
+ (source-filename "module-parser.scm")
+ (require parser-macros))
+ (unit interface-parser
+ (source-filename "interface-parser.scm")
+ (require parser-macros))
+ (unit decl-parser
+ (source-filename "decl-parser.scm")
+ (require parser-macros))
+ (unit type-parser
+ (source-filename "type-parser.scm")
+ (require parser-macros))
+ (unit typedecl-parser
+ (source-filename "typedecl-parser.scm")
+ (require parser-macros))
+ (unit exp-parser
+ (source-filename "exp-parser.scm")
+ (require parser-macros))
+ (unit annotation-parser
+ (source-filename "annotation-parser.scm")
+ (require parser-macros))
+ (unit pattern-parser
+ (source-filename "pattern-parser.scm")
+ (require parser-macros))
+ (unit parser-debugger
+ (source-filename "parser-debugger.scm")
+ (require parser-macros)))
+
diff --git a/parser/pattern-parser.scm b/parser/pattern-parser.scm
new file mode 100644
index 0000000..39a82cc
--- /dev/null
+++ b/parser/pattern-parser.scm
@@ -0,0 +1,220 @@
+;;; File: pattern-parser Author: John
+
+;;; This parses the pattern syntax except for the parts which need to be
+;;; resolved by precedence parsing.
+
+;;; This parses a list of alternating pats & conops.
+
+(define (parse-pat)
+ (trace-parser pat
+ (let ((res (parse-pat/list)))
+ (if (null? (cdr res))
+ (car res)
+ (make pp-pat-list (pats res))))))
+
+;;; This parses a list of patterns with intervening conops and + patterns
+
+(define (parse-pat/list)
+ (token-case
+ (con (let ((pcon (pcon->ast)))
+ (setf (pcon-pats pcon) (parse-apat-list))
+ (cons pcon (parse-pat/tail))))
+ (-n
+ (advance-token) ; past -
+ (token-case
+ (numeric (let ((val (literal->ast)))
+ (cons (make pp-pat-negated)
+ (cons (make const-pat (value val))
+ (parse-pat/tail)))))
+ (else
+ (signal-missing-token "<number>" "negative literal pattern"))))
+ (var
+ (let ((var (var->ast)))
+ (token-case
+ (+k (cons (make var-pat (var var))
+ (parse-+k-pat)))
+ (@ (let ((pattern (parse-apat)))
+ (cons (make as-pat (var var) (pattern pattern))
+ (parse-pat/tail))))
+ (else (cons (make var-pat (var var)) (parse-pat/tail))))))
+ (_
+ (let ((pat (make wildcard-pat)))
+ (token-case
+ (+k (cons pat (parse-+k-pat)))
+ (else (cons pat (parse-pat/tail))))))
+ (else (let ((apat (parse-apat)))
+ (cons apat (parse-pat/tail))))))
+
+
+(define (parse-+k-pat)
+ (advance-token) ; past +
+ (token-case
+ (k (let ((val (literal->ast)))
+ (cons (make pp-pat-plus)
+ (cons (make const-pat (value val))
+ (parse-pat/tail)))))
+ (else (signal-missing-token "<integer>" "successor pattern"))))
+
+(define (parse-pat/tail)
+ (token-case
+ (conop
+ (let ((con (pconop->ast)))
+ (cons con (parse-pat/list))))
+ (else '())))
+
+(define (parse-apat)
+ (trace-parser apat
+ (token-case
+ (var (let ((var (var->ast)))
+ (token-case
+ (@
+ (let ((pattern (parse-apat)))
+ (make as-pat (var var) (pattern pattern))))
+ (else (make var-pat (var var))))))
+ (con (pcon->ast))
+ (literal (let ((value (literal->ast)))
+ (make const-pat (value value))))
+ (_ (make wildcard-pat))
+ (\( (token-case
+ (\) (**pcon/def (core-symbol "UnitConstructor") '()))
+ (else
+ (let ((pat (parse-pat)))
+ (token-case
+ (\, (**pcon/tuple (cons pat (parse-pat-list '\)))))
+ (\) pat)
+ (else
+ (signal-missing-token "`)' or `,'" "pattern")))))))
+ (\[ (token-case
+ (\] (make list-pat (pats '())))
+ (else (make list-pat (pats (parse-pat-list '\]))))))
+ (\~ (let ((pattern (parse-apat)))
+ (make irr-pat (pattern pattern))))
+ (else
+ (signal-invalid-syntax "an apat")))))
+
+(define (parse-pat-list term) ;; , separated
+ (let ((pat (parse-pat)))
+ (token-case
+ (\, (cons pat (parse-pat-list term)))
+ ((unquote term) (list pat))
+ (else
+ (signal-missing-token
+ (if (eq? term '\)) "`)'" "`]'")
+ "pattern")))))
+
+(define (parse-apat-list) ;; space separated
+ (token-case
+ (apat-start
+ (let ((pat (parse-apat)))
+ (cons pat (parse-apat-list))))
+ (else
+ '())))
+
+;;; The following routine scans patterns without creating ast structure.
+;;; They return #t or #f depending on whether a valid pattern was encountered.
+;;; The leave the scanner pointing to the next token after the pattern.
+
+(define (scan-pat) ; same as parse-pat/list
+ (and
+ (token-case
+ (con (scan-con)
+ (scan-apat-list))
+ (-n (advance-token)
+ (token-case
+ (numeric (advance-token)
+ '#t)
+ (else '#f)))
+ (var (and (scan-var)
+ (token-case
+ (@ (scan-apat))
+ (+k (scan-+k))
+ (else '#t))))
+ (_ (scan-+k))
+ (else (scan-apat)))
+ (scan-pat/tail)))
+
+(define (scan-pat/tail)
+ (token-case
+ (conop (and (scan-conop)
+ (scan-pat)))
+ (else '#t)))
+
+(define (scan-apat)
+ (token-case
+ (var (scan-var)
+ (token-case
+ (@ (scan-apat))
+ (else '#t)))
+ (con (scan-con))
+ (literal (advance-token)
+ '#t)
+ (_ '#t)
+ (\( (token-case
+ (\) '#t)
+ (else
+ (and (scan-pat)
+ (token-case
+ (\, (scan-pat-list '\)))
+ (\) '#t)
+ (else '#f))))))
+ (\[ (token-case
+ (\] '#t)
+ (else (scan-pat-list '\]))))
+ (\~ (scan-apat))
+ (else '#f)))
+
+(define (scan-pat-list term)
+ (and (scan-pat)
+ (token-case
+ (\, (scan-pat-list term))
+ ((unquote term) '#t)
+ (else '#f))))
+
+(define (scan-apat-list)
+ (token-case
+ (apat-start
+ (and (scan-apat)
+ (scan-apat-list)))
+ (else '#t)))
+
+(define (scan-var)
+ (token-case
+ (varid '#t)
+ (\( (token-case
+ (varsym
+ (token-case
+ (\) '#t)
+ (else '#f)))
+ (else '#f)))
+ (else '#f)))
+
+(define (scan-con)
+ (token-case
+ (conid '#t)
+ (\( (token-case
+ (consym
+ (token-case
+ (\) '#t)
+ (else '#f)))
+ (else '#f)))
+ (else '#f)))
+
+(define (scan-conop)
+ (token-case
+ (consym '#t)
+ (\` (token-case
+ (conid
+ (token-case
+ (\` '#t)
+ (else '#f)))
+ (else '#f)))
+ (else '#f)))
+
+(define (scan-+k)
+ (token-case
+ (+k (advance-token) ; past the +
+ (token-case
+ (integer '#t)
+ (else '#f)))
+ (else '#t)))
+
diff --git a/parser/token.scm b/parser/token.scm
new file mode 100644
index 0000000..6ca9981
--- /dev/null
+++ b/parser/token.scm
@@ -0,0 +1,364 @@
+;;; This file abstracts the representation of tokens. It is used by both
+;;; the lexer & parser. This also contains routines for converting
+;;; individual tokens to ast structure. Routines used by the
+;;; token-case macro in `satisfies' clauses are here too.
+
+;;; Lexer routines for emitting tokens:
+
+(define (emit-token type . args)
+ (cond (*on-new-line?*
+ (push (list 'line *start-line* *start-col*) *tokens*))
+ (*save-col?*
+ (push (list 'col *start-col*) *tokens*)))
+ (push (cons type args) *tokens*)
+ (setf *on-new-line?* '#f)
+ (setf *save-col?* (memq type '(|where| |of| |let|))))
+
+(define (emit-token/string type string-as-list)
+ (emit-token type (list->string string-as-list)))
+
+;;; Parser routines:
+
+;;; These routines take care of the token stream in the parser. They
+;;; maintain globals for the current token and its location.
+
+;;; Globals used:
+;;; *token-stream* remaining tokens to be parsed
+;;; *token* current token type
+;;; *token-args* current token arguments
+;;; *layout-stack* columns at which layout is being done
+;;; *current-line* current line the scanner is on
+;;; *current-col* current col; valid at start of line & after where,let,of
+;;; *current-file*
+
+(define (init-token-stream tokens)
+ (setf *token-stream* tokens)
+ (setf *layout-stack* '())
+ (advance-token))
+
+(define (advance-token)
+ (cond ((null? *token-stream*)
+ (setf *token* 'eof))
+ (else
+ (let* ((token (car *token-stream*)))
+ (setf *token-stream* (cdr *token-stream*))
+ (advance-token-1 (car token) (cdr token))))))
+
+(define (advance-token-1 type args)
+ (cond ((eq? type 'file)
+ (setf *current-file* (car args))
+ (advance-token))
+ ((eq? type 'col)
+ (setf *current-col* (car args))
+ (advance-token))
+ ((eq? type 'line) ;; assume blank lines have been removed
+ (let ((line (car args))
+ (col (cadr args)))
+ (setf *current-line* line)
+ (setf *current-col* col)
+ (setf *token-stream*
+ (resolve-layout *token-stream* *layout-stack*)))
+ (advance-token))
+ (else
+ (setf *token* type)
+ (setf *token-args* args)
+ type)))
+
+(define (insert-extra-token tok-type stream) ; used by layout
+ (cons (list tok-type) stream))
+
+;;; This looks for the { to decide of layout will apply. If so, the layout
+;;; stack is pushed. The body function, fn, is called with a boolean which
+;;; tells it the whether layout rule is in force.
+
+;;; *** The CMU CL compiler barfs with some kind of internal error
+;;; *** on this function. See the revised definition below.
+
+;(define (start-layout fn)
+; (token-case
+; (\{ (funcall fn '#f))
+; (else
+; (let/cc recovery-fn
+; (push (cons *current-col* (lambda ()
+; (let ((res (funcall fn '#t)))
+; (funcall recovery-fn res))))
+; *layout-stack*)
+; (funcall fn '#t)))))
+
+(define (start-layout fn)
+ (token-case
+ (\{ (funcall fn '#f))
+ (else
+ (let/cc recovery-fn
+ (start-layout-1 fn recovery-fn)))))
+
+(define (start-layout-1 fn recovery-fn)
+ (push (cons *current-col*
+ (lambda ()
+ (let ((res (funcall fn '#t)))
+ (funcall recovery-fn res))))
+ *layout-stack*)
+ (funcall fn '#t))
+
+(define (layout-col x)
+ (car x))
+
+(define (layout-recovery-fn x)
+ (cdr x))
+
+(define (close-layout in-layout?)
+ (cond (in-layout?
+ (setf *layout-stack* (cdr *layout-stack*))
+ (token-case
+ ($\} '()) ; the advance-token routine may have inserted this
+ (else '())))
+ (else
+ (token-case
+ (\} '())
+ (else
+ (signal-missing-brace))))))
+
+(define (signal-missing-brace)
+ (parser-error 'missing-brace
+ "Missing `}'."))
+
+(define (resolve-layout stream layout-stack)
+ (if (null? layout-stack)
+ stream
+ (let ((col (layout-col (car layout-stack))))
+ (declare (type fixnum col))
+ (cond ((= (the fixnum *current-col*) col)
+ (insert-extra-token '\; stream))
+ ((< (the fixnum *current-col*) col)
+ (insert-extra-token
+ '$\} (resolve-layout stream (cdr layout-stack))))
+ (else
+ stream)
+ ))))
+
+
+;;; The following routines are used for backtracking. This is a bit of
+;;; a hack at the moment.
+
+(define (save-scanner-state)
+ (vector *token* *token-args* *token-stream* *layout-stack* *current-line*
+ *current-col*))
+
+(define (restore-excursion state)
+ (setf *token* (vector-ref state 0))
+ (setf *token-args* (vector-ref state 1))
+ (setf *token-stream* (vector-ref state 2))
+ (setf *layout-stack* (vector-ref state 3))
+ (setf *current-line* (vector-ref state 4))
+ (setf *current-col* (vector-ref state 5)))
+
+(define (eq-token? type)
+ (eq? type *token*))
+
+(define (eq-token-arg? str)
+ (string=? str (car *token-args*)))
+
+;;; lookahead into the token stream
+
+(define (peek-1-type)
+ (peek-toks 0 *token-stream*))
+
+(define (peek-2-type)
+ (peek-toks 1 *token-stream*))
+
+;;; This is a Q&D way of looking ahead. It does not expand the layout
+;;; as it goes so there may be missing } and ;. This should not matter
+;;; in the places where this is used since these would be invalid anyway.
+;;; To be safe, token types are rechecked while advancing to verify the
+;;; lookahead.
+
+(define (peek-toks n toks)
+ (declare (type fixnum n))
+ (cond ((null? toks)
+ 'eof)
+ ((memq (caar toks) '(col line))
+ (peek-toks n (cdr toks)))
+ ((eqv? n 0)
+ (caar toks))
+ (else (peek-toks (1- n) (cdr toks)))))
+
+;; These routines handle the `satisfies' clauses used in token-case.
+
+(define (at-varsym/+?)
+ (and (eq? *token* 'varsym)
+ (string=? (car *token-args*) "+")))
+
+(define (at-varsym/-?)
+ (and (eq? *token* 'varsym)
+ (string=? (car *token-args*) "-")))
+
+(define (at-varsym/paren?)
+ (and (eq? *token* '\()
+ (eq? (peek-1-type) 'varsym)
+ (eq? (peek-2-type) '\))))
+
+(define (at-consym/paren?)
+ (and (eq? *token* '\()
+ (eq? (peek-1-type) 'consym)
+ (eq? (peek-2-type) '\))))
+
+(define (at-varid/quoted?)
+ (and (eq? *token* '\`)
+ (eq? (peek-1-type) 'varid)))
+
+(define (at-conid/quoted?)
+ (and (eq? *token* '\`)
+ (eq? (peek-1-type) 'conid)))
+
+(define (at-+k?)
+ (and (at-varsym/+?)
+ (eq? (peek-1-type) 'integer)))
+
+(define (at--n?)
+ (and (at-varsym/-?)
+ (memq (peek-1-type) '(integer float))))
+
+;;; The following routines convert the simplest tokens to AST structure.
+
+(define-local-syntax (return+advance x)
+ `(let ((x ,x))
+ (advance-token)
+ x))
+
+(define (token->symbol)
+ (return+advance
+ (string->symbol (car *token-args*))))
+
+(define (token->symbol/con) ; for conid, aconid
+ (return+advance
+ (string->symbol (add-con-prefix (car *token-args*)))))
+
+(define (var->symbol)
+ (token-case
+ (\( (token-case
+ (varsym?
+ (let ((res (token->symbol)))
+ (token-case
+ (\) res)
+ (else (signal-missing-token "`)'" "var")))))
+ (else (signal-missing-token "<varsym>" "var"))))
+ (varid? (token->symbol))))
+
+(define (var->ast)
+ (let ((vname (var->symbol)))
+ (make var-ref (name vname) (infix? '#f) (var *undefined-def*))))
+
+(define (var->entity)
+ (let ((vname (var->symbol)))
+ (make entity-var (name vname))))
+
+(define (con->symbol)
+ (token-case
+ (\( (token-case
+ (consym?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\) res)
+ (else (signal-missing-token "`)'" "con")))))
+ (else (signal-missing-token "<consym>" "con"))))
+ (conid? (token->symbol/con))))
+
+(define (varop->symbol)
+ (token-case
+ (\` (token-case
+ (varid?
+ (let ((res (token->symbol)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "varop")))))
+ (else (signal-missing-token "<varid>" "varop"))))
+ (varsym? (token->symbol))))
+
+(define (varop->ast)
+ (let ((varop-name (varop->symbol)))
+ (make var-ref (name varop-name) (infix? '#t) (var *undefined-def*))))
+
+(define (conop->symbol)
+ (token-case
+ (\` (token-case
+ (conid?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "conop")))))
+ (else (signal-missing-token "<conid>" "conop"))))
+ (consym? (token->symbol/con))))
+
+(define (conop->ast)
+ (let ((conop-name (conop->symbol)))
+ (make con-ref (name conop-name) (infix? '#t) (con *undefined-def*))))
+
+(define (op->symbol)
+ (token-case
+ (\` (token-case
+ (conid?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "op")))))
+ (varid?
+ (let ((res (token->symbol)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "op")))))
+ (else (signal-missing-token "<conid> or <varid>" "op"))))
+ (consym? (token->symbol/con))
+ (varsym? (token->symbol))))
+
+(define (con->ast) ; for conid, aconid
+ (let ((name (con->symbol)))
+ (make con-ref (name name) (con *undefined-def*) (infix? '#f))))
+
+(define (pcon->ast) ; for aconid, conid
+ (let ((name (con->symbol)))
+ (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f))))
+
+(define (pconop->ast) ; for aconop, conop
+ (let ((name (conop->symbol)))
+ (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t))))
+
+(define (tycon->ast) ; for aconid
+ (let ((name (token->symbol)))
+ (make tycon (name name) (def *undefined-def*) (args '()))))
+
+(define (class->ast) ; for aconid
+ (let ((name (token->symbol)))
+ (make class-ref (name name) (class *undefined-def*))))
+
+(define (tyvar->ast) ; for avarid
+ (let ((name (token->symbol)))
+ (make tyvar (name name))))
+
+(define (token->integer) ; for integer
+ (return+advance
+ (car *token-args*)))
+
+(define (integer->ast) ; for integer
+ (return+advance
+ (make integer-const (value (car *token-args*)))))
+
+(define (float->ast)
+ (return+advance
+ (make float-const (numerator (car *token-args*))
+ (denominator (cadr *token-args*))
+ (exponent (caddr *token-args*)))))
+
+(define (string->ast)
+ (return+advance
+ (make string-const (value (car *token-args*)))))
+
+(define (char->ast)
+ (return+advance
+ (make char-const (value (car *token-args*)))))
+
+(define (literal->ast)
+ (token-case
+ ((no-advance integer) (integer->ast))
+ ((no-advance float) (float->ast))
+ ((no-advance string) (string->ast))
+ ((no-advance char) (char->ast))))
diff --git a/parser/type-parser.scm b/parser/type-parser.scm
new file mode 100644
index 0000000..79c5dbf
--- /dev/null
+++ b/parser/type-parser.scm
@@ -0,0 +1,116 @@
+;;; File: type-parser Author: John
+
+(define (parse-type)
+ (let ((type (parse-btype)))
+ (token-case
+ (->
+ (**tycon/def (core-symbol "Arrow") (list type (parse-type))))
+ (else type))))
+
+(define (parse-btype)
+ (token-case
+ (tycon (let* ((tycon (tycon->ast))
+ (tycon-args (parse-atype-list)))
+ (setf (tycon-args tycon) tycon-args)
+ tycon))
+ (else
+ (parse-atype))))
+
+(define (parse-atype-list)
+ (token-case
+ (atype-start
+ (let ((atype (parse-atype)))
+ (cons atype (parse-atype-list))))
+ (else '())))
+
+(define (parse-atype)
+ (token-case
+ (tyvar (tyvar->ast))
+ (tycon (tycon->ast))
+ (\( (token-case
+ (\) (**tycon/def (core-symbol "UnitType") '()))
+ (else
+ (let ((type (parse-type)))
+ (token-case
+ (\) type)
+ (\, (let ((types (cons type (parse-type-list))))
+ (**tycon/def (tuple-tycon (length types)) types)))
+ (else
+ (signal-missing-token "`)' or `,'" "type expression")))))))
+ (\[ (let ((type (parse-type)))
+ (require-token \] (signal-missing-token "`]'" "type expression"))
+ (**tycon/def (core-symbol "List") (list type))))
+ (else
+ (signal-invalid-syntax "an atype"))))
+
+(define (parse-type-list)
+ (let ((type (parse-type)))
+ (token-case (\, (cons type (parse-type-list)))
+ (\) (list type))
+ (else (signal-missing-token "`)' or `,'" "type expression")))))
+
+;;; This is used to determine whether a type is preceded by a context
+
+(define (has-optional-context?)
+ (let* ((saved-excursion (save-scanner-state))
+ (res (token-case
+ (conid
+ (token-case
+ (varid (eq-token? '=>))
+ (else '#f)))
+ (\( (scan-context))
+ (else '#f))))
+ (restore-excursion saved-excursion)
+ res))
+
+(define (scan-context)
+ (token-case
+ (conid
+ (token-case
+ (varid
+ (token-case
+ (\) (eq-token? '=>))
+ (\, (scan-context))
+ (else '#f)))
+ (else '#f)))
+ (else '#f)))
+
+(define (parse-context)
+ (let ((contexts (token-case
+ (tycon
+ (list (parse-single-context)))
+ (\( (parse-contexts))
+ (else
+ (signal-invalid-syntax "a context")))))
+ (require-token => (signal-missing-token "`=>'" "context"))
+ contexts))
+
+(define (parse-single-context)
+ (let ((class (class->ast)))
+ (token-case
+ (tyvar
+ (let ((tyvar (token->symbol)))
+ (make context (class class) (tyvar tyvar))))
+ (else (signal-missing-token "<tyvar>" "class assertion")))))
+
+(define (parse-contexts)
+ (token-case
+ (tycon (let ((context (parse-single-context)))
+ (token-case
+ (\, (cons context (parse-contexts)))
+ (\) (list context))
+ (else (signal-missing-token "`)' or `,'" "context")))))
+ (else (signal-missing-token "<tycon>" "class assertion"))))
+
+(define (parse-optional-context)
+ (if (has-optional-context?)
+ (parse-context)
+ '()))
+
+(define (parse-signature)
+ (let* ((contexts (parse-optional-context))
+ (type (parse-type)))
+ (make signature (context contexts) (type type))))
+
+
+ \ No newline at end of file
diff --git a/parser/typedecl-parser.scm b/parser/typedecl-parser.scm
new file mode 100644
index 0000000..4995dc7
--- /dev/null
+++ b/parser/typedecl-parser.scm
@@ -0,0 +1,163 @@
+;;; File: parser/typedecl-parser Author: John
+
+(define (parse-type-decl interface?)
+ (save-parser-context
+ (let* ((sig (parse-signature))
+ (contexts (signature-context sig))
+ (simple (signature-type sig))
+ (deriving '())
+ (constrs '()))
+ ;; #t = builtins ([] (,,) ->) not allowed
+ (check-simple simple '#t "type declaration")
+ (let ((annotations (parse-constr-annotations)))
+ (token-case
+ (= (setf constrs (parse-constrs))
+ (token-case
+ (|deriving|
+ (setf deriving
+ (token-case
+ (\(
+ (token-case
+ (\) '())
+ (else (parse-class-list))))
+ (tycon (list (class->ast)))
+ (else (signal-invalid-syntax "a deriving clause")))))))
+ (else
+ (when (not interface?)
+ (signal-missing-constructors))))
+ (make data-decl (context contexts) (simple simple)
+ (constrs constrs) (deriving deriving)
+ (annotations annotations))))))
+
+(define (signal-missing-constructors)
+ (parser-error 'missing-constructors
+ "Data type definition requires constructors"))
+
+(define (check-simple simple fresh? where)
+ (when (not (tycon? simple))
+ (signal-not-simple where))
+ (when (and fresh? (not (eq? (tycon-def simple) *undefined-def*)))
+ (signal-not-simple where))
+ (let ((tyvars (map (lambda (arg)
+ (when (not (tyvar? arg))
+ (signal-not-simple where))
+ (tyvar-name arg))
+ (tycon-args simple))))
+ (when (not (null? (find-duplicates tyvars)))
+ (signal-unique-tyvars-required))))
+
+(define (signal-unique-tyvars-required)
+ (parser-error 'unique-tyvars-required
+ "Duplicate type variables appear in simple."))
+
+(define (signal-not-simple where)
+ (parser-error 'not-simple "Simple type required in ~a." where))
+
+(define (parse-constrs)
+ (let ((constr (parse-constr)))
+ (token-case
+ (\| (cons constr (parse-constrs)))
+ (else (list constr)))))
+
+(define (parse-constr)
+ (save-parser-context
+ (let ((saved-excursion (save-scanner-state)))
+ (token-case
+ (consym/paren
+ (parse-prefix-constr))
+ (else
+ (let ((type1 (parse-btype))
+ (anns (parse-constr-annotations)))
+ (token-case
+ (conop
+ (parse-infix-constr (tuple type1 anns)))
+ (else
+ (restore-excursion saved-excursion)
+ (parse-prefix-constr)))))))))
+
+(define (parse-prefix-constr)
+ (token-case
+ (con
+ (let* ((con (con->ast))
+ (types (parse-constr-type-list)))
+ (make constr (constructor con) (types types))))
+ (else
+ (signal-missing-token "<con>" "constrs list"))))
+
+(define (parse-constr-type-list)
+ (token-case
+ (atype-start
+ (let* ((atype (parse-atype))
+ (anns (parse-constr-annotations)))
+ (cons (tuple atype anns)
+ (parse-constr-type-list))))
+ (else '())))
+
+(define (parse-infix-constr t+a1)
+ (let* ((con (conop->ast))
+ (type2 (parse-btype))
+ (anns (parse-constr-annotations)))
+ (make constr (constructor con) (types (list t+a1 (tuple type2 anns))))))
+
+(define (parse-class-list)
+ (token-case
+ (tycon (let ((class (class->ast)))
+ (token-case
+ (\, (cons class (parse-class-list)))
+ (\) (list class))
+ (else (signal-missing-token "`)' or `,'" "deriving clause")))))
+ (else (signal-missing-token "<tycon>" "deriving clause"))))
+
+(define (parse-constr-annotations)
+ (token-case
+ ((begin-annotation no-advance)
+ (let ((annotations (parse-annotations)))
+ (append annotations (parse-constr-annotations))))
+ (else '())))
+
+(define (parse-synonym-decl)
+ (save-parser-context
+ (let* ((sig (parse-signature))
+ (contexts (signature-context sig))
+ (simple (signature-type sig)))
+ (check-simple simple '#t "type synonym declaration")
+ (when (not (null? contexts))
+ (signal-no-context-in-synonym))
+ (require-token = (signal-missing-token "`='" "type synonym declaration"))
+ (let ((body (parse-type)))
+ (make synonym-decl (simple simple) (body body))))))
+
+(define (signal-no-context-in-synonym)
+ (parser-error 'no-context-in-synonym
+ "Context is not permitted in type synonym declaration."))
+
+(define (parse-class-decl)
+ (save-parser-context
+ (let ((supers (parse-optional-context)))
+ (token-case
+ (tycon
+ (let ((class (class->ast)))
+ (token-case
+ (tyvar
+ (let* ((class-var (token->symbol))
+ (decls (parse-where-decls)))
+ (make class-decl (class class) (super-classes supers)
+ (class-var class-var) (decls decls))))
+ (else
+ (signal-missing-token "<tyvar>" "class declaration")))))
+ (else (signal-missing-token "<tycon>" "class declaration"))))))
+
+(define (parse-instance-decl interface?)
+ (save-parser-context
+ (let ((contexts (parse-optional-context))
+ (decls '()))
+ (token-case
+ (tycon
+ (let* ((class (class->ast))
+ (simple (parse-type)))
+ (when (not interface?)
+ (setf decls (parse-where-decls)))
+ (check-simple simple '#f "instance declaration")
+ (make instance-decl (context contexts) (class class)
+ (simple simple) (decls decls))))
+ (else (signal-missing-token "<tycon>" "instance declaration"))))))