summaryrefslogtreecommitdiff
path: root/parser/token.scm
diff options
context:
space:
mode:
Diffstat (limited to 'parser/token.scm')
-rw-r--r--parser/token.scm364
1 files changed, 364 insertions, 0 deletions
diff --git a/parser/token.scm b/parser/token.scm
new file mode 100644
index 0000000..6ca9981
--- /dev/null
+++ b/parser/token.scm
@@ -0,0 +1,364 @@
+;;; This file abstracts the representation of tokens. It is used by both
+;;; the lexer & parser. This also contains routines for converting
+;;; individual tokens to ast structure. Routines used by the
+;;; token-case macro in `satisfies' clauses are here too.
+
+;;; Lexer routines for emitting tokens:
+
+(define (emit-token type . args)
+ (cond (*on-new-line?*
+ (push (list 'line *start-line* *start-col*) *tokens*))
+ (*save-col?*
+ (push (list 'col *start-col*) *tokens*)))
+ (push (cons type args) *tokens*)
+ (setf *on-new-line?* '#f)
+ (setf *save-col?* (memq type '(|where| |of| |let|))))
+
+(define (emit-token/string type string-as-list)
+ (emit-token type (list->string string-as-list)))
+
+;;; Parser routines:
+
+;;; These routines take care of the token stream in the parser. They
+;;; maintain globals for the current token and its location.
+
+;;; Globals used:
+;;; *token-stream* remaining tokens to be parsed
+;;; *token* current token type
+;;; *token-args* current token arguments
+;;; *layout-stack* columns at which layout is being done
+;;; *current-line* current line the scanner is on
+;;; *current-col* current col; valid at start of line & after where,let,of
+;;; *current-file*
+
+(define (init-token-stream tokens)
+ (setf *token-stream* tokens)
+ (setf *layout-stack* '())
+ (advance-token))
+
+(define (advance-token)
+ (cond ((null? *token-stream*)
+ (setf *token* 'eof))
+ (else
+ (let* ((token (car *token-stream*)))
+ (setf *token-stream* (cdr *token-stream*))
+ (advance-token-1 (car token) (cdr token))))))
+
+(define (advance-token-1 type args)
+ (cond ((eq? type 'file)
+ (setf *current-file* (car args))
+ (advance-token))
+ ((eq? type 'col)
+ (setf *current-col* (car args))
+ (advance-token))
+ ((eq? type 'line) ;; assume blank lines have been removed
+ (let ((line (car args))
+ (col (cadr args)))
+ (setf *current-line* line)
+ (setf *current-col* col)
+ (setf *token-stream*
+ (resolve-layout *token-stream* *layout-stack*)))
+ (advance-token))
+ (else
+ (setf *token* type)
+ (setf *token-args* args)
+ type)))
+
+(define (insert-extra-token tok-type stream) ; used by layout
+ (cons (list tok-type) stream))
+
+;;; This looks for the { to decide of layout will apply. If so, the layout
+;;; stack is pushed. The body function, fn, is called with a boolean which
+;;; tells it the whether layout rule is in force.
+
+;;; *** The CMU CL compiler barfs with some kind of internal error
+;;; *** on this function. See the revised definition below.
+
+;(define (start-layout fn)
+; (token-case
+; (\{ (funcall fn '#f))
+; (else
+; (let/cc recovery-fn
+; (push (cons *current-col* (lambda ()
+; (let ((res (funcall fn '#t)))
+; (funcall recovery-fn res))))
+; *layout-stack*)
+; (funcall fn '#t)))))
+
+(define (start-layout fn)
+ (token-case
+ (\{ (funcall fn '#f))
+ (else
+ (let/cc recovery-fn
+ (start-layout-1 fn recovery-fn)))))
+
+(define (start-layout-1 fn recovery-fn)
+ (push (cons *current-col*
+ (lambda ()
+ (let ((res (funcall fn '#t)))
+ (funcall recovery-fn res))))
+ *layout-stack*)
+ (funcall fn '#t))
+
+(define (layout-col x)
+ (car x))
+
+(define (layout-recovery-fn x)
+ (cdr x))
+
+(define (close-layout in-layout?)
+ (cond (in-layout?
+ (setf *layout-stack* (cdr *layout-stack*))
+ (token-case
+ ($\} '()) ; the advance-token routine may have inserted this
+ (else '())))
+ (else
+ (token-case
+ (\} '())
+ (else
+ (signal-missing-brace))))))
+
+(define (signal-missing-brace)
+ (parser-error 'missing-brace
+ "Missing `}'."))
+
+(define (resolve-layout stream layout-stack)
+ (if (null? layout-stack)
+ stream
+ (let ((col (layout-col (car layout-stack))))
+ (declare (type fixnum col))
+ (cond ((= (the fixnum *current-col*) col)
+ (insert-extra-token '\; stream))
+ ((< (the fixnum *current-col*) col)
+ (insert-extra-token
+ '$\} (resolve-layout stream (cdr layout-stack))))
+ (else
+ stream)
+ ))))
+
+
+;;; The following routines are used for backtracking. This is a bit of
+;;; a hack at the moment.
+
+(define (save-scanner-state)
+ (vector *token* *token-args* *token-stream* *layout-stack* *current-line*
+ *current-col*))
+
+(define (restore-excursion state)
+ (setf *token* (vector-ref state 0))
+ (setf *token-args* (vector-ref state 1))
+ (setf *token-stream* (vector-ref state 2))
+ (setf *layout-stack* (vector-ref state 3))
+ (setf *current-line* (vector-ref state 4))
+ (setf *current-col* (vector-ref state 5)))
+
+(define (eq-token? type)
+ (eq? type *token*))
+
+(define (eq-token-arg? str)
+ (string=? str (car *token-args*)))
+
+;;; lookahead into the token stream
+
+(define (peek-1-type)
+ (peek-toks 0 *token-stream*))
+
+(define (peek-2-type)
+ (peek-toks 1 *token-stream*))
+
+;;; This is a Q&D way of looking ahead. It does not expand the layout
+;;; as it goes so there may be missing } and ;. This should not matter
+;;; in the places where this is used since these would be invalid anyway.
+;;; To be safe, token types are rechecked while advancing to verify the
+;;; lookahead.
+
+(define (peek-toks n toks)
+ (declare (type fixnum n))
+ (cond ((null? toks)
+ 'eof)
+ ((memq (caar toks) '(col line))
+ (peek-toks n (cdr toks)))
+ ((eqv? n 0)
+ (caar toks))
+ (else (peek-toks (1- n) (cdr toks)))))
+
+;; These routines handle the `satisfies' clauses used in token-case.
+
+(define (at-varsym/+?)
+ (and (eq? *token* 'varsym)
+ (string=? (car *token-args*) "+")))
+
+(define (at-varsym/-?)
+ (and (eq? *token* 'varsym)
+ (string=? (car *token-args*) "-")))
+
+(define (at-varsym/paren?)
+ (and (eq? *token* '\()
+ (eq? (peek-1-type) 'varsym)
+ (eq? (peek-2-type) '\))))
+
+(define (at-consym/paren?)
+ (and (eq? *token* '\()
+ (eq? (peek-1-type) 'consym)
+ (eq? (peek-2-type) '\))))
+
+(define (at-varid/quoted?)
+ (and (eq? *token* '\`)
+ (eq? (peek-1-type) 'varid)))
+
+(define (at-conid/quoted?)
+ (and (eq? *token* '\`)
+ (eq? (peek-1-type) 'conid)))
+
+(define (at-+k?)
+ (and (at-varsym/+?)
+ (eq? (peek-1-type) 'integer)))
+
+(define (at--n?)
+ (and (at-varsym/-?)
+ (memq (peek-1-type) '(integer float))))
+
+;;; The following routines convert the simplest tokens to AST structure.
+
+(define-local-syntax (return+advance x)
+ `(let ((x ,x))
+ (advance-token)
+ x))
+
+(define (token->symbol)
+ (return+advance
+ (string->symbol (car *token-args*))))
+
+(define (token->symbol/con) ; for conid, aconid
+ (return+advance
+ (string->symbol (add-con-prefix (car *token-args*)))))
+
+(define (var->symbol)
+ (token-case
+ (\( (token-case
+ (varsym?
+ (let ((res (token->symbol)))
+ (token-case
+ (\) res)
+ (else (signal-missing-token "`)'" "var")))))
+ (else (signal-missing-token "<varsym>" "var"))))
+ (varid? (token->symbol))))
+
+(define (var->ast)
+ (let ((vname (var->symbol)))
+ (make var-ref (name vname) (infix? '#f) (var *undefined-def*))))
+
+(define (var->entity)
+ (let ((vname (var->symbol)))
+ (make entity-var (name vname))))
+
+(define (con->symbol)
+ (token-case
+ (\( (token-case
+ (consym?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\) res)
+ (else (signal-missing-token "`)'" "con")))))
+ (else (signal-missing-token "<consym>" "con"))))
+ (conid? (token->symbol/con))))
+
+(define (varop->symbol)
+ (token-case
+ (\` (token-case
+ (varid?
+ (let ((res (token->symbol)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "varop")))))
+ (else (signal-missing-token "<varid>" "varop"))))
+ (varsym? (token->symbol))))
+
+(define (varop->ast)
+ (let ((varop-name (varop->symbol)))
+ (make var-ref (name varop-name) (infix? '#t) (var *undefined-def*))))
+
+(define (conop->symbol)
+ (token-case
+ (\` (token-case
+ (conid?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "conop")))))
+ (else (signal-missing-token "<conid>" "conop"))))
+ (consym? (token->symbol/con))))
+
+(define (conop->ast)
+ (let ((conop-name (conop->symbol)))
+ (make con-ref (name conop-name) (infix? '#t) (con *undefined-def*))))
+
+(define (op->symbol)
+ (token-case
+ (\` (token-case
+ (conid?
+ (let ((res (token->symbol/con)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "op")))))
+ (varid?
+ (let ((res (token->symbol)))
+ (token-case
+ (\` res)
+ (else (signal-missing-token "``'" "op")))))
+ (else (signal-missing-token "<conid> or <varid>" "op"))))
+ (consym? (token->symbol/con))
+ (varsym? (token->symbol))))
+
+(define (con->ast) ; for conid, aconid
+ (let ((name (con->symbol)))
+ (make con-ref (name name) (con *undefined-def*) (infix? '#f))))
+
+(define (pcon->ast) ; for aconid, conid
+ (let ((name (con->symbol)))
+ (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f))))
+
+(define (pconop->ast) ; for aconop, conop
+ (let ((name (conop->symbol)))
+ (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t))))
+
+(define (tycon->ast) ; for aconid
+ (let ((name (token->symbol)))
+ (make tycon (name name) (def *undefined-def*) (args '()))))
+
+(define (class->ast) ; for aconid
+ (let ((name (token->symbol)))
+ (make class-ref (name name) (class *undefined-def*))))
+
+(define (tyvar->ast) ; for avarid
+ (let ((name (token->symbol)))
+ (make tyvar (name name))))
+
+(define (token->integer) ; for integer
+ (return+advance
+ (car *token-args*)))
+
+(define (integer->ast) ; for integer
+ (return+advance
+ (make integer-const (value (car *token-args*)))))
+
+(define (float->ast)
+ (return+advance
+ (make float-const (numerator (car *token-args*))
+ (denominator (cadr *token-args*))
+ (exponent (caddr *token-args*)))))
+
+(define (string->ast)
+ (return+advance
+ (make string-const (value (car *token-args*)))))
+
+(define (char->ast)
+ (return+advance
+ (make char-const (value (car *token-args*)))))
+
+(define (literal->ast)
+ (token-case
+ ((no-advance integer) (integer->ast))
+ ((no-advance float) (float->ast))
+ ((no-advance string) (string->ast))
+ ((no-advance char) (char->ast))))