From 4e987026148fe65c323afbc93cd560c07bf06b3f Mon Sep 17 00:00:00 2001 From: Yale AI Dept Date: Wed, 14 Jul 1993 13:08:00 -0500 Subject: Import to github. --- parser/token.scm | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 parser/token.scm (limited to 'parser/token.scm') diff --git a/parser/token.scm b/parser/token.scm new file mode 100644 index 0000000..6ca9981 --- /dev/null +++ b/parser/token.scm @@ -0,0 +1,364 @@ +;;; This file abstracts the representation of tokens. It is used by both +;;; the lexer & parser. This also contains routines for converting +;;; individual tokens to ast structure. Routines used by the +;;; token-case macro in `satisfies' clauses are here too. + +;;; Lexer routines for emitting tokens: + +(define (emit-token type . args) + (cond (*on-new-line?* + (push (list 'line *start-line* *start-col*) *tokens*)) + (*save-col?* + (push (list 'col *start-col*) *tokens*))) + (push (cons type args) *tokens*) + (setf *on-new-line?* '#f) + (setf *save-col?* (memq type '(|where| |of| |let|)))) + +(define (emit-token/string type string-as-list) + (emit-token type (list->string string-as-list))) + +;;; Parser routines: + +;;; These routines take care of the token stream in the parser. They +;;; maintain globals for the current token and its location. + +;;; Globals used: +;;; *token-stream* remaining tokens to be parsed +;;; *token* current token type +;;; *token-args* current token arguments +;;; *layout-stack* columns at which layout is being done +;;; *current-line* current line the scanner is on +;;; *current-col* current col; valid at start of line & after where,let,of +;;; *current-file* + +(define (init-token-stream tokens) + (setf *token-stream* tokens) + (setf *layout-stack* '()) + (advance-token)) + +(define (advance-token) + (cond ((null? *token-stream*) + (setf *token* 'eof)) + (else + (let* ((token (car *token-stream*))) + (setf *token-stream* (cdr *token-stream*)) + (advance-token-1 (car token) (cdr token)))))) + +(define (advance-token-1 type args) + (cond ((eq? type 'file) + (setf *current-file* (car args)) + (advance-token)) + ((eq? type 'col) + (setf *current-col* (car args)) + (advance-token)) + ((eq? type 'line) ;; assume blank lines have been removed + (let ((line (car args)) + (col (cadr args))) + (setf *current-line* line) + (setf *current-col* col) + (setf *token-stream* + (resolve-layout *token-stream* *layout-stack*))) + (advance-token)) + (else + (setf *token* type) + (setf *token-args* args) + type))) + +(define (insert-extra-token tok-type stream) ; used by layout + (cons (list tok-type) stream)) + +;;; This looks for the { to decide of layout will apply. If so, the layout +;;; stack is pushed. The body function, fn, is called with a boolean which +;;; tells it the whether layout rule is in force. + +;;; *** The CMU CL compiler barfs with some kind of internal error +;;; *** on this function. See the revised definition below. + +;(define (start-layout fn) +; (token-case +; (\{ (funcall fn '#f)) +; (else +; (let/cc recovery-fn +; (push (cons *current-col* (lambda () +; (let ((res (funcall fn '#t))) +; (funcall recovery-fn res)))) +; *layout-stack*) +; (funcall fn '#t))))) + +(define (start-layout fn) + (token-case + (\{ (funcall fn '#f)) + (else + (let/cc recovery-fn + (start-layout-1 fn recovery-fn))))) + +(define (start-layout-1 fn recovery-fn) + (push (cons *current-col* + (lambda () + (let ((res (funcall fn '#t))) + (funcall recovery-fn res)))) + *layout-stack*) + (funcall fn '#t)) + +(define (layout-col x) + (car x)) + +(define (layout-recovery-fn x) + (cdr x)) + +(define (close-layout in-layout?) + (cond (in-layout? + (setf *layout-stack* (cdr *layout-stack*)) + (token-case + ($\} '()) ; the advance-token routine may have inserted this + (else '()))) + (else + (token-case + (\} '()) + (else + (signal-missing-brace)))))) + +(define (signal-missing-brace) + (parser-error 'missing-brace + "Missing `}'.")) + +(define (resolve-layout stream layout-stack) + (if (null? layout-stack) + stream + (let ((col (layout-col (car layout-stack)))) + (declare (type fixnum col)) + (cond ((= (the fixnum *current-col*) col) + (insert-extra-token '\; stream)) + ((< (the fixnum *current-col*) col) + (insert-extra-token + '$\} (resolve-layout stream (cdr layout-stack)))) + (else + stream) + )))) + + +;;; The following routines are used for backtracking. This is a bit of +;;; a hack at the moment. + +(define (save-scanner-state) + (vector *token* *token-args* *token-stream* *layout-stack* *current-line* + *current-col*)) + +(define (restore-excursion state) + (setf *token* (vector-ref state 0)) + (setf *token-args* (vector-ref state 1)) + (setf *token-stream* (vector-ref state 2)) + (setf *layout-stack* (vector-ref state 3)) + (setf *current-line* (vector-ref state 4)) + (setf *current-col* (vector-ref state 5))) + +(define (eq-token? type) + (eq? type *token*)) + +(define (eq-token-arg? str) + (string=? str (car *token-args*))) + +;;; lookahead into the token stream + +(define (peek-1-type) + (peek-toks 0 *token-stream*)) + +(define (peek-2-type) + (peek-toks 1 *token-stream*)) + +;;; This is a Q&D way of looking ahead. It does not expand the layout +;;; as it goes so there may be missing } and ;. This should not matter +;;; in the places where this is used since these would be invalid anyway. +;;; To be safe, token types are rechecked while advancing to verify the +;;; lookahead. + +(define (peek-toks n toks) + (declare (type fixnum n)) + (cond ((null? toks) + 'eof) + ((memq (caar toks) '(col line)) + (peek-toks n (cdr toks))) + ((eqv? n 0) + (caar toks)) + (else (peek-toks (1- n) (cdr toks))))) + +;; These routines handle the `satisfies' clauses used in token-case. + +(define (at-varsym/+?) + (and (eq? *token* 'varsym) + (string=? (car *token-args*) "+"))) + +(define (at-varsym/-?) + (and (eq? *token* 'varsym) + (string=? (car *token-args*) "-"))) + +(define (at-varsym/paren?) + (and (eq? *token* '\() + (eq? (peek-1-type) 'varsym) + (eq? (peek-2-type) '\)))) + +(define (at-consym/paren?) + (and (eq? *token* '\() + (eq? (peek-1-type) 'consym) + (eq? (peek-2-type) '\)))) + +(define (at-varid/quoted?) + (and (eq? *token* '\`) + (eq? (peek-1-type) 'varid))) + +(define (at-conid/quoted?) + (and (eq? *token* '\`) + (eq? (peek-1-type) 'conid))) + +(define (at-+k?) + (and (at-varsym/+?) + (eq? (peek-1-type) 'integer))) + +(define (at--n?) + (and (at-varsym/-?) + (memq (peek-1-type) '(integer float)))) + +;;; The following routines convert the simplest tokens to AST structure. + +(define-local-syntax (return+advance x) + `(let ((x ,x)) + (advance-token) + x)) + +(define (token->symbol) + (return+advance + (string->symbol (car *token-args*)))) + +(define (token->symbol/con) ; for conid, aconid + (return+advance + (string->symbol (add-con-prefix (car *token-args*))))) + +(define (var->symbol) + (token-case + (\( (token-case + (varsym? + (let ((res (token->symbol))) + (token-case + (\) res) + (else (signal-missing-token "`)'" "var"))))) + (else (signal-missing-token "" "var")))) + (varid? (token->symbol)))) + +(define (var->ast) + (let ((vname (var->symbol))) + (make var-ref (name vname) (infix? '#f) (var *undefined-def*)))) + +(define (var->entity) + (let ((vname (var->symbol))) + (make entity-var (name vname)))) + +(define (con->symbol) + (token-case + (\( (token-case + (consym? + (let ((res (token->symbol/con))) + (token-case + (\) res) + (else (signal-missing-token "`)'" "con"))))) + (else (signal-missing-token "" "con")))) + (conid? (token->symbol/con)))) + +(define (varop->symbol) + (token-case + (\` (token-case + (varid? + (let ((res (token->symbol))) + (token-case + (\` res) + (else (signal-missing-token "``'" "varop"))))) + (else (signal-missing-token "" "varop")))) + (varsym? (token->symbol)))) + +(define (varop->ast) + (let ((varop-name (varop->symbol))) + (make var-ref (name varop-name) (infix? '#t) (var *undefined-def*)))) + +(define (conop->symbol) + (token-case + (\` (token-case + (conid? + (let ((res (token->symbol/con))) + (token-case + (\` res) + (else (signal-missing-token "``'" "conop"))))) + (else (signal-missing-token "" "conop")))) + (consym? (token->symbol/con)))) + +(define (conop->ast) + (let ((conop-name (conop->symbol))) + (make con-ref (name conop-name) (infix? '#t) (con *undefined-def*)))) + +(define (op->symbol) + (token-case + (\` (token-case + (conid? + (let ((res (token->symbol/con))) + (token-case + (\` res) + (else (signal-missing-token "``'" "op"))))) + (varid? + (let ((res (token->symbol))) + (token-case + (\` res) + (else (signal-missing-token "``'" "op"))))) + (else (signal-missing-token " or " "op")))) + (consym? (token->symbol/con)) + (varsym? (token->symbol)))) + +(define (con->ast) ; for conid, aconid + (let ((name (con->symbol))) + (make con-ref (name name) (con *undefined-def*) (infix? '#f)))) + +(define (pcon->ast) ; for aconid, conid + (let ((name (con->symbol))) + (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f)))) + +(define (pconop->ast) ; for aconop, conop + (let ((name (conop->symbol))) + (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t)))) + +(define (tycon->ast) ; for aconid + (let ((name (token->symbol))) + (make tycon (name name) (def *undefined-def*) (args '())))) + +(define (class->ast) ; for aconid + (let ((name (token->symbol))) + (make class-ref (name name) (class *undefined-def*)))) + +(define (tyvar->ast) ; for avarid + (let ((name (token->symbol))) + (make tyvar (name name)))) + +(define (token->integer) ; for integer + (return+advance + (car *token-args*))) + +(define (integer->ast) ; for integer + (return+advance + (make integer-const (value (car *token-args*))))) + +(define (float->ast) + (return+advance + (make float-const (numerator (car *token-args*)) + (denominator (cadr *token-args*)) + (exponent (caddr *token-args*))))) + +(define (string->ast) + (return+advance + (make string-const (value (car *token-args*))))) + +(define (char->ast) + (return+advance + (make char-const (value (car *token-args*))))) + +(define (literal->ast) + (token-case + ((no-advance integer) (integer->ast)) + ((no-advance float) (float->ast)) + ((no-advance string) (string->ast)) + ((no-advance char) (char->ast)))) -- cgit v1.2.3