diff options
author | Ludovic Courtès <ludo@gnu.org> | 2010-03-31 00:41:28 +0200 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2010-03-31 00:41:59 +0200 |
commit | 1b10152215db2ed381bd63c8c234eb44eb7ed414 (patch) | |
tree | 6e8fc7d5c965ba8bca3a62a34e382feb25ba69ae | |
parent | 3ffd1ba96e986581d97079308fc15ef1fc933cdb (diff) |
Add Boucher's `lalr-scm' as the `(system base lalr)' module.
Taken from r51 of <http://lalr-scm.googlecode.com/svn/trunk>.
* module/Makefile.am (SYSTEM_BASE_SOURCES): Add `system/base/lalr.scm'.
(NOCOMP_SOURCES): Add `system/base/lalr.upstream.scm'.
* module/system/base/lalr.scm, module/system/base/lalr.upstream.scm: New
files.
* test-suite/Makefile.am (LALR_TESTS, LALR_EXTRA, TESTS,
TESTS_ENVIRONMENT): New variables.
(EXTRA_DIST): Add $(LALR_EXTRA).
* test-suite/lalr/common-test.scm,
test-suite/lalr/glr-test.scm,
test-suite/lalr/test-glr-associativity.scm,
test-suite/lalr/test-glr-basics-01.scm,
test-suite/lalr/test-glr-basics-02.scm,
test-suite/lalr/test-glr-basics-03.scm,
test-suite/lalr/test-glr-basics-04.scm,
test-suite/lalr/test-glr-basics-05.scm,
test-suite/lalr/test-glr-script-expression.scm,
test-suite/lalr/test-glr-single-expressions.scm,
test-suite/lalr/test-lr-associativity-01.scm,
test-suite/lalr/test-lr-associativity-02.scm,
test-suite/lalr/test-lr-associativity-03.scm,
test-suite/lalr/test-lr-associativity-04.scm,
test-suite/lalr/test-lr-basics-01.scm,
test-suite/lalr/test-lr-basics-02.scm,
test-suite/lalr/test-lr-basics-03.scm,
test-suite/lalr/test-lr-basics-04.scm,
test-suite/lalr/test-lr-basics-05.scm,
test-suite/lalr/test-lr-error-recovery-01.scm,
test-suite/lalr/test-lr-error-recovery-02.scm,
test-suite/lalr/test-lr-no-clause.scm,
test-suite/lalr/test-lr-script-expression.scm,
test-suite/lalr/test-lr-single-expressions.scm: New files.
29 files changed, 3759 insertions, 1 deletions
diff --git a/module/Makefile.am b/module/Makefile.am index 0ee2d1ce6..bae73168d 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -165,11 +165,12 @@ SCRIPTS_SOURCES = \ scripts/read-rfc822.scm \ scripts/snarf-guile-m4-docs.scm -SYSTEM_BASE_SOURCES = \ +SYSTEM_BASE_SOURCES = \ system/base/pmatch.scm \ system/base/syntax.scm \ system/base/compile.scm \ system/base/language.scm \ + system/base/lalr.scm \ system/base/message.scm ICE_9_SOURCES = \ @@ -316,6 +317,7 @@ NOCOMP_SOURCES = \ ice-9/gds-client.scm \ ice-9/psyntax.scm \ ice-9/quasisyntax.scm \ + system/base/lalr.upstream.scm \ system/repl/describe.scm \ ice-9/debugger/command-loop.scm \ ice-9/debugger/commands.scm \ diff --git a/module/system/base/lalr.scm b/module/system/base/lalr.scm new file mode 100644 index 000000000..8383a6f6f --- /dev/null +++ b/module/system/base/lalr.scm @@ -0,0 +1,45 @@ +;;; -*- mode: scheme; coding: utf-8; -*- +;;; +;;; Copyright (C) 2010 Free Software Foundation, Inc. +;;; +;;; This library is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Lesser General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU Lesser General Public License +;;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +(define-module (system base lalr) + + ;; XXX: In theory this import is not needed but the evaluator (not the + ;; compiler) complains about `lexical-token' being unbound when expanding + ;; `(define-record-type lexical-token ...)' if we omit it. + #:use-module (srfi srfi-9) + + #:export (lalr-parser print-states + + make-lexical-token lexical-token? + lexical-token-category + lexical-token-source + lexical-token-value + + make-source-location source-location? + source-location-input + source-location-line + source-location-column + source-location-offset + source-location-length + + ;; `lalr-parser' is a defmacro, which produces code that refers to + ;; these drivers. + lr-driver glr-driver)) + +;; The LALR parser generator was written by Dominique Boucher. It's available +;; from http://code.google.com/p/lalr-scm/ and released under the LGPLv3+. +(include-from-path "system/base/lalr.upstream.scm") diff --git a/module/system/base/lalr.upstream.scm b/module/system/base/lalr.upstream.scm new file mode 100755 index 000000000..217c43980 --- /dev/null +++ b/module/system/base/lalr.upstream.scm @@ -0,0 +1,2077 @@ +;;; +;;;; An Efficient and Portable LALR(1) Parser Generator for Scheme +;;; +;; Copyright 1993, 2010 Dominique Boucher +;; +;; This program is free software: you can redistribute it and/or +;; modify it under the terms of the GNU Lesser General Public License +;; as published by the Free Software Foundation, either version 3 of +;; the License, or (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU Lesser General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + + +(define *lalr-scm-version* "2.4.1") + + +(cond-expand + + ;; -- Gambit-C + (gambit + + (define-macro (def-macro form . body) + `(define-macro ,form (let () ,@body))) + + (def-macro (BITS-PER-WORD) 28) + (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y)) + (def-macro (lalr-error msg obj) `(error ,msg ,obj)) + + (define pprint pretty-print) + (define lalr-keyword? keyword?)) + + ;; -- + (bigloo + (define-macro (def-macro form . body) + `(define-macro ,form (let () ,@body))) + + (define pprint (lambda (obj) (write obj) (newline))) + (define lalr-keyword? keyword?) + (def-macro (BITS-PER-WORD) 29) + (def-macro (logical-or x . y) `(bit-or ,x ,@y)) + (def-macro (lalr-error msg obj) `(error "lalr-parser" ,msg ,obj))) + + ;; -- Chicken + (chicken + + (define-macro (def-macro form . body) + `(define-macro ,form (let () ,@body))) + + (define pprint pretty-print) + (define lalr-keyword? symbol?) + (def-macro (BITS-PER-WORD) 30) + (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y)) + (def-macro (lalr-error msg obj) `(error ,msg ,obj))) + + ;; -- STKlos + (stklos + (require "pp") + + (define (pprint form) (pp form :port (current-output-port))) + + (define lalr-keyword? keyword?) + (define-macro (BITS-PER-WORD) 30) + (define-macro (logical-or x . y) `(bit-or ,x ,@y)) + (define-macro (lalr-error msg obj) `(error 'lalr-parser ,msg ,obj))) + + ;; -- Guile + (guile + (use-modules (ice-9 pretty-print)) + (use-modules (srfi srfi-9)) + + (define pprint pretty-print) + (define lalr-keyword? symbol?) + (define-macro (BITS-PER-WORD) 30) + (define-macro (logical-or x . y) `(logior ,x ,@y)) + (define-macro (lalr-error msg obj) `(error ,msg ,obj))) + + ;; -- Kawa + (kawa + (require 'pretty-print) + (define (BITS-PER-WORD) 30) + (define logical-or logior) + (define (lalr-keyword? obj) (keyword? obj)) + (define (pprint obj) (pretty-print obj)) + (define (lalr-error msg obj) (error msg obj))) + + ;; -- SISC + (sisc + (import logicops) + (import record) + + (define pprint pretty-print) + (define lalr-keyword? symbol?) + (define-macro BITS-PER-WORD (lambda () 32)) + (define-macro logical-or (lambda (x . y) `(logor ,x ,@y))) + (define-macro (lalr-error msg obj) `(error "~a ~S:" ,msg ,obj))) + + + (else + (error "Unsupported Scheme system"))) + + +(define-record-type lexical-token + (make-lexical-token category source value) + lexical-token? + (category lexical-token-category) + (source lexical-token-source) + (value lexical-token-value)) + + +(define-record-type source-location + (make-source-location input line column offset length) + source-location? + (input source-location-input) + (line source-location-line) + (column source-location-column) + (offset source-location-offset) + (length source-location-length)) + + + + ;; - Macros pour la gestion des vecteurs de bits + +(define-macro (lalr-parser . arguments) + (define (set-bit v b) + (let ((x (quotient b (BITS-PER-WORD))) + (y (expt 2 (remainder b (BITS-PER-WORD))))) + (vector-set! v x (logical-or (vector-ref v x) y)))) + + (define (bit-union v1 v2 n) + (do ((i 0 (+ i 1))) + ((= i n)) + (vector-set! v1 i (logical-or (vector-ref v1 i) + (vector-ref v2 i))))) + + ;; - Macro pour les structures de donnees + + (define (new-core) (make-vector 4 0)) + (define (set-core-number! c n) (vector-set! c 0 n)) + (define (set-core-acc-sym! c s) (vector-set! c 1 s)) + (define (set-core-nitems! c n) (vector-set! c 2 n)) + (define (set-core-items! c i) (vector-set! c 3 i)) + (define (core-number c) (vector-ref c 0)) + (define (core-acc-sym c) (vector-ref c 1)) + (define (core-nitems c) (vector-ref c 2)) + (define (core-items c) (vector-ref c 3)) + + (define (new-shift) (make-vector 3 0)) + (define (set-shift-number! c x) (vector-set! c 0 x)) + (define (set-shift-nshifts! c x) (vector-set! c 1 x)) + (define (set-shift-shifts! c x) (vector-set! c 2 x)) + (define (shift-number s) (vector-ref s 0)) + (define (shift-nshifts s) (vector-ref s 1)) + (define (shift-shifts s) (vector-ref s 2)) + + (define (new-red) (make-vector 3 0)) + (define (set-red-number! c x) (vector-set! c 0 x)) + (define (set-red-nreds! c x) (vector-set! c 1 x)) + (define (set-red-rules! c x) (vector-set! c 2 x)) + (define (red-number c) (vector-ref c 0)) + (define (red-nreds c) (vector-ref c 1)) + (define (red-rules c) (vector-ref c 2)) + + + (define (new-set nelem) + (make-vector nelem 0)) + + + (define (vector-map f v) + (let ((vm-n (- (vector-length v) 1))) + (let loop ((vm-low 0) (vm-high vm-n)) + (if (= vm-low vm-high) + (vector-set! v vm-low (f (vector-ref v vm-low) vm-low)) + (let ((vm-middle (quotient (+ vm-low vm-high) 2))) + (loop vm-low vm-middle) + (loop (+ vm-middle 1) vm-high)))))) + + + ;; - Constantes + (define STATE-TABLE-SIZE 1009) + + + ;; - Tableaux + (define rrhs #f) + (define rlhs #f) + (define ritem #f) + (define nullable #f) + (define derives #f) + (define fderives #f) + (define firsts #f) + (define kernel-base #f) + (define kernel-end #f) + (define shift-symbol #f) + (define shift-set #f) + (define red-set #f) + (define state-table #f) + (define acces-symbol #f) + (define reduction-table #f) + (define shift-table #f) + (define consistent #f) + (define lookaheads #f) + (define LA #f) + (define LAruleno #f) + (define lookback #f) + (define goto-map #f) + (define from-state #f) + (define to-state #f) + (define includes #f) + (define F #f) + (define action-table #f) + + ;; - Variables + (define nitems #f) + (define nrules #f) + (define nvars #f) + (define nterms #f) + (define nsyms #f) + (define nstates #f) + (define first-state #f) + (define last-state #f) + (define final-state #f) + (define first-shift #f) + (define last-shift #f) + (define first-reduction #f) + (define last-reduction #f) + (define nshifts #f) + (define maxrhs #f) + (define ngotos #f) + (define token-set-size #f) + + (define driver-name 'lr-driver) + + (define (gen-tables! tokens gram ) + (initialize-all) + (rewrite-grammar + tokens + gram + (lambda (terms terms/prec vars gram gram/actions) + (set! the-terminals/prec (list->vector terms/prec)) + (set! the-terminals (list->vector terms)) + (set! the-nonterminals (list->vector vars)) + (set! nterms (length terms)) + (set! nvars (length vars)) + (set! nsyms (+ nterms nvars)) + (let ((no-of-rules (length gram/actions)) + (no-of-items (let loop ((l gram/actions) (count 0)) + (if (null? l) + count + (loop (cdr l) (+ count (length (caar l)))))))) + (pack-grammar no-of-rules no-of-items gram) + (set-derives) + (set-nullable) + (generate-states) + (lalr) + (build-tables) + (compact-action-table terms) + gram/actions)))) + + + (define (initialize-all) + (set! rrhs #f) + (set! rlhs #f) + (set! ritem #f) + (set! nullable #f) + (set! derives #f) + (set! fderives #f) + (set! firsts #f) + (set! kernel-base #f) + (set! kernel-end #f) + (set! shift-symbol #f) + (set! shift-set #f) + (set! red-set #f) + (set! state-table (make-vector STATE-TABLE-SIZE '())) + (set! acces-symbol #f) + (set! reduction-table #f) + (set! shift-table #f) + (set! consistent #f) + (set! lookaheads #f) + (set! LA #f) + (set! LAruleno #f) + (set! lookback #f) + (set! goto-map #f) + (set! from-state #f) + (set! to-state #f) + (set! includes #f) + (set! F #f) + (set! action-table #f) + (set! nstates #f) + (set! first-state #f) + (set! last-state #f) + (set! final-state #f) + (set! first-shift #f) + (set! last-shift #f) + (set! first-reduction #f) + (set! last-reduction #f) + (set! nshifts #f) + (set! maxrhs #f) + (set! ngotos #f) + (set! token-set-size #f) + (set! rule-precedences '())) + + + (define (pack-grammar no-of-rules no-of-items gram) + (set! nrules (+ no-of-rules 1)) + (set! nitems no-of-items) + (set! rlhs (make-vector nrules #f)) + (set! rrhs (make-vector nrules #f)) + (set! ritem (make-vector (+ 1 nitems) #f)) + + (let loop ((p gram) (item-no 0) (rule-no 1)) + (if (not (null? p)) + (let ((nt (caar p))) + (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no)) + (if (null? prods) + (loop (cdr p) it-no2 rl-no2) + (begin + (vector-set! rlhs rl-no2 nt) + (vector-set! rrhs rl-no2 it-no2) + (let loop3 ((rhs (car prods)) (it-no3 it-no2)) + (if (null? rhs) + (begin + (vector-set! ritem it-no3 (- rl-no2)) + (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1))) + (begin + (vector-set! ritem it-no3 (car rhs)) + (loop3 (cdr rhs) (+ it-no3 1)))))))))))) + + + (define (set-derives) + (define delts (make-vector (+ nrules 1) 0)) + (define dset (make-vector nvars -1)) + + (let loop ((i 1) (j 0)) ; i = 0 + (if (< i nrules) + (let ((lhs (vector-ref rlhs i))) + (if (>= lhs 0) + (begin + (vector-set! delts j (cons i (vector-ref dset lhs))) + (vector-set! dset lhs j) + (loop (+ i 1) (+ j 1))) + (loop (+ i 1) j))))) + + (set! derives (make-vector nvars 0)) + + (let loop ((i 0)) + (if (< i nvars) + (let ((q (let loop2 ((j (vector-ref dset i)) (s '())) + (if (< j 0) + s + (let ((x (vector-ref delts j))) + (loop2 (cdr x) (cons (car x) s))))))) + (vector-set! derives i q) + (loop (+ i 1)))))) + + + + (define (set-nullable) + (set! nullable (make-vector nvars #f)) + (let ((squeue (make-vector nvars #f)) + (rcount (make-vector (+ nrules 1) 0)) + (rsets (make-vector nvars #f)) + (relts (make-vector (+ nitems nvars 1) #f))) + (let loop ((r 0) (s2 0) (p 0)) + (let ((*r (vector-ref ritem r))) + (if *r + (if (< *r 0) + (let ((symbol (vector-ref rlhs (- *r)))) + (if (and (>= symbol 0) + (not (vector-ref nullable symbol))) + (begin + (vector-set! nullable symbol #t) + (vector-set! squeue s2 symbol) + (loop (+ r 1) (+ s2 1) p)))) + (let loop2 ((r1 r) (any-tokens #f)) + (let* ((symbol (vector-ref ritem r1))) + (if (> symbol 0) + (loop2 (+ r1 1) (or any-tokens (>= symbol nvars))) + (if (not any-tokens) + (let ((ruleno (- symbol))) + (let loop3 ((r2 r) (p2 p)) + (let ((symbol (vector-ref ritem r2))) + (if (> symbol 0) + (begin + (vector-set! rcount ruleno + (+ (vector-ref rcount ruleno) 1)) + (vector-set! relts p2 + (cons (vector-ref rsets symbol) + ruleno)) + (vector-set! rsets symbol p2) + (loop3 (+ r2 1) (+ p2 1))) + (loop (+ r2 1) s2 p2))))) + (loop (+ r1 1) s2 p)))))) + (let loop ((s1 0) (s3 s2)) + (if (< s1 s3) + (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3)) + (if p + (let* ((x (vector-ref relts p)) + (ruleno (cdr x)) + (y (- (vector-ref rcount ruleno) 1))) + (vector-set! rcount ruleno y) + (if (= y 0) + (let ((symbol (vector-ref rlhs ruleno))) + (if (and (>= symbol 0) + (not (vector-ref nullable symbol))) + (begin + (vector-set! nullable symbol #t) + (vector-set! squeue s4 symbol) + (loop2 (car x) (+ s4 1))) + (loop2 (car x) s4))) + (loop2 (car x) s4)))) + (loop (+ s1 1) s4))))))))) + + + + (define (set-firsts) + (set! firsts (make-vector nvars '())) + + ;; -- initialization + (let loop ((i 0)) + (if (< i nvars) + (let loop2 ((sp (vector-ref derives i))) + (if (null? sp) + (loop (+ i 1)) + (let ((sym (vector-ref ritem (vector-ref rrhs (car sp))))) + (if (< -1 sym nvars) + (vector-set! firsts i (sinsert sym (vector-ref firsts i)))) + (loop2 (cdr sp))))))) + + ;; -- reflexive and transitive closure + (let loop ((continue #t)) + (if continue + (let loop2 ((i 0) (cont #f)) + (if (>= i nvars) + (loop cont) + (let* ((x (vector-ref firsts i)) + (y (let loop3 ((l x) (z x)) + (if (null? l) + z + (loop3 (cdr l) + (sunion (vector-ref firsts (car l)) z)))))) + (if (equal? x y) + (loop2 (+ i 1) cont) + (begin + (vector-set! firsts i y) + (loop2 (+ i 1) #t)))))))) + + (let loop ((i 0)) + (if (< i nvars) + (begin + (vector-set! firsts i (sinsert i (vector-ref firsts i))) + (loop (+ i 1)))))) + + + + + (define (set-fderives) + (set! fderives (make-vector nvars #f)) + + (set-firsts) + + (let loop ((i 0)) + (if (< i nvars) + (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '())) + (if (null? l) + fd + (loop2 (cdr l) + (sunion (vector-ref derives (car l)) fd)))))) + (vector-set! fderives i x) + (loop (+ i 1)))))) + + + (define (closure core) + ;; Initialization + (define ruleset (make-vector nrules #f)) + + (let loop ((csp core)) + (if (not (null? csp)) + (let ((sym (vector-ref ritem (car csp)))) + (if (< -1 sym nvars) + (let loop2 ((dsp (vector-ref fderives sym))) + (if (not (null? dsp)) + (begin + (vector-set! ruleset (car dsp) #t) + (loop2 (cdr dsp)))))) + (loop (cdr csp))))) + + (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0 + (if (< ruleno nrules) + (if (vector-ref ruleset ruleno) + (let ((itemno (vector-ref rrhs ruleno))) + (let loop2 ((c csp) (itemsetv2 itemsetv)) + (if (and (pair? c) + (< (car c) itemno)) + (loop2 (cdr c) (cons (car c) itemsetv2)) + (loop (+ ruleno 1) c (cons itemno itemsetv2))))) + (loop (+ ruleno 1) csp itemsetv)) + (let loop2 ((c csp) (itemsetv2 itemsetv)) + (if (pair? c) + (loop2 (cdr c) (cons (car c) itemsetv2)) + (reverse itemsetv2)))))) + + + + (define (allocate-item-sets) + (set! kernel-base (make-vector nsyms 0)) + (set! kernel-end (make-vector nsyms #f))) + + + (define (allocate-storage) + (allocate-item-sets) + (set! red-set (make-vector (+ nrules 1) 0))) + + ; -- + + + (define (initialize-states) + (let ((p (new-core))) + (set-core-number! p 0) + (set-core-acc-sym! p #f) + (set-core-nitems! p 1) + (set-core-items! p '(0)) + + (set! first-state (list p)) + (set! last-state first-state) + (set! nstates 1))) + + + + (define (generate-states) + (allocate-storage) + (set-fderives) + (initialize-states) + (let loop ((this-state first-state)) + (if (pair? this-state) + (let* ((x (car this-state)) + (is (closure (core-items x)))) + (save-reductions x is) + (new-itemsets is) + (append-states) + (if (> nshifts 0) + (save-shifts x)) + (loop (cdr this-state)))))) + + + (define (new-itemsets itemset) + ;; - Initialization + (set! shift-symbol '()) + (let loop ((i 0)) + (if (< i nsyms) + (begin + (vector-set! kernel-end i '()) + (loop (+ i 1))))) + + (let loop ((isp itemset)) + (if (pair? isp) + (let* ((i (car isp)) + (sym (vector-ref ritem i))) + (if (>= sym 0) + (begin + (set! shift-symbol (sinsert sym shift-symbol)) + (let ((x (vector-ref kernel-end sym))) + (if (null? x) + (begin + (vector-set! kernel-base sym (cons (+ i 1) x)) + (vector-set! kernel-end sym (vector-ref kernel-base sym))) + (begin + (set-cdr! x (list (+ i 1))) + (vector-set! kernel-end sym (cdr x))))))) + (loop (cdr isp))))) + + (set! nshifts (length shift-symbol))) + + + + (define (get-state sym) + (let* ((isp (vector-ref kernel-base sym)) + (n (length isp)) + (key (let loop ((isp1 isp) (k 0)) + (if (null? isp1) + (modulo k STATE-TABLE-SIZE) + (loop (cdr isp1) (+ k (car isp1)))))) + (sp (vector-ref state-table key))) + (if (null? sp) + (let ((x (new-state sym))) + (vector-set! state-table key (list x)) + (core-number x)) + (let loop ((sp1 sp)) + (if (and (= n (core-nitems (car sp1))) + (let loop2 ((i1 isp) (t (core-items (car sp1)))) + (if (and (pair? i1) + (= (car i1) + (car t))) + (loop2 (cdr i1) (cdr t)) + (null? i1)))) + (core-number (car sp1)) + (if (null? (cdr sp1)) + (let ((x (new-state sym))) + (set-cdr! sp1 (list x)) + (core-number x)) + (loop (cdr sp1)))))))) + + + (define (new-state sym) + (let* ((isp (vector-ref kernel-base sym)) + (n (length isp)) + (p (new-core))) + (set-core-number! p nstates) + (set-core-acc-sym! p sym) + (if (= sym nvars) (set! final-state nstates)) + (set-core-nitems! p n) + (set-core-items! p isp) + (set-cdr! last-state (list p)) + (set! last-state (cdr last-state)) + (set! nstates (+ nstates 1)) + p)) + + + ; -- + + (define (append-states) + (set! shift-set + (let loop ((l (reverse shift-symbol))) + (if (null? l) + '() + (cons (get-state (car l)) (loop (cdr l))))))) + + ; -- + + (define (save-shifts core) + (let ((p (new-shift))) + (set-shift-number! p (core-number core)) + (set-shift-nshifts! p nshifts) + (set-shift-shifts! p shift-set) + (if last-shift + (begin + (set-cdr! last-shift (list p)) + (set! last-shift (cdr last-shift))) + (begin + (set! first-shift (list p)) + (set! last-shift first-shift))))) + + (define (save-reductions core itemset) + (let ((rs (let loop ((l itemset)) + (if (null? l) + '() + (let ((item (vector-ref ritem (car l)))) + (if (< item 0) + (cons (- item) (loop (cdr l))) + (loop (cdr l)))))))) + (if (pair? rs) + (let ((p (new-red))) + (set-red-number! p (core-number core)) + (set-red-nreds! p (length rs)) + (set-red-rules! p rs) + (if last-reduction + (begin + (set-cdr! last-reduction (list p)) + (set! last-reduction (cdr last-reduction))) + (begin + (set! first-reduction (list p)) + (set! last-reduction first-reduction))))))) + + + ; -- + + (define (lalr) + (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD)))) + (set-accessing-symbol) + (set-shift-table) + (set-reduction-table) + (set-max-rhs) + (initialize-LA) + (set-goto-map) + (initialize-F) + (build-relations) + (digraph includes) + (compute-lookaheads)) + + (define (set-accessing-symbol) + (set! acces-symbol (make-vector nstates #f)) + (let loop ((l first-state)) + (if (pair? l) + (let ((x (car l))) + (vector-set! acces-symbol (core-number x) (core-acc-sym x)) + (loop (cdr l)))))) + + (define (set-shift-table) + (set! shift-table (make-vector nstates #f)) + (let loop ((l first-shift)) + (if (pair? l) + (let ((x (car l))) + (vector-set! shift-table (shift-number x) x) + (loop (cdr l)))))) + + (define (set-reduction-table) + (set! reduction-table (make-vector nstates #f)) + (let loop ((l first-reduction)) + (if (pair? l) + (let ((x (car l))) + (vector-set! reduction-table (red-number x) x) + (loop (cdr l)))))) + + (define (set-max-rhs) + (let loop ((p 0) (curmax 0) (length 0)) + (let ((x (vector-ref ritem p))) + (if x + (if (>= x 0) + (loop (+ p 1) curmax (+ length 1)) + (loop (+ p 1) (max curmax length) 0)) + (set! maxrhs curmax))))) + + (define (initialize-LA) + (define (last l) + (if (null? (cdr l)) + (car l) + (last (cdr l)))) + + (set! consistent (make-vector nstates #f)) + (set! lookaheads (make-vector (+ nstates 1) #f)) + + (let loop ((count 0) (i 0)) + (if (< i nstates) + (begin + (vector-set! lookaheads i count) + (let ((rp (vector-ref reduction-table i)) + (sp (vector-ref shift-table i))) + (if (and rp + (or (> (red-nreds rp) 1) + (and sp + (not + (< (vector-ref acces-symbol + (last (shift-shifts sp))) + nvars))))) + (loop (+ count (red-nreds rp)) (+ i 1)) + (begin + (vector-set! consistent i #t) + (loop count (+ i 1)))))) + + (begin + (vector-set! lookaheads nstates count) + (let ((c (max count 1))) + (set! LA (make-vector c #f)) + (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size))) + (set! LAruleno (make-vector c -1)) + (set! lookback (make-vector c #f))) + (let loop ((i 0) (np 0)) + (if (< i nstates) + (if (vector-ref consistent i) + (loop (+ i 1) np) + (let ((rp (vector-ref reduction-table i))) + (if rp + (let loop2 ((j (red-rules rp)) (np2 np)) + (if (null? j) + (loop (+ i 1) np2) + (begin + (vector-set! LAruleno np2 (car j)) + (loop2 (cdr j) (+ np2 1))))) + (loop (+ i 1) np)))))))))) + + + (define (set-goto-map) + (set! goto-map (make-vector (+ nvars 1) 0)) + (let ((temp-map (make-vector (+ nvars 1) 0))) + (let loop ((ng 0) (sp first-shift)) + (if (pair? sp) + (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng)) + (if (pair? i) + (let ((symbol (vector-ref acces-symbol (car i)))) + (if (< symbol nvars) + (begin + (vector-set! goto-map symbol + (+ 1 (vector-ref goto-map symbol))) + (loop2 (cdr i) (+ ng2 1))) + (loop2 (cdr i) ng2))) + (loop ng2 (cdr sp)))) + + (let loop ((k 0) (i 0)) + (if (< i nvars) + (begin + (vector-set! temp-map i k) + (loop (+ k (vector-ref goto-map i)) (+ i 1))) + + (begin + (do ((i 0 (+ i 1))) + ((>= i nvars)) + (vector-set! goto-map i (vector-ref temp-map i))) + + (set! ngotos ng) + (vector-set! goto-map nvars ngotos) + (vector-set! temp-map nvars ngotos) + (set! from-state (make-vector ngotos #f)) + (set! to-state (make-vector ngotos #f)) + + (do ((sp first-shift (cdr sp))) + ((null? sp)) + (let* ((x (car sp)) + (state1 (shift-number x))) + (do ((i (shift-shifts x) (cdr i))) + ((null? i)) + (let* ((state2 (car i)) + (symbol (vector-ref acces-symbol state2))) + (if (< symbol nvars) + (let ((k (vector-ref temp-map symbol))) + (vector-set! temp-map symbol (+ k 1)) + (vector-set! from-state k state1) + (vector-set! to-state k state2)))))))))))))) + + + (define (map-goto state symbol) + (let loop ((low (vector-ref goto-map symbol)) + (high (- (vector-ref goto-map (+ symbol 1)) 1))) + (if (> low high) + (begin + (display (list "Error in map-goto" state symbol)) (newline) + 0) + (let* ((middle (quotient (+ low high) 2)) + (s (vector-ref from-state middle))) + (cond + ((= s state) + middle) + ((< s state) + (loop (+ middle 1) high)) + (else + (loop low (- middle 1)))))))) + + + (define (initialize-F) + (set! F (make-vector ngotos #f)) + (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size))) + + (let ((reads (make-vector ngotos #f))) + + (let loop ((i 0) (rowp 0)) + (if (< i ngotos) + (let* ((rowf (vector-ref F rowp)) + (stateno (vector-ref to-state i)) + (sp (vector-ref shift-table stateno))) + (if sp + (let loop2 ((j (shift-shifts sp)) (edges '())) + (if (pair? j) + (let ((symbol (vector-ref acces-symbol (car j)))) + (if (< symbol nvars) + (if (vector-ref nullable symbol) + (loop2 (cdr j) (cons (map-goto stateno symbol) + edges)) + (loop2 (cdr j) edges)) + (begin + (set-bit rowf (- symbol nvars)) + (loop2 (cdr j) edges)))) + (if (pair? edges) + (vector-set! reads i (reverse edges)))))) + (loop (+ i 1) (+ rowp 1))))) + (digraph reads))) + + (define (add-lookback-edge stateno ruleno gotono) + (let ((k (vector-ref lookaheads (+ stateno 1)))) + (let loop ((found #f) (i (vector-ref lookaheads stateno))) + (if (and (not found) (< i k)) + (if (= (vector-ref LAruleno i) ruleno) + (loop #t i) + (loop found (+ i 1))) + + (if (not found) + (begin (display "Error in add-lookback-edge : ") + (display (list stateno ruleno gotono)) (newline)) + (vector-set! lookback i + (cons gotono (vector-ref lookback i)))))))) + + + (define (transpose r-arg n) + (let ((new-end (make-vector n #f)) + (new-R (make-vector n #f))) + (do ((i 0 (+ i 1))) + ((= i n)) + (let ((x (list 'bidon))) + (vector-set! new-R i x) + (vector-set! new-end i x))) + (do ((i 0 (+ i 1))) + ((= i n)) + (let ((sp (vector-ref r-arg i))) + (if (pair? sp) + (let loop ((sp2 sp)) + (if (pair? sp2) + (let* ((x (car sp2)) + (y (vector-ref new-end x))) + (set-cdr! y (cons i (cdr y))) + (vector-set! new-end x (cdr y)) + (loop (cdr sp2)))))))) + (do ((i 0 (+ i 1))) + ((= i n)) + (vector-set! new-R i (cdr (vector-ref new-R i)))) + + new-R)) + + + + (define (build-relations) + + (define (get-state stateno symbol) + (let loop ((j (shift-shifts (vector-ref shift-table stateno))) + (stno stateno)) + (if (null? j) + stno + (let ((st2 (car j))) + (if (= (vector-ref acces-symbol st2) symbol) + st2 + (loop (cdr j) st2)))))) + + (set! includes (make-vector ngotos #f)) + (do ((i 0 (+ i 1))) + ((= i ngotos)) + (let ((state1 (vector-ref from-state i)) + (symbol1 (vector-ref acces-symbol (vector-ref to-state i)))) + (let loop ((rulep (vector-ref derives symbol1)) + (edges '())) + (if (pair? rulep) + (let ((*rulep (car rulep))) + (let loop2 ((rp (vector-ref rrhs *rulep)) + (stateno state1) + (states (list state1))) + (let ((*rp (vector-ref ritem rp))) + (if (> *rp 0) + (let ((st (get-state stateno *rp))) + (loop2 (+ rp 1) st (cons st states))) + (begin + + (if (not (vector-ref consistent stateno)) + (add-lookback-edge stateno *rulep i)) + + (let loop2 ((done #f) + (stp (cdr states)) + (rp2 (- rp 1)) + (edgp edges)) + (if (not done) + (let ((*rp (vector-ref ritem rp2))) + (if (< -1 *rp nvars) + (loop2 (not (vector-ref nullable *rp)) + (cdr stp) + (- rp2 1) + (cons (map-goto (car stp) *rp) edgp)) + (loop2 #t stp rp2 edgp))) + + (loop (cdr rulep) edgp)))))))) + (vector-set! includes i edges))))) + (set! includes (transpose includes ngotos))) + + + + (define (compute-lookaheads) + (let ((n (vector-ref lookaheads nstates))) + (let loop ((i 0)) + (if (< i n) + (let loop2 ((sp (vector-ref lookback i))) + (if (pair? sp) + (let ((LA-i (vector-ref LA i)) + (F-j (vector-ref F (car sp)))) + (bit-union LA-i F-j token-set-size) + (loop2 (cdr sp))) + (loop (+ i 1)))))))) + + + + (define (digraph relation) + (define infinity (+ ngotos 2)) + (define INDEX (make-vector (+ ngotos 1) 0)) + (define VERTICES (make-vector (+ ngotos 1) 0)) + (define top 0) + (define R relation) + + (define (traverse i) + (set! top (+ 1 top)) + (vector-set! VERTICES top i) + (let ((height top)) + (vector-set! INDEX i height) + (let ((rp (vector-ref R i))) + (if (pair? rp) + (let loop ((rp2 rp)) + (if (pair? rp2) + (let ((j (car rp2))) + (if (= 0 (vector-ref INDEX j)) + (traverse j)) + (if (> (vector-ref INDEX i) + (vector-ref INDEX j)) + (vector-set! INDEX i (vector-ref INDEX j))) + (let ((F-i (vector-ref F i)) + (F-j (vector-ref F j))) + (bit-union F-i F-j token-set-size)) + (loop (cdr rp2)))))) + (if (= (vector-ref INDEX i) height) + (let loop () + (let ((j (vector-ref VERTICES top))) + (set! top (- top 1)) + (vector-set! INDEX j infinity) + (if (not (= i j)) + (begin + (bit-union (vector-ref F i) + (vector-ref F j) + token-set-size) + (loop))))))))) + + (let loop ((i 0)) + (if (< i ngotos) + (begin + (if (and (= 0 (vector-ref INDEX i)) + (pair? (vector-ref R i))) + (traverse i)) + (loop (+ i 1)))))) + + + ;; ---------------------------------------------------------------------- + ;; operator precedence management + ;; ---------------------------------------------------------------------- + + ;; a vector of precedence descriptors where each element + ;; is of the form (terminal type precedence) + (define the-terminals/prec #f) ; terminal symbols with precedence + ; the precedence is an integer >= 0 + (define (get-symbol-precedence sym) + (caddr (vector-ref the-terminals/prec sym))) + ; the operator type is either 'none, 'left, 'right, or 'nonassoc + (define (get-symbol-assoc sym) + (cadr (vector-ref the-terminals/prec sym))) + + (define rule-precedences '()) + (define (add-rule-precedence! rule sym) + (set! rule-precedences + (cons (cons rule sym) rule-precedences))) + + (define (get-rule-precedence ruleno) + (cond + ((assq ruleno rule-precedences) + => (lambda (p) + (get-symbol-precedence (cdr p)))) + (else + ;; process the rule symbols from left to right + (let loop ((i (vector-ref rrhs ruleno)) + (prec 0)) + (let ((item (vector-ref ritem i))) + ;; end of rule + (if (< item 0) + prec + (let ((i1 (+ i 1))) + (if (>= item nvars) + ;; it's a terminal symbol + (loop i1 (get-symbol-precedence (- item nvars))) + (loop i1 prec))))))))) + + ;; ---------------------------------------------------------------------- + ;; Build the various tables + ;; ---------------------------------------------------------------------- + + (define expected-conflicts 0) + + (define (build-tables) + + (define (resolve-conflict sym rule) + (let ((sym-prec (get-symbol-precedence sym)) + (sym-assoc (get-symbol-assoc sym)) + (rule-prec (get-rule-precedence rule))) + (cond + ((> sym-prec rule-prec) 'shift) + ((< sym-prec rule-prec) 'reduce) + ((eq? sym-assoc 'left) 'reduce) + ((eq? sym-assoc 'right) 'shift) + (else 'none)))) + + (define conflict-messages '()) + + (define (add-conflict-message . l) + (set! conflict-messages (cons l conflict-messages))) + + (define (log-conflicts) + (if (> (length conflict-messages) expected-conflicts) + (for-each + (lambda (message) + (for-each display message) + (newline)) + conflict-messages))) + + ;; --- Add an action to the action table + (define (add-action state symbol new-action) + (let* ((state-actions (vector-ref action-table state)) + (actions (assv symbol state-actions))) + (if (pair? actions) + (let ((current-action (cadr actions))) + (if (not (= new-action current-action)) + ;; -- there is a conflict + (begin + (if (and (<= current-action 0) (<= new-action 0)) + ;; --- reduce/reduce conflict + (begin + (add-conflict-message + "%% Reduce/Reduce conflict (reduce " (- new-action) ", reduce " (- current-action) + ") on '" (get-symbol (+ symbol nvars)) "' in state " state) + (if (eq? driver-name 'glr-driver) + (set-cdr! (cdr actions) (cons new-action (cddr actions))) + (set-car! (cdr actions) (max current-action new-action)))) + ;; --- shift/reduce conflict + ;; can we resolve the conflict using precedences? + (case (resolve-conflict symbol (- current-action)) + ;; -- shift + ((shift) (if (eq? driver-name 'glr-driver) + (set-cdr! (cdr actions) (cons new-action (cddr actions))) + (set-car! (cdr actions) new-action))) + ;; -- reduce + ((reduce) #f) ; well, nothing to do... + ;; -- signal a conflict! + (else (add-conflict-message + "%% Shift/Reduce conflict (shift " new-action ", reduce " (- current-action) + ") on '" (get-symbol (+ symbol nvars)) "' in state " state) + (if (eq? driver-name 'glr-driver) + (set-cdr! (cdr actions) (cons new-action (cddr actions))) + (set-car! (cdr actions) new-action)))))))) + + (vector-set! action-table state (cons (list symbol new-action) state-actions))))) + + (define (add-action-for-all-terminals state action) + (do ((i 1 (+ i 1))) + ((= i nterms)) + (add-action state i action))) + + (set! action-table (make-vector nstates '())) + + (do ((i 0 (+ i 1))) ; i = state + ((= i nstates)) + (let ((red (vector-ref reduction-table i))) + (if (and red (>= (red-nreds red) 1)) + (if (and (= (red-nreds red) 1) (vector-ref consistent i)) + (add-action-for-all-terminals i (- (car (red-rules red)))) + (let ((k (vector-ref lookaheads (+ i 1)))) + (let loop ((j (vector-ref lookaheads i))) + (if (< j k) + (let ((rule (- (vector-ref LAruleno j))) + (lav (vector-ref LA j))) + (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0)) + (if (< token nterms) + (begin + (let ((in-la-set? (modulo x 2))) + (if (= in-la-set? 1) + (add-action i token rule))) + (if (= y (BITS-PER-WORD)) + (loop2 (+ token 1) + (vector-ref lav (+ z 1)) + 1 + (+ z 1)) + (loop2 (+ token 1) (quotient x 2) (+ y 1) z))))) + (loop (+ j 1))))))))) + + (let ((shiftp (vector-ref shift-table i))) + (if shiftp + (let loop ((k (shift-shifts shiftp))) + (if (pair? k) + (let* ((state (car k)) + (symbol (vector-ref acces-symbol state))) + (if (>= symbol nvars) + (add-action i (- symbol nvars) state)) + (loop (cdr k)))))))) + + (add-action final-state 0 'accept) + (log-conflicts)) + + (define (compact-action-table terms) + (define (most-common-action acts) + (let ((accums '())) + (let loop ((l acts)) + (if (pair? l) + (let* ((x (cadar l)) + (y (assv x accums))) + (if (and (number? x) (< x 0)) + (if y + (set-cdr! y (+ 1 (cdr y))) + (set! accums (cons `(,x . 1) accums)))) + (loop (cdr l))))) + + (let loop ((l accums) (max 0) (sym #f)) + (if (null? l) + sym + (let ((x (car l))) + (if (> (cdr x) max) + (loop (cdr l) (cdr x) (car x)) + (loop (cdr l) max sym))))))) + + (define (translate-terms acts) + (map (lambda (act) + (cons (list-ref terms (car act)) + (cdr act))) + acts)) + + (do ((i 0 (+ i 1))) + ((= i nstates)) + (let ((acts (vector-ref action-table i))) + (if (vector? (vector-ref reduction-table i)) + (let ((act (most-common-action acts))) + (vector-set! action-table i + (cons `(*default* ,(if act act '*error*)) + (translate-terms + (lalr-filter (lambda (x) + (not (and (= (length x) 2) + (eq? (cadr x) act)))) + acts))))) + (vector-set! action-table i + (cons `(*default* *error*) + (translate-terms acts))))))) + + + + ;; -- + + (define (rewrite-grammar tokens grammar k) + + (define eoi '*eoi*) + + (define (check-terminal term terms) + (cond + ((not (valid-terminal? term)) + (lalr-error "invalid terminal: " term)) + ((member term terms) + (lalr-error "duplicate definition of terminal: " term)))) + + (define (prec->type prec) + (cdr (assq prec '((left: . left) + (right: . right) + (nonassoc: . nonassoc))))) + + (cond + ;; --- a few error conditions + ((not (list? tokens)) + (lalr-error "Invalid token list: " tokens)) + ((not (pair? grammar)) + (lalr-error "Grammar definition must have a non-empty list of productions" '())) + + (else + ;; --- check the terminals + (let loop1 ((lst tokens) + (rev-terms '()) + (rev-terms/prec '()) + (prec-level 0)) + (if (pair? lst) + (let ((term (car lst))) + (cond + ((pair? term) + (if (and (memq (car term) '(left: right: nonassoc:)) + (not (null? (cdr term)))) + (let ((prec (+ prec-level 1)) + (optype (prec->type (car term)))) + (let loop-toks ((l (cdr term)) + (rev-terms rev-terms) + (rev-terms/prec rev-terms/prec)) + (if (null? l) + (loop1 (cdr lst) rev-terms rev-terms/prec prec) + (let ((term (car l))) + (check-terminal term rev-terms) + (loop-toks + (cdr l) + (cons term rev-terms) + (cons (list term optype prec) rev-terms/prec)))))) + + (lalr-error "invalid operator precedence specification: " term))) + + (else + (check-terminal term rev-terms) + (loop1 (cdr lst) + (cons term rev-terms) + (cons (list term 'none 0) rev-terms/prec) + prec-level)))) + + ;; --- check the grammar rules + (let loop2 ((lst grammar) (rev-nonterm-defs '())) + (if (pair? lst) + (let ((def (car lst))) + (if (not (pair? def)) + (lalr-error "Nonterminal definition must be a non-empty list" '()) + (let ((nonterm (car def))) + (cond ((not (valid-nonterminal? nonterm)) + (lalr-error "Invalid nonterminal:" nonterm)) + ((or (member nonterm rev-terms) + (assoc nonterm rev-nonterm-defs)) + (lalr-error "Nonterminal previously defined:" nonterm)) + (else + (loop2 (cdr lst) + (cons def rev-nonterm-defs))))))) + (let* ((terms (cons eoi (cons 'error (reverse rev-terms)))) + (terms/prec (cons '(eoi none 0) (cons '(error none 0) (reverse rev-terms/prec)))) + (nonterm-defs (reverse rev-nonterm-defs)) + (nonterms (cons '*start* (map car nonterm-defs)))) + (if (= (length nonterms) 1) + (lalr-error "Grammar must contain at least one nonterminal" '()) + (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) : $1) + nonterm-defs)) + (ruleno 0) + (comp-defs '())) + (if (pair? defs) + (let* ((nonterm-def (car defs)) + (compiled-def (rewrite-nonterm-def + nonterm-def + ruleno + terms nonterms))) + (loop-defs (cdr defs) + (+ ruleno (length compiled-def)) + (cons compiled-def comp-defs))) + + (let ((compiled-nonterm-defs (reverse comp-defs))) + (k terms + terms/prec + nonterms + (map (lambda (x) (cons (caaar x) (map cdar x))) + compiled-nonterm-defs) + (apply append compiled-nonterm-defs)))))))))))))) + + + (define (rewrite-nonterm-def nonterm-def ruleno terms nonterms) + + (define No-NT (length nonterms)) + + (define (encode x) + (let ((PosInNT (pos-in-list x nonterms))) + (if PosInNT + PosInNT + (let ((PosInT (pos-in-list x terms))) + (if PosInT + (+ No-NT PosInT) + (lalr-error "undefined symbol : " x)))))) + + (define (process-prec-directive rhs ruleno) + (let loop ((l rhs)) + (if (null? l) + '() + (let ((first (car l)) + (rest (cdr l))) + (cond + ((or (member first terms) (member first nonterms)) + (cons first (loop rest))) + ((and (pair? first) + (eq? (car first) 'prec:)) + (if (and (pair? (cdr first)) + (null? (cddr first)) + (member (cadr first) terms)) + (if (null? rest) + (begin + (add-rule-precedence! ruleno (pos-in-list (cadr first) terms)) + (loop rest)) + (lalr-error "prec: directive should be at end of rule: " rhs)) + (lalr-error "Invalid prec: directive: " first))) + (else + (lalr-error "Invalid terminal or nonterminal: " first))))))) + + (define (check-error-production rhs) + (let loop ((rhs rhs)) + (if (pair? rhs) + (begin + (if (and (eq? (car rhs) 'error) + (or (null? (cdr rhs)) + (not (member (cadr rhs) terms)) + (not (null? (cddr rhs))))) + (lalr-error "Invalid 'error' production. A single terminal symbol must follow the 'error' token.:" rhs)) + (loop (cdr rhs)))))) + + + (if (not (pair? (cdr nonterm-def))) + (lalr-error "At least one production needed for nonterminal:" (car nonterm-def)) + (let ((name (symbol->string (car nonterm-def)))) + (let loop1 ((lst (cdr nonterm-def)) + (i 1) + (rev-productions-and-actions '())) + (if (not (pair? lst)) + (reverse rev-productions-and-actions) + (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1))) + (rest (cdr lst)) + (prod (map encode (cons (car nonterm-def) rhs)))) + ;; -- check for undefined tokens + (for-each (lambda (x) + (if (not (or (member x terms) (member x nonterms))) + (lalr-error "Invalid terminal or nonterminal:" x))) + rhs) + ;; -- check 'error' productions + (check-error-production rhs) + + (if (and (pair? rest) + (eq? (car rest) ':) + (pair? (cdr rest))) + (loop1 (cddr rest) + (+ i 1) + (cons (cons prod (cadr rest)) + rev-productions-and-actions)) + (let* ((rhs-length (length rhs)) + (action + (cons 'vector + (cons (list 'quote (string->symbol + (string-append + name + "-" + (number->string i)))) + (let loop-j ((j 1)) + (if (> j rhs-length) + '() + (cons (string->symbol + (string-append + "$" + (number->string j))) + (loop-j (+ j 1))))))))) + (loop1 rest + (+ i 1) + (cons (cons prod action) + rev-productions-and-actions)))))))))) + + (define (valid-nonterminal? x) + (symbol? x)) + + (define (valid-terminal? x) + (symbol? x)) ; DB + + ;; ---------------------------------------------------------------------- + ;; Miscellaneous + ;; ---------------------------------------------------------------------- + (define (pos-in-list x lst) + (let loop ((lst lst) (i 0)) + (cond ((not (pair? lst)) #f) + ((equal? (car lst) x) i) + (else (loop (cdr lst) (+ i 1)))))) + + (define (sunion lst1 lst2) ; union of sorted lists + (let loop ((L1 lst1) + (L2 lst2)) + (cond ((null? L1) L2) + ((null? L2) L1) + (else + (let ((x (car L1)) (y (car L2))) + (cond + ((> x y) + (cons y (loop L1 (cdr L2)))) + ((< x y) + (cons x (loop (cdr L1) L2))) + (else + (loop (cdr L1) L2)) + )))))) + + (define (sinsert elem lst) + (let loop ((l1 lst)) + (if (null? l1) + (cons elem l1) + (let ((x (car l1))) + (cond ((< elem x) + (cons elem l1)) + ((> elem x) + (cons x (loop (cdr l1)))) + (else + l1)))))) + + (define (lalr-filter p lst) + (let loop ((l lst)) + (if (null? l) + '() + (let ((x (car l)) (y (cdr l))) + (if (p x) + (cons x (loop y)) + (loop y)))))) + + ;; ---------------------------------------------------------------------- + ;; Debugging tools ... + ;; ---------------------------------------------------------------------- + (define the-terminals #f) ; names of terminal symbols + (define the-nonterminals #f) ; non-terminals + + (define (print-item item-no) + (let loop ((i item-no)) + (let ((v (vector-ref ritem i))) + (if (>= v 0) + (loop (+ i 1)) + (let* ((rlno (- v)) + (nt (vector-ref rlhs rlno))) + (display (vector-ref the-nonterminals nt)) (display " --> ") + (let loop ((i (vector-ref rrhs rlno))) + (let ((v (vector-ref ritem i))) + (if (= i item-no) + (display ". ")) + (if (>= v 0) + (begin + (display (get-symbol v)) + (display " ") + (loop (+ i 1))) + (begin + (display " (rule ") + (display (- v)) + (display ")") + (newline)))))))))) + + (define (get-symbol n) + (if (>= n nvars) + (vector-ref the-terminals (- n nvars)) + (vector-ref the-nonterminals n))) + + + (define (print-states) + (define (print-action act) + (cond + ((eq? act '*error*) + (display " : Error")) + ((eq? act 'accept) + (display " : Accept input")) + ((< act 0) + (display " : reduce using rule ") + (display (- act))) + (else + (display " : shift and goto state ") + (display act))) + (newline) + #t) + + (define (print-actions acts) + (let loop ((l acts)) + (if (null? l) + #t + (let ((sym (caar l)) + (act (cadar l))) + (display " ") + (cond + ((eq? sym 'default) + (display "default action")) + (else + (if (number? sym) + (display (get-symbol (+ sym nvars))) + (display sym)))) + (print-action act) + (loop (cdr l)))))) + + (if (not action-table) + (begin + (display "No generated parser available!") + (newline) + #f) + (begin + (display "State table") (newline) + (display "-----------") (newline) (newline) + + (let loop ((l first-state)) + (if (null? l) + #t + (let* ((core (car l)) + (i (core-number core)) + (items (core-items core)) + (actions (vector-ref action-table i))) + (display "state ") (display i) (newline) + (newline) + (for-each (lambda (x) (display " ") (print-item x)) + items) + (newline) + (print-actions actions) + (newline) + (loop (cdr l)))))))) + + + + ;; ---------------------------------------------------------------------- + + (define build-goto-table + (lambda () + `(vector + ,@(map + (lambda (shifts) + (list 'quote + (if shifts + (let loop ((l (shift-shifts shifts))) + (if (null? l) + '() + (let* ((state (car l)) + (symbol (vector-ref acces-symbol state))) + (if (< symbol nvars) + (cons `(,symbol . ,state) + (loop (cdr l))) + (loop (cdr l)))))) + '()))) + (vector->list shift-table))))) + + + (define build-reduction-table + (lambda (gram/actions) + `(vector + '() + ,@(map + (lambda (p) + (let ((act (cdr p))) + `(lambda ,(if (eq? driver-name 'lr-driver) + '(___stack ___sp ___goto-table ___push yypushback) + '(___sp ___goto-table ___push)) + ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs))) + `(let* (,@(if act + (let loop ((i 1) (l rhs)) + (if (pair? l) + (let ((rest (cdr l))) + (cons + `(,(string->symbol + (string-append + "$" + (number->string + (+ (- n i) 1)))) + ,(if (eq? driver-name 'lr-driver) + `(vector-ref ___stack (- ___sp ,(- (* i 2) 1))) + `(list-ref ___sp ,(+ (* (- i 1) 2) 1)))) + (loop (+ i 1) rest))) + '())) + '())) + ,(if (= nt 0) + '$1 + `(___push ,n ,nt ,(cdr p) ,@(if (eq? driver-name 'lr-driver) '() '(___sp))))))))) + + gram/actions)))) + + + + ;; Options + + (define *valid-options* + (list + (cons 'out-table: + (lambda (option) + (and (list? option) + (= (length option) 2) + (string? (cadr option))))) + (cons 'output: + (lambda (option) + (and (list? option) + (= (length option) 3) + (symbol? (cadr option)) + (string? (caddr option))))) + (cons 'expect: + (lambda (option) + (and (list? option) + (= (length option) 2) + (integer? (cadr option)) + (>= (cadr option) 0)))) + + (cons 'driver: + (lambda (option) + (and (list? option) + (= (length option) 2) + (symbol? (cadr option)) + (memq (cadr option) '(lr glr))))))) + + + (define (validate-options options) + (for-each + (lambda (option) + (let ((p (assoc (car option) *valid-options*))) + (if (or (not p) + (not ((cdr p) option))) + (lalr-error "Invalid option:" option)))) + options)) + + + (define (output-parser! options code) + (let ((option (assq 'output: options))) + (if option + (let ((parser-name (cadr option)) + (file-name (caddr option))) + (with-output-to-file file-name + (lambda () + (pprint `(define ,parser-name ,code)) + (newline))))))) + + + (define (output-table! options) + (let ((option (assq 'out-table: options))) + (if option + (let ((file-name (cadr option))) + (with-output-to-file file-name print-states))))) + + + (define (set-expected-conflicts! options) + (let ((option (assq 'expect: options))) + (set! expected-conflicts (if option (cadr option) 0)))) + + (define (set-driver-name! options) + (let ((option (assq 'driver: options))) + (if option + (let ((driver-type (cadr option))) + (set! driver-name (if (eq? driver-type 'glr) 'glr-driver 'lr-driver)))))) + + + ;; -- arguments + + (define (extract-arguments lst proc) + (let loop ((options '()) + (tokens '()) + (rules '()) + (lst lst)) + (if (pair? lst) + (let ((p (car lst))) + (cond + ((and (pair? p) + (lalr-keyword? (car p)) + (assq (car p) *valid-options*)) + (loop (cons p options) tokens rules (cdr lst))) + (else + (proc options p (cdr lst))))) + (lalr-error "Malformed lalr-parser form" lst)))) + + + (define (build-driver options tokens rules) + (validate-options options) + (set-expected-conflicts! options) + (set-driver-name! options) + (let* ((gram/actions (gen-tables! tokens rules)) + (code `(,driver-name ',action-table ,(build-goto-table) ,(build-reduction-table gram/actions)))) + + (output-table! options) + (output-parser! options code) + code)) + + (extract-arguments arguments build-driver)) + + + +;;; +;;;; -- +;;;; Implementation of the lr-driver +;;; + + +(cond-expand + (gambit + (declare + (standard-bindings) + (fixnum) + (block) + (not safe))) + (chicken + (declare + (uses extras) + (usual-integrations) + (fixnum) + (not safe))) + (else)) + + +;;; +;;;; Source location utilities +;;; + + +;; This function assumes that src-location-1 and src-location-2 are source-locations +;; Returns #f if they are not locations for the same input +(define (combine-locations src-location-1 src-location-2) + (let ((offset-1 (source-location-offset src-location-1)) + (offset-2 (source-location-offset src-location-2)) + (length-1 (source-location-length src-location-1)) + (length-2 (source-location-length src-location-2))) + + (cond ((not (equal? (source-location-input src-location-1) + (source-location-input src-location-2))) + #f) + ((or (not (number? offset-1)) (not (number? offset-2)) + (not (number? length-1)) (not (number? length-2)) + (< offset-1 0) (< offset-2 0) + (< length-1 0) (< length-2 0)) + (make-source-location (source-location-input src-location-1) + (source-location-line src-location-1) + (source-location-column src-location-1) + -1 -1)) + ((<= offset-1 offset-2) + (make-source-location (source-location-input src-location-1) + (source-location-line src-location-1) + (source-location-column src-location-1) + offset-1 + (- (+ offset-2 length-2) offset-1))) + (else + (make-source-location (source-location-input src-location-1) + (source-location-line src-location-1) + (source-location-column src-location-1) + offset-2 + (- (+ offset-1 length-1) offset-2)))))) + + +;;; +;;;; LR-driver +;;; + + +(define *max-stack-size* 500) + +(define (lr-driver action-table goto-table reduction-table) + (define ___atable action-table) + (define ___gtable goto-table) + (define ___rtable reduction-table) + + (define ___lexerp #f) + (define ___errorp #f) + + (define ___stack #f) + (define ___sp 0) + + (define ___curr-input #f) + (define ___reuse-input #f) + + (define ___input #f) + (define (___consume) + (set! ___input (if ___reuse-input ___curr-input (___lexerp))) + (set! ___reuse-input #f) + (set! ___curr-input ___input)) + + (define (___pushback) + (set! ___reuse-input #t)) + + (define (___initstack) + (set! ___stack (make-vector *max-stack-size* 0)) + (set! ___sp 0)) + + (define (___growstack) + (let ((new-stack (make-vector (* 2 (vector-length ___stack)) 0))) + (let loop ((i (- (vector-length ___stack) 1))) + (if (>= i 0) + (begin + (vector-set! new-stack i (vector-ref ___stack i)) + (loop (- i 1))))) + (set! ___stack new-stack))) + + (define (___checkstack) + (if (>= ___sp (vector-length ___stack)) + (___growstack))) + + (define (___push delta new-category lvalue) + (set! ___sp (- ___sp (* delta 2))) + (let* ((state (vector-ref ___stack ___sp)) + (new-state (cdr (assoc new-category (vector-ref ___gtable state))))) + (set! ___sp (+ ___sp 2)) + (___checkstack) + (vector-set! ___stack ___sp new-state) + (vector-set! ___stack (- ___sp 1) lvalue))) + + (define (___reduce st) + ((vector-ref ___rtable st) ___stack ___sp ___gtable ___push ___pushback)) + + (define (___shift token attribute) + (set! ___sp (+ ___sp 2)) + (___checkstack) + (vector-set! ___stack (- ___sp 1) attribute) + (vector-set! ___stack ___sp token)) + + (define (___action x l) + (let ((y (assoc x l))) + (if y (cadr y) (cadar l)))) + + (define (___recover tok) + (let find-state ((sp ___sp)) + (if (< sp 0) + (set! ___sp sp) + (let* ((state (vector-ref ___stack sp)) + (act (assoc 'error (vector-ref ___atable state)))) + (if act + (begin + (set! ___sp sp) + (___sync (cadr act) tok)) + (find-state (- sp 2))))))) + + (define (___sync state tok) + (let ((sync-set (map car (cdr (vector-ref ___atable state))))) + (set! ___sp (+ ___sp 4)) + (___checkstack) + (vector-set! ___stack (- ___sp 3) #f) + (vector-set! ___stack (- ___sp 2) state) + (let skip () + (let ((i (___category ___input))) + (if (eq? i '*eoi*) + (set! ___sp -1) + (if (memq i sync-set) + (let ((act (assoc i (vector-ref ___atable state)))) + (vector-set! ___stack (- ___sp 1) #f) + (vector-set! ___stack ___sp (cadr act))) + (begin + (___consume) + (skip)))))))) + + (define (___category tok) + (if (lexical-token? tok) + (lexical-token-category tok) + tok)) + + (define (___value tok) + (if (lexical-token? tok) + (lexical-token-value tok) + tok)) + + (define (___run) + (let loop () + (if ___input + (let* ((state (vector-ref ___stack ___sp)) + (i (___category ___input)) + (attr (___value ___input)) + (act (___action i (vector-ref ___atable state)))) + + (cond ((not (symbol? i)) + (___errorp "Syntax error: invalid token: " ___input) + #f) + + ;; Input succesfully parsed + ((eq? act 'accept) + (vector-ref ___stack 1)) + + ;; Syntax error in input + ((eq? act '*error*) + (if (eq? i '*eoi*) + (begin + (___errorp "Syntax error: unexpected end of input") + #f) + (begin + (___errorp "Syntax error: unexpected token : " ___input) + (___recover i) + (if (>= ___sp 0) + (set! ___input #f) + (begin + (set! ___sp 0) + (set! ___input '*eoi*))) + (loop)))) + + ;; Shift current token on top of the stack + ((>= act 0) + (___shift act attr) + (set! ___input (if (eq? i '*eoi*) '*eoi* #f)) + (loop)) + + ;; Reduce by rule (- act) + (else + (___reduce (- act)) + (loop)))) + + ;; no lookahead, so check if there is a default action + ;; that does not require the lookahead + (let* ((state (vector-ref ___stack ___sp)) + (acts (vector-ref ___atable state)) + (defact (if (pair? acts) (cadar acts) #f))) + (if (and (= 1 (length acts)) (< defact 0)) + (___reduce (- defact)) + (___consume)) + (loop))))) + + + (lambda (lexerp errorp) + (set! ___errorp errorp) + (set! ___lexerp lexerp) + (___initstack) + (___run))) + + +;;; +;;;; Simple-minded GLR-driver +;;; + + +(define (glr-driver action-table goto-table reduction-table) + (define ___atable action-table) + (define ___gtable goto-table) + (define ___rtable reduction-table) + + (define ___lexerp #f) + (define ___errorp #f) + + ;; -- Input handling + + (define *input* #f) + (define (initialize-lexer lexer) + (set! ___lexerp lexer) + (set! *input* #f)) + (define (consume) + (set! *input* (___lexerp))) + + (define (token-category tok) + (if (lexical-token? tok) + (lexical-token-category tok) + tok)) + + (define (token-attribute tok) + (if (lexical-token? tok) + (lexical-token-value tok) + tok)) + + ;; -- Processes (stacks) handling + + (define *processes* '()) + + (define (initialize-processes) + (set! *processes* '())) + (define (add-process process) + (set! *processes* (cons process *processes*))) + (define (get-processes) + (reverse *processes*)) + + (define (for-all-processes proc) + (let ((processes (get-processes))) + (initialize-processes) + (for-each proc processes))) + + ;; -- parses + (define *parses* '()) + (define (get-parses) + *parses*) + (define (initialize-parses) + (set! *parses* '())) + (define (add-parse parse) + (set! *parses* (cons parse *parses*))) + + + (define (push delta new-category lvalue stack) + (let* ((stack (drop stack (* delta 2))) + (state (car stack)) + (new-state (cdr (assv new-category (vector-ref ___gtable state))))) + (cons new-state (cons lvalue stack)))) + + (define (reduce state stack) + ((vector-ref ___rtable state) stack ___gtable push)) + + (define (shift state symbol stack) + (cons state (cons symbol stack))) + + (define (get-actions token action-list) + (let ((pair (assoc token action-list))) + (if pair + (cdr pair) + (cdar action-list)))) ;; get the default action + + + (define (run) + (let loop-tokens () + (consume) + (let ((symbol (token-category *input*)) + (attr (token-attribute *input*))) + (for-all-processes + (lambda (process) + (let loop ((stacks (list process)) (active-stacks '())) + (cond ((pair? stacks) + (let* ((stack (car stacks)) + (state (car stack))) + (let actions-loop ((actions (get-actions symbol (vector-ref ___atable state))) + (active-stacks active-stacks)) + (if (pair? actions) + (let ((action (car actions)) + (other-actions (cdr actions))) + (cond ((eq? action '*error*) + (actions-loop other-actions active-stacks)) + ((eq? action 'accept) + (add-parse (car (take-right stack 2))) + (actions-loop other-actions active-stacks)) + ((>= action 0) + (let ((new-stack (shift action attr stack))) + (add-process new-stack)) + (actions-loop other-actions active-stacks)) + (else + (let ((new-stack (reduce (- action) stack))) + (actions-loop other-actions (cons new-stack active-stacks)))))) + (loop (cdr stacks) active-stacks))))) + ((pair? active-stacks) + (loop (reverse active-stacks) '()))))))) + (if (pair? (get-processes)) + (loop-tokens)))) + + + (lambda (lexerp errorp) + (set! ___errorp errorp) + (initialize-lexer lexerp) + (initialize-processes) + (initialize-parses) + (add-process '(0)) + (run) + (get-parses))) + + +(define (drop l n) + (cond ((and (> n 0) (pair? l)) + (drop (cdr l) (- n 1))) + (else + l))) + +(define (take-right l n) + (drop l (- (length l) n)))
\ No newline at end of file diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index 40f5a9868..94789d347 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -120,3 +120,46 @@ SCM_TESTS = tests/alist.test \ tests/weaks.test EXTRA_DIST = guile-test lib.scm $(SCM_TESTS) ChangeLog-2008 + + +# Test suite of Dominique Boucher's `lalr-scm'. +# From http://code.google.com/p/lalr-scm/. + +LALR_TESTS = \ + lalr/test-glr-associativity.scm \ + lalr/test-glr-basics-01.scm \ + lalr/test-glr-basics-02.scm \ + lalr/test-glr-basics-03.scm \ + lalr/test-glr-basics-04.scm \ + lalr/test-glr-basics-05.scm \ + lalr/test-glr-script-expression.scm \ + lalr/test-glr-single-expressions.scm \ + \ + lalr/test-lr-associativity-01.scm \ + lalr/test-lr-basics-01.scm \ + lalr/test-lr-basics-02.scm \ + lalr/test-lr-basics-03.scm \ + lalr/test-lr-basics-04.scm \ + lalr/test-lr-basics-05.scm \ + lalr/test-lr-error-recovery-01.scm \ + lalr/test-lr-error-recovery-02.scm \ + lalr/test-lr-no-clause.scm \ + lalr/test-lr-script-expression.scm \ + lalr/test-lr-single-expressions.scm + +# Tests not listed in `run-guile-test.sh' and which should not be run. +LALR_EXTRA = \ + lalr/test-lr-associativity-02.scm \ + lalr/test-lr-associativity-03.scm \ + lalr/test-lr-associativity-04.scm + +# Test framework. +LALR_EXTRA += \ + lalr/common-test.scm \ + lalr/glr-test.scm \ + lalr/run-guile-test.sh + +TESTS = $(LALR_TESTS) +TESTS_ENVIRONMENT = $(top_builddir)/meta/guile --no-autocompile + +EXTRA_DIST += $(LALR_EXTRA) $(LALR_TESTS) diff --git a/test-suite/lalr/common-test.scm b/test-suite/lalr/common-test.scm new file mode 100644 index 000000000..8563029ce --- /dev/null +++ b/test-suite/lalr/common-test.scm @@ -0,0 +1,63 @@ +;;; common-test.scm -- +;;; + +;; Slightly modified for Guile by Ludovic Courtès <ludo@gnu.org>, 2010. + +(use-modules (system base lalr) + (ice-9 pretty-print)) + +(define *error* '()) + +(define-syntax when + (syntax-rules () + ((_ ?expr ?body ...) + (if ?expr + (let () ?body ...) + #f)))) + +(define-syntax check + (syntax-rules (=>) + ((_ ?expr => ?expected-result) + (check ?expr (=> equal?) ?expected-result)) + + ((_ ?expr (=> ?equal) ?expected-result) + (let ((result ?expr) + (expected ?expected-result)) + (set! *error* '()) + (when (not (?equal result expected)) + (display "Failed test: \n") + (pretty-print (quote ?expr))(newline) + (display "\tresult was: ") + (pretty-print result)(newline) + (display "\texpected: ") + (pretty-print expected)(newline) + (exit 1)))))) + +;;; -------------------------------------------------------------------- + +(define (display-result v) + (if v + (begin + (display "==> ") + (display v) + (newline)))) + +(define eoi-token + (make-lexical-token '*eoi* #f #f)) + +(define (make-lexer tokens) + (lambda () + (if (null? tokens) + eoi-token + (let ((t (car tokens))) + (set! tokens (cdr tokens)) + t)))) + +(define (error-handler message . args) + (set! *error* (cons `(error-handler ,message . ,(if (pair? args) + (lexical-token-category (car args)) + '())) + *error*)) + (cons message args)) + +;;; end of file diff --git a/test-suite/lalr/glr-test.scm b/test-suite/lalr/glr-test.scm new file mode 100644 index 000000000..18b8e865f --- /dev/null +++ b/test-suite/lalr/glr-test.scm @@ -0,0 +1,88 @@ +":";exec snow -- "$0" "$@"
+;;;
+;;;; Tests for the GLR parser generator
+;;;
+;;
+;; @created "Fri Aug 19 11:23:48 EDT 2005"
+;;
+
+(package* glr-test/v1.0.0
+ (require: lalr/v2.4.0))
+
+
+(define (syntax-error msg . args)
+ (display msg (current-error-port))
+ (for-each (cut format (current-error-port) " ~A" <>) args)
+ (newline (current-error-port))
+ (throw 'misc-error))
+
+
+(define (make-lexer words)
+ (let ((phrase words))
+ (lambda ()
+ (if (null? phrase)
+ '*eoi*
+ (let ((word (car phrase)))
+ (set! phrase (cdr phrase))
+ word)))))
+
+
+;;;
+;;;; Test 1
+;;;
+
+
+(define parser-1
+ ;; Grammar taken from Tomita's "An Efficient Augmented-Context-Free Parsing Algorithm"
+ (lalr-parser
+ (driver: glr)
+ (expect: 2)
+ (*n *v *d *p)
+ (<s> (<np> <vp>)
+ (<s> <pp>))
+ (<np> (*n)
+ (*d *n)
+ (<np> <pp>))
+ (<pp> (*p <np>))
+ (<vp> (*v <np>))))
+
+
+(define *phrase-1* '(*n *v *d *n *p *d *n *p *d *n *p *d *n))
+
+(define (test-1)
+ (parser-1 (make-lexer *phrase-1*) syntax-error))
+
+
+;;;
+;;;; Test 2
+;;;
+
+
+(define parser-2
+ ;; The dangling-else problem
+ (lalr-parser
+ (driver: glr)
+ (expect: 1)
+ ((nonassoc: if then else e s))
+ (<s> (s)
+ (if e then <s>)
+ (if e then <s> else <s>))))
+
+
+(define *phrase-2* '(if e then if e then s else s))
+
+(define (test-2)
+ (parser-2 (make-lexer *phrase-2*) syntax-error))
+
+
+
+
+(define (assert-length l n test-name)
+ (display "Test '")
+ (display test-name)
+ (display (if (not (= (length l) n)) "' failed!" "' passed!"))
+ (newline))
+
+(assert-length (test-1) 14 1)
+(assert-length (test-2) 2 2)
+
diff --git a/test-suite/lalr/run-guile-test.sh b/test-suite/lalr/run-guile-test.sh new file mode 100644 index 000000000..ab29b83dd --- /dev/null +++ b/test-suite/lalr/run-guile-test.sh @@ -0,0 +1,30 @@ +# guile-test.sh -- +# + +for item in \ + test-glr-basics-01.scm \ + test-glr-basics-02.scm \ + test-glr-basics-03.scm \ + test-glr-basics-04.scm \ + test-glr-basics-05.scm \ + test-glr-associativity.scm \ + test-glr-script-expression.scm \ + test-glr-single-expressions.scm \ + \ + test-lr-basics-01.scm \ + test-lr-basics-02.scm \ + test-lr-basics-03.scm \ + test-lr-basics-04.scm \ + test-lr-basics-05.scm \ + test-lr-error-recovery-01.scm \ + test-lr-error-recovery-02.scm \ + test-lr-no-clause.scm \ + test-lr-associativity-01.scm \ + test-lr-script-expression.scm \ + test-lr-single-expressions.scm + do +printf "\n\n*** Running $item\n" +guile $item +done + +### end of file diff --git a/test-suite/lalr/test-glr-associativity.scm b/test-suite/lalr/test-glr-associativity.scm new file mode 100644 index 000000000..6a5a5e25b --- /dev/null +++ b/test-suite/lalr/test-glr-associativity.scm @@ -0,0 +1,102 @@ +;;; test-glr-associativity.scm +;; +;;With the GLR parser both the terminal precedence and the non-terminal +;;associativity are not respected; rather they generate two child +;;processes. +;; + +(load "common-test.scm") + +(define parser + (lalr-parser + (driver: glr) + (expect: 0) + + (N LPAREN RPAREN + (left: + -) + (right: * /) + (nonassoc: uminus)) + + (output (expr) : $1) + (expr (expr + expr) : (list $1 '+ $3) + (expr - expr) : (list $1 '- $3) + (expr * expr) : (list $1 '* $3) + (expr / expr) : (list $1 '/ $3) + (- expr (prec: uminus)) : (list '- $2) + (N) : $1 + (LPAREN expr RPAREN) : $2))) + +(define (doit . tokens) + (parser (make-lexer tokens) error-handler)) + +;;; -------------------------------------------------------------------- + +;;Remember that the result of the GLR driver is a list of parses, not a +;;single parse. + +(check + (doit (make-lexical-token 'N #f 1)) + => '(1)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '+ #f '+) + (make-lexical-token 'N #f 2)) + => '((1 + 2))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '* #f '*) + (make-lexical-token 'N #f 2)) + => '((1 * 2))) + +(check + (doit (make-lexical-token '- #f '-) + (make-lexical-token 'N #f 1)) + => '((- 1))) + +(check + (doit (make-lexical-token '- #f '-) + (make-lexical-token '- #f '-) + (make-lexical-token 'N #f 1)) + => '((- (- 1)))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '+ #f '+) + (make-lexical-token '- #f '-) + (make-lexical-token 'N #f 2)) + => '((1 + (- 2)))) + +;;; -------------------------------------------------------------------- + +(check + ;;left-associativity + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '+ #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token '+ #f '+) + (make-lexical-token 'N #f 3)) + => '(((1 + 2) + 3))) + +(check + ;;right-associativity + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '* #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token '* #f '*) + (make-lexical-token 'N #f 3)) + => '(((1 * 2) * 3) + (1 * (2 * 3)))) + +(check + ;;precedence + (doit (make-lexical-token 'N #f 1) + (make-lexical-token '+ #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token '* #f '*) + (make-lexical-token 'N #f 3)) + => '(((1 + 2) * 3) + (1 + (2 * 3)))) + +;;; end of file diff --git a/test-suite/lalr/test-glr-basics-01.scm b/test-suite/lalr/test-glr-basics-01.scm new file mode 100644 index 000000000..8cac63c71 --- /dev/null +++ b/test-suite/lalr/test-glr-basics-01.scm @@ -0,0 +1,35 @@ +;;; test-lr-basics-01.scm -- +;; +;;A grammar that only accept a single terminal as input. It refuses the +;;end-of-input as first token. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let* ((lexer (make-lexer tokens)) + (parser (lalr-parser (expect: 0) + (driver: glr) + (A) + (e (A) : $1)))) + (parser lexer error-handler))) + +(check + (doit (make-lexical-token 'A #f 1)) + => '(1)) + +(check + (doit) + => '()) + +(check + ;;Parse correctly the first A and reduce it. The second A triggers + ;;an error which empties the stack and consumes all the input + ;;tokens. Finally, an unexpected end-of-input error is returned + ;;because EOI is invalid as first token after the start. + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '()) + +;;; end of file diff --git a/test-suite/lalr/test-glr-basics-02.scm b/test-suite/lalr/test-glr-basics-02.scm new file mode 100644 index 000000000..a4e24ad9d --- /dev/null +++ b/test-suite/lalr/test-glr-basics-02.scm @@ -0,0 +1,30 @@ +;;; test-lr-basics-02.scm -- +;; +;;A grammar that only accept a single terminal or the EOI. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (A) + (e (A) : $1 + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => '(0)) + +(check + (doit (make-lexical-token 'A #f 1)) + => '(1)) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '()) + +;;; end of file diff --git a/test-suite/lalr/test-glr-basics-03.scm b/test-suite/lalr/test-glr-basics-03.scm new file mode 100644 index 000000000..ec80ed514 --- /dev/null +++ b/test-suite/lalr/test-glr-basics-03.scm @@ -0,0 +1,37 @@ +;;; test-lr-basics-03.scm -- +;; +;;A grammar that accepts fixed sequences of a single terminal or the +;;EOI. + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (A) + (e (A) : (list $1) + (A A) : (list $1 $2) + (A A A) : (list $1 $2 $3) + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit (make-lexical-token 'A #f 1)) + => '((1))) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2)) + => '((1 2))) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '((1 2 3))) + +(check + (doit) + => '(0)) + +;;; end of file diff --git a/test-suite/lalr/test-glr-basics-04.scm b/test-suite/lalr/test-glr-basics-04.scm new file mode 100644 index 000000000..00d287110 --- /dev/null +++ b/test-suite/lalr/test-glr-basics-04.scm @@ -0,0 +1,43 @@ +;;; test-lr-basics-04.scm -- +;; +;;A grammar accepting a sequence of equal tokens of arbitrary length. +;;The return value is the value of the last parsed token. + + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (A) + (e (e A) : $2 + (A) : $1 + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => '(0)) + +(check + ;;Two results because there is a shift/reduce conflict, so two + ;;processes are generated. + (doit (make-lexical-token 'A #f 1)) + => '(1 1)) + +(check + ;;Two results because there is a shift/reduce conflict, so two + ;;processes are generated. Notice that the rules: + ;; + ;; (e A) (A) + ;; + ;;generate only one conflict when the second "A" comes. The third + ;;"A" comes when the state is inside the rule "(e A)", so there is + ;;no conflict. + ;; + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '(3 3)) + +;;; end of file diff --git a/test-suite/lalr/test-glr-basics-05.scm b/test-suite/lalr/test-glr-basics-05.scm new file mode 100644 index 000000000..ca48fd79f --- /dev/null +++ b/test-suite/lalr/test-glr-basics-05.scm @@ -0,0 +1,40 @@ +;;; test-lr-basics-05.scm -- +;; +;;A grammar accepting a sequence of equal tokens of arbitrary length. +;;The return value is the list of values. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (A) + (e (e A) : (cons $2 $1) + (A) : (list $1) + () : (list 0))))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => '((0))) + +(check + (doit (make-lexical-token 'A #f 1)) + => '((1 0) + (1))) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2)) + => '((2 1 0) + (2 1))) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '((3 2 1 0) + (3 2 1))) + +;;; end of file diff --git a/test-suite/lalr/test-glr-script-expression.scm b/test-suite/lalr/test-glr-script-expression.scm new file mode 100644 index 000000000..5d6d4265c --- /dev/null +++ b/test-suite/lalr/test-glr-script-expression.scm @@ -0,0 +1,125 @@ +;;; test-lr-script-expression.scm -- +;; +;;Parse scripts, each line an expression. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (N O C T (left: A) (left: M) (nonassoc: U)) + + (script (lines) : (reverse $1)) + + (lines (lines line) : (cons $2 $1) + (line) : (list $1)) + + (line (T) : #\newline + (E T) : $1 + (error T) : (list 'error-clause $2)) + + (E (N) : $1 + (E A E) : ($2 $1 $3) + (E M E) : ($2 $1 $3) + (A E (prec: U)) : ($1 $2) + (O E C) : $2)))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Correct input + +(check + (doit (make-lexical-token 'T #f #\newline)) + => '((#\newline))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'T #f #\newline)) + => '((1))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'T #f #\newline)) + => '((3))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline)) + => '((9) (7))) + +(check + (doit (make-lexical-token 'N #f 10) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline)) + => '((23))) + +(check + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline)) + => '((9))) + +(check + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline) + + (make-lexical-token 'N #f 4) + (make-lexical-token 'M #f /) + (make-lexical-token 'N #f 5) + (make-lexical-token 'T #f #\newline)) + => '((9 4/5))) + +;;; -------------------------------------------------------------------- + +(check + ;;Successful error recovery. + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline) + + (make-lexical-token 'N #f 4) + (make-lexical-token 'M #f /) + (make-lexical-token 'N #f 5) + (make-lexical-token 'T #f #\newline)) + => '()) + +(check + ;;Unexpected end of input. + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2)) + => '()) + +(check + ;;Unexpected end of input. + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'T #f #\newline)) + => '()) + +;;; end of file diff --git a/test-suite/lalr/test-glr-single-expressions.scm b/test-suite/lalr/test-glr-single-expressions.scm new file mode 100644 index 000000000..9415262d4 --- /dev/null +++ b/test-suite/lalr/test-glr-single-expressions.scm @@ -0,0 +1,60 @@ +;;; test-lr-single-expressions.scm -- +;; +;;Grammar accepting single expressions. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (driver: glr) + (N O C (left: A) (left: M) (nonassoc: U)) + + (E (N) : $1 + (E A E) : ($2 $1 $3) + (E M E) : ($2 $1 $3) + (A E (prec: U)) : ($1 $2) + (O E C) : $2)))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- + +(check ;correct input + (doit (make-lexical-token 'N #f 1)) + => '(1)) + +(check ;correct input + (doit (make-lexical-token 'A #f -) + (make-lexical-token 'N #f 1)) + => '(-1)) + +(check ;correct input + (doit (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 1)) + => '(1)) + +(check ;correct input + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2)) + => '(3)) + +(check ;correct input + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3)) + => '(9 7)) + +(check ;correct input + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3)) + => '(9)) + +;;; end of file diff --git a/test-suite/lalr/test-lr-associativity-01.scm b/test-suite/lalr/test-lr-associativity-01.scm new file mode 100644 index 000000000..8519dee1c --- /dev/null +++ b/test-suite/lalr/test-lr-associativity-01.scm @@ -0,0 +1,91 @@ +;;; test-lr-associativity-01.scm -- +;; +;;Show how to use left and right associativity. Notice that the +;;terminal M is declared as right associative; this influences the +;;binding of values to the $n symbols in the semantic clauses. The +;;semantic clause in the rule: +;; +;; (E M E M E) : (list $1 $2 (list $3 $4 $5)) +;; +;;looks like it is right-associated, and it is because we have declared +;;M as "right:". +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser + (expect: 0) + (N (left: A) + (right: M) + (nonassoc: U)) + (E (N) : $1 + (E A E) : (list $1 $2 $3) + (E M E) : (list $1 $2 $3) + (E M E M E) : (list $1 $2 (list $3 $4 $5)) + (A E (prec: U)) : (list '- $2))))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Single operator. + +(check + (doit (make-lexical-token 'N #f 1)) + => 1) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2)) + => '(1 + 2)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2)) + => '(1 * 2)) + +(check + (doit (make-lexical-token 'A #f '-) + (make-lexical-token 'N #f 1)) + => '(- 1)) + +;;; -------------------------------------------------------------------- +;;; Precedence. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 + (2 * 3))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 * 2) + 3)) + +;;; -------------------------------------------------------------------- +;;; Associativity. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 + 2) + 3)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 * (2 * 3))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-associativity-02.scm b/test-suite/lalr/test-lr-associativity-02.scm new file mode 100644 index 000000000..6fb62e767 --- /dev/null +++ b/test-suite/lalr/test-lr-associativity-02.scm @@ -0,0 +1,91 @@ +;;; test-lr-associativity-02.scm -- +;; +;;Show how to use left and right associativity. Notice that the +;;terminal M is declared as left associative; this influences the +;;binding of values to the $n symbols in the semantic clauses. The +;;semantic clause in the rule: +;; +;; (E M E M E) : (list $1 $2 (list $3 $4 $5)) +;; +;;looks like it is right-associated, but the result is left-associated +;;because we have declared M as "left:". +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser + (expect: 0) + (N (left: A) + (left: M) + (nonassoc: U)) + (E (N) : $1 + (E A E) : (list $1 $2 $3) + (E M E) : (list $1 $2 $3) + (E M E M E) : (list $1 $2 (list $3 $4 $5)) + (A E (prec: U)) : (list '- $2))))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Single operator. + +(check + (doit (make-lexical-token 'N #f 1)) + => 1) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2)) + => '(1 + 2)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2)) + => '(1 * 2)) + +(check + (doit (make-lexical-token 'A #f '-) + (make-lexical-token 'N #f 1)) + => '(- 1)) + +;;; -------------------------------------------------------------------- +;;; Precedence. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 + (2 * 3))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 * 2) + 3)) + +;;; -------------------------------------------------------------------- +;;; Associativity. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 + 2) + 3)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '((1 * 2) * 3)) + +;;; end of file diff --git a/test-suite/lalr/test-lr-associativity-03.scm b/test-suite/lalr/test-lr-associativity-03.scm new file mode 100644 index 000000000..4c35b822b --- /dev/null +++ b/test-suite/lalr/test-lr-associativity-03.scm @@ -0,0 +1,85 @@ +;;; test-lr-associativity-01.scm -- +;; +;;Show how to use left and right associativity. Notice that the +;;terminal M is declared as non-associative; this influences the binding +;;of values to the $n symbols in the semantic clauses. The semantic +;;clause in the rule: +;; +;; (E M E M E) : (list $1 $2 (list $3 $4 $5)) +;; +;;looks like it is right-associated, and it is because we have declared +;;M as "right:". +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser + (expect: 0) + (N (nonassoc: A) + (nonassoc: M)) + (E (N) : $1 + (E A E) : (list $1 $2 $3) + (E A E A E) : (list (list $1 $2 $3) $4 $5) + (E M E) : (list $1 $2 $3) + (E M E M E) : (list $1 $2 (list $3 $4 $5)))))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Single operator. + +(check + (doit (make-lexical-token 'N #f 1)) + => 1) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2)) + => '(1 + 2)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2)) + => '(1 * 2)) + +;;; -------------------------------------------------------------------- +;;; Precedence. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 + (2 * 3))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 * 2) + 3)) + +;;; -------------------------------------------------------------------- +;;; Associativity. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 + 2) + 3)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 * (2 * 3))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-associativity-04.scm b/test-suite/lalr/test-lr-associativity-04.scm new file mode 100644 index 000000000..0aea3f096 --- /dev/null +++ b/test-suite/lalr/test-lr-associativity-04.scm @@ -0,0 +1,83 @@ +;;; test-lr-associativity-04.scm -- +;; +;;Show how to use associativity. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser + (expect: 0) + (N (left: A) + (left: M)) + (E (N) : $1 + + (E A E) : (list $1 $2 $3) + (E A E A E) : (list (list $1 $2 $3) $4 $5) + + (E M E) : (list $1 $2 $3) + (E M E M E) : (list $1 $2 (list $3 $4 $5)) + + (E A E M E) : (list $1 $2 $3 $4 $5) + (E M E A E) : (list $1 $2 $3 $4 $5) + )))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Single operator. + +(check + (doit (make-lexical-token 'N #f 1)) + => 1) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2)) + => '(1 + 2)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2)) + => '(1 * 2)) + +;;; -------------------------------------------------------------------- +;;; Precedence. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '(1 + (2 * 3))) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 * 2) + 3)) + +;;; -------------------------------------------------------------------- +;;; Associativity. + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 2) + (make-lexical-token 'A #f '+) + (make-lexical-token 'N #f 3)) + => '((1 + 2) + 3)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f '*) + (make-lexical-token 'N #f 3)) + => '((1 * 2) * 3)) + +;;; end of file diff --git a/test-suite/lalr/test-lr-basics-01.scm b/test-suite/lalr/test-lr-basics-01.scm new file mode 100644 index 000000000..0176fe689 --- /dev/null +++ b/test-suite/lalr/test-lr-basics-01.scm @@ -0,0 +1,38 @@ +;;; test-lr-basics-01.scm -- +;; +;;A grammar that only accept a single terminal as input. It refuses the +;;end-of-input as first token. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let* ((lexer (make-lexer tokens)) + (parser (lalr-parser (expect: 0) + (A) + (e (A) : $1)))) + (parser lexer error-handler))) + +(check + (doit (make-lexical-token 'A #f 1)) + => 1) + +(check + (let ((r (doit))) + (cons r *error*)) + => '(#f (error-handler "Syntax error: unexpected end of input"))) + +(check + ;;Parse correctly the first A and reduce it. The second A triggers + ;;an error which empties the stack and consumes all the input + ;;tokens. Finally, an unexpected end-of-input error is returned + ;;because EOI is invalid as first token after the start. + (let ((r (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)))) + (cons r *error*)) + => '(#f + (error-handler "Syntax error: unexpected end of input") + (error-handler "Syntax error: unexpected token : " . A))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-basics-02.scm b/test-suite/lalr/test-lr-basics-02.scm new file mode 100644 index 000000000..4a5abc1af --- /dev/null +++ b/test-suite/lalr/test-lr-basics-02.scm @@ -0,0 +1,33 @@ +;;; test-lr-basics-02.scm -- +;; +;;A grammar that only accept a single terminal or the EOI. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (A) + (e (A) : $1 + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => 0) + +(check + (doit (make-lexical-token 'A #f 1)) + => 1) + +(check + ;;Parse correctly the first A and reduce it. The second A triggers + ;;an error which empties the stack and consumes all the input + ;;tokens. Finally, the end-of-input token is correctly parsed. + (let ((r (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)))) + (cons r *error*)) + => '(0 (error-handler "Syntax error: unexpected token : " . A))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-basics-03.scm b/test-suite/lalr/test-lr-basics-03.scm new file mode 100644 index 000000000..156de360d --- /dev/null +++ b/test-suite/lalr/test-lr-basics-03.scm @@ -0,0 +1,36 @@ +;;; test-lr-basics-03.scm -- +;; +;;A grammar that accepts fixed sequences of a single terminal or the +;;EOI. + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (A) + (e (A) : (list $1) + (A A) : (list $1 $2) + (A A A) : (list $1 $2 $3) + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit (make-lexical-token 'A #f 1)) + => '(1)) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2)) + => '(1 2)) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '(1 2 3)) + +(check + (doit) + => 0) + +;;; end of file diff --git a/test-suite/lalr/test-lr-basics-04.scm b/test-suite/lalr/test-lr-basics-04.scm new file mode 100644 index 000000000..34b8eda77 --- /dev/null +++ b/test-suite/lalr/test-lr-basics-04.scm @@ -0,0 +1,31 @@ +;;; test-lr-basics-04.scm -- +;; +;;A grammar accepting a sequence of equal tokens of arbitrary length. +;;The return value is the value of the last parsed token. + + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (A) + (e (e A) : $2 + (A) : $1 + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => 0) + +(check + (doit (make-lexical-token 'A #f 1)) + => 1) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => 3) + +;;; end of file diff --git a/test-suite/lalr/test-lr-basics-05.scm b/test-suite/lalr/test-lr-basics-05.scm new file mode 100644 index 000000000..ffb91d4c1 --- /dev/null +++ b/test-suite/lalr/test-lr-basics-05.scm @@ -0,0 +1,36 @@ +;;; test-lr-basics-05.scm -- +;; +;;A grammar accepting a sequence of equal tokens of arbitrary length. +;;The return value is the list of values. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (A) + (e (e A) : (cons $2 $1) + (A) : (list $1) + () : 0)))) + (parser (make-lexer tokens) error-handler))) + +(check + (doit) + => 0) + +(check + (doit (make-lexical-token 'A #f 1)) + => '(1)) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2)) + => '(2 1)) + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'A #f 2) + (make-lexical-token 'A #f 3)) + => '(3 2 1)) + +;;; end of file diff --git a/test-suite/lalr/test-lr-error-recovery-01.scm b/test-suite/lalr/test-lr-error-recovery-01.scm new file mode 100644 index 000000000..7ad756b68 --- /dev/null +++ b/test-suite/lalr/test-lr-error-recovery-01.scm @@ -0,0 +1,145 @@ +;;; test-lr-error-recovery-01.scm -- +;; +;;Test error recovery with a terminator terminal. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser + (expect: 0) + (NUMBER BAD NEWLINE) + + (script (lines) : (reverse $1) + () : 0) + (lines (lines line) : (cons $2 $1) + (line) : (list $1)) + (line (NEWLINE) : (list 'line $1) + (NUMBER NEWLINE) : (list 'line $1 $2) + (NUMBER NUMBER NEWLINE) : (list 'line $1 $2 $3) + + ;;This semantic action will cause "(recover $1 + ;;$2)" to be the result of the offending line. + (error NEWLINE) : (list 'recover $1 $2))))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; No errors, grammar tests. + +(check + (doit) + => 0) + +(check + (doit (make-lexical-token 'NEWLINE #f #\newline)) + => '((line #\newline))) + +(check + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline)) + => '((line 1 #\newline))) + +(check + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline)) + => '((line 1 2 #\newline))) + +(check + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline)) + => '((line 1 #\newline) + (line 2 #\newline))) + +(check + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 3) + (make-lexical-token 'NEWLINE #f #\newline)) + => '((line 1 #\newline) + (line 2 #\newline) + (line 3 #\newline))) + +(check + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 3) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 41) + (make-lexical-token 'NUMBER #f 42) + (make-lexical-token 'NEWLINE #f #\newline)) + => '((line 1 #\newline) + (line 2 #\newline) + (line 3 #\newline) + (line 41 42 #\newline))) + +;;; -------------------------------------------------------------------- +;;; Successful error recovery. + +(check + ;;The BAD triggers an error, recovery happens, the first NEWLINE is + ;;correctly parsed as recovery token; the second line is correct. + (let ((r (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'BAD #f 'alpha) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline)))) + (cons r *error*)) + => '(((recover #f #f) + (line 2 #\newline)) + (error-handler "Syntax error: unexpected token : " . BAD))) + + +(check + ;;The first BAD triggers an error, recovery happens skipping the + ;;second and third BADs, the first NEWLINE is detected as + ;;synchronisation token; the second line is correct. + (let ((r (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'BAD #f 'alpha) + (make-lexical-token 'BAD #f 'beta) + (make-lexical-token 'BAD #f 'delta) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline)))) + (cons r *error*)) + => '(((recover #f #f) + (line 2 #\newline)) + (error-handler "Syntax error: unexpected token : " . BAD))) + +;;; -------------------------------------------------------------------- +;;; Failed error recovery. + +(check + ;;End-of-input is found after NUMBER. + (let ((r (doit (make-lexical-token 'NUMBER #f 1)))) + (cons r *error*)) + => '(#f (error-handler "Syntax error: unexpected end of input"))) + +(check + ;;The BAD triggers the error, the stack is rewind up to the start, + ;;then end-of-input happens while trying to skip tokens until the + ;;synchronisation one is found. End-of-input is an acceptable token + ;;after the start. + (let ((r (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'BAD #f 'alpha) + (make-lexical-token 'BAD #f 'beta) + (make-lexical-token 'BAD #f 'delta)))) + (cons r *error*)) + => '(0 (error-handler "Syntax error: unexpected token : " . BAD))) + +(check + ;;The BAD triggers the error, the stack is rewind up to the start, + ;;then end-of-input happens while trying to skip tokens until the + ;;synchronisation one is found. End-of-input is an acceptable token + ;;after the start. + (let ((r (doit (make-lexical-token 'BAD #f 'alpha)))) + (cons r *error*)) + => '(0 (error-handler "Syntax error: unexpected token : " . BAD))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-error-recovery-02.scm b/test-suite/lalr/test-lr-error-recovery-02.scm new file mode 100644 index 000000000..a82498b50 --- /dev/null +++ b/test-suite/lalr/test-lr-error-recovery-02.scm @@ -0,0 +1,51 @@ +;;; test-lr-error-recovery-02.scm -- +;; +;;Test error recovery policy when the synchronisation terminal has the +;;same category of the lookahead that raises the error. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (A B C) + (alphas (alpha) : $1 + (alphas alpha) : $2) + (alpha (A B) : (list $1 $2) + (C) : $1 + (error C) : 'error-form)))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; No error, just grammar tests. + +(check + (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'B #f 2)) + => '(1 2)) + +(check + (doit (make-lexical-token 'C #f 3)) + => '3) + +;;; -------------------------------------------------------------------- +;;; Successful error recovery. + +(check + ;;Error, recovery, end-of-input. + (let ((r (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'C #f 3)))) + (cons r *error*)) + => '(error-form (error-handler "Syntax error: unexpected token : " . C))) + +(check + ;;Error, recovery, correct parse of "A B". + (let ((r (doit (make-lexical-token 'A #f 1) + (make-lexical-token 'C #f 3) + (make-lexical-token 'A #f 1) + (make-lexical-token 'B #f 2)))) + (cons r *error*)) + => '((1 2) + (error-handler "Syntax error: unexpected token : " . C))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-no-clause.scm b/test-suite/lalr/test-lr-no-clause.scm new file mode 100644 index 000000000..fb98da6d8 --- /dev/null +++ b/test-suite/lalr/test-lr-no-clause.scm @@ -0,0 +1,40 @@ +;;; test-lr-no-clause.scm -- +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (NUMBER COMMA NEWLINE) + + (lines (lines line) : (list $2) + (line) : (list $1)) + (line (NEWLINE) : #\newline + (NUMBER NEWLINE) : $1 + ;;this is a rule with no semantic action + (COMMA NUMBER NEWLINE))))) + (parser (make-lexer tokens) error-handler))) + +(check + ;;correct input + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline)) + => '(1)) + +(check + ;;correct input with comma, which is a rule with no client form + (doit (make-lexical-token 'COMMA #f #\,) + (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline)) + => '(#(line-3 #\, 1 #\newline))) + +(check + ;;correct input with comma, which is a rule with no client form + (doit (make-lexical-token 'NUMBER #f 1) + (make-lexical-token 'NEWLINE #f #\newline) + (make-lexical-token 'COMMA #f #\,) + (make-lexical-token 'NUMBER #f 2) + (make-lexical-token 'NEWLINE #f #\newline)) + => '(#(line-3 #\, 2 #\newline))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-script-expression.scm b/test-suite/lalr/test-lr-script-expression.scm new file mode 100644 index 000000000..8cf1a9bf2 --- /dev/null +++ b/test-suite/lalr/test-lr-script-expression.scm @@ -0,0 +1,119 @@ +;;; test-lr-script-expression.scm -- +;; +;;Parse scripts, each line an expression. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (N O C T (left: A) (left: M) (nonassoc: U)) + + (script (lines) : (reverse $1)) + + (lines (lines line) : (cons $2 $1) + (line) : (list $1)) + + (line (T) : #\newline + (E T) : $1 + (error T) : (list 'error-clause $2)) + + (E (N) : $1 + (E A E) : ($2 $1 $3) + (E M E) : ($2 $1 $3) + (A E (prec: U)) : ($1 $2) + (O E C) : $2)))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- +;;; Correct input + +(check + (doit (make-lexical-token 'T #f #\newline)) + => '(#\newline)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'T #f #\newline)) + => '(1)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'T #f #\newline)) + => '(3)) + +(check + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline)) + => '(7)) + +(check + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline)) + => '(9)) + +(check + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline) + + (make-lexical-token 'N #f 4) + (make-lexical-token 'M #f /) + (make-lexical-token 'N #f 5) + (make-lexical-token 'T #f #\newline)) + => '(9 4/5)) + +;;; -------------------------------------------------------------------- + +(check + ;;Successful error recovery. + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3) + (make-lexical-token 'T #f #\newline) + + (make-lexical-token 'N #f 4) + (make-lexical-token 'M #f /) + (make-lexical-token 'N #f 5) + (make-lexical-token 'T #f #\newline)) + => '((error-clause #f) + 4/5)) + +(check + ;;Unexpected end of input. + (let ((r (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2)))) + (cons r *error*)) + => '(#f (error-handler "Syntax error: unexpected end of input"))) + +(check + ;;Unexpected end of input. + (let ((r (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'T #f #\newline)))) + (cons r *error*)) + => '(((error-clause #f)) + (error-handler "Syntax error: unexpected token : " . T))) + +;;; end of file diff --git a/test-suite/lalr/test-lr-single-expressions.scm b/test-suite/lalr/test-lr-single-expressions.scm new file mode 100644 index 000000000..5fcd9f3b0 --- /dev/null +++ b/test-suite/lalr/test-lr-single-expressions.scm @@ -0,0 +1,59 @@ +;;; test-lr-single-expressions.scm -- +;; +;;Grammar accepting single expressions. +;; + +(load "common-test.scm") + +(define (doit . tokens) + (let ((parser (lalr-parser (expect: 0) + (N O C (left: A) (left: M) (nonassoc: U)) + + (E (N) : $1 + (E A E) : ($2 $1 $3) + (E M E) : ($2 $1 $3) + (A E (prec: U)) : ($1 $2) + (O E C) : $2)))) + (parser (make-lexer tokens) error-handler))) + +;;; -------------------------------------------------------------------- + +(check ;correct input + (doit (make-lexical-token 'N #f 1)) + => 1) + +(check ;correct input + (doit (make-lexical-token 'A #f -) + (make-lexical-token 'N #f 1)) + => -1) + +(check ;correct input + (doit (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 1)) + => 1) + +(check ;correct input + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2)) + => 3) + +(check ;correct input + (doit (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3)) + => 7) + +(check ;correct input + (doit (make-lexical-token 'O #f #\() + (make-lexical-token 'N #f 1) + (make-lexical-token 'A #f +) + (make-lexical-token 'N #f 2) + (make-lexical-token 'C #f #\)) + (make-lexical-token 'M #f *) + (make-lexical-token 'N #f 3)) + => 9) + +;;; end of file |