summaryrefslogtreecommitdiff
path: root/parser/parser-macros.scm
blob: c4f5a6395200567c4409a4b60cefe68423ab3f3f (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
;;; Macro definitions for the parser & lexer.


;;; This macro allows debugging of the lexer.  Before releasing, this can
;;; be replaced by (begin ,@body) for faster code.

(define-syntax (trace-parser tag . body)
;  `(begin 
;     (let* ((k (tracing-parse/entry ',tag))
;	    (res (begin ,@body)))
;       (tracing-parse/exit ',tag k res)
;       res))
  (declare (ignore tag))
  `(begin ,@body)
  )

;;; Macros used by the lexer.

;;; The lexer used a macro, char-case, to dispatch on the syntactic catagory of
;;; a character.  These catagories (processed at compile time) are defined
;;; here.  Note that some of these definitions use the char-code
;;; directly and would need updating for different character sets.

(define *lex-definitions*
  '((vtab 11)  ; define by ascii code to avoid relying of the reader
    (formfeed 12) 
    (whitechar #\newline #\space #\tab formfeed vtab)
    (small #\a - #\z)
    (large #\A - #\Z)
    (digit #\0 - #\9)
    (symbol #\! #\# #\$ #\% #\& #\* #\+ #\. #\/ #\< #\= #\> #\? #\@
      #\\ #\^ #\|)
    (presymbol #\- #\~)
    (exponent #\e #\E)
    (graphic large small digit
             #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+
             #\, #\- #\. #\/ #\: #\; #\< #\= #\> #\? #\@
	     #\[ #\\ #\] #\^ #\_ #\` #\{ #\| #\} #\~)
    (charesc #\a #\b #\f #\n #\r #\t #\v #\\ #\" #\' #\&)
    (cntrl large #\@ #\[ #\\ #\] #\^ #\_)))

;;; The char-case macro is similar to case using characters to select.
;;; The following capabilities are added by char-case:
;;;   pre-defined constants are denoted by symbols (defined above)
;;;   ranges of characters are represented using -.  For example,
;;;     (#\a - #\z #\A - #\Z) denotes all alphabetics.
;;;   numbers refer to the char code of a character.
;;; The generated code is optimized somewhat to take advantage of
;;; consecutive character ranges.  With a little work, this could be
;;; implemented using jump tables someday.

(define-syntax (char-case exp . alts)
  (expand-char-case exp alts))

(define (expand-char-case exp alts)
  (let ((temp (gensym)))
    `(let ((,temp ,exp))
       ,(expand-char-case1 temp alts))))

(define (expand-char-case1 temp alts)
  (if (null? alts)
      '()
      (let* ((alt (car alts))
	     (test (car alt))
	     (body (cons 'begin (cdr alt)))
	     (rest (expand-char-case1 temp (cdr alts))))
	(cond ((eq? test 'else)
	       body)
	      (else
	       `(if (or ,@(gen-char-tests temp
			     (if (pair? test) test (list test))))
		    ,body
		    ,rest))))))

(define (gen-char-tests temp tests)
  (gen-char-tests-1 temp
	(sort-list (gather-char-tests tests) (function char<?))))

(define (gen-char-tests-1 temp chars)
  (cond ((null? chars)
	 '())
	((long-enough-run? chars 3)
	 (gen-range-check temp (car chars) (car chars) (cdr chars)))
	(else
	 `((char=? ,temp ',(car chars))
	   ,@(gen-char-tests-1 temp (cdr chars))))))

(define (gen-range-check temp first current chars)
  (if (and (pair? chars) (consec-chars? current (car chars)))
      (gen-range-check temp first (car chars) (cdr chars))
      `((and (char>=? ,temp ',first)
	     (char<=? ,temp ',current))
	,@(gen-char-tests-1 temp chars))))

(define (consec-chars? c1 c2)
  (eqv? (+ 1 (char->integer c1)) (char->integer c2)))

(define (long-enough-run? l n)
  (or (eqv? n 1)
      (and (pair? (cdr l))
	   (consec-chars? (car l) (cadr l))
	   (long-enough-run? (cdr l) (1- n)))))

(define (gather-char-tests tests)
  (cond ((null? tests)
	 '())
	((symbol? (car tests))
	 (let ((new-test (assq (car tests) *lex-definitions*)))
	   (if new-test
	       (gather-char-tests (append (cdr new-test) (cdr tests)))
	       (error "Unknown character class: ~A~%" (car tests)))))
	((integer? (car tests))
	 (cons (integer->char (car tests))
	       (gather-char-tests (cdr tests))))
	((and (pair? (cdr tests)) (eq? '- (cadr tests)))
	 (letrec ((fn (lambda (a z)
			(if (char>? a z)
			    (gather-char-tests (cdddr tests))
			    (cons a (funcall
				      fn (integer->char
					 (+ 1 (char->integer a))) z))))))
	   (funcall fn (car tests) (caddr tests))))
	((char? (car tests))
	 (cons (car tests) (gather-char-tests (cdr tests))))
	(else
	 (error "Invalid selector in char-case: ~A~%" (car tests)))))

;;; This macro scans a list of characters on a given syntaxtic catagory.
;;; The current character is always included in the resulting list.

(define-syntax (scan-list-of char-type)
 `(letrec ((test-next (lambda ()
		       (char-case *char*
			(,char-type
			 (let ((c *char*))
			   (advance-char)
			   (cons c (funcall test-next))))
			(else '())))))
    (let ((c *char*))
      (advance-char)
      (cons c (funcall test-next)))))

;;; This macro tests for string equality in which the strings are
;;; represented by lists of characters.  The comparisons are expanded
;;; inline (really just a little partial evaluation going on here!) for
;;; fast execution.  The tok argument evaluate to a list of chars.  The string
;;; argument must be a string constant, which is converted to characters
;;; as the macro expands.

(define-syntax (string=/list? tok string)
  (let ((temp (gensym)))
    `(let ((,temp ,tok))
       ,(expand-string=/list? temp (string->list string)))))

(define (expand-string=/list? var chars)
  (if (null? chars)
      `(null? ,var)
      (let ((new-temp (gensym)))
	`(and (pair? ,var)
	      (char=? (car ,var) ',(car chars))
	      (let ((,new-temp (cdr ,var)))
		,(expand-string=/list? new-temp (cdr chars)))))))

;;; This macro extends the string equality defined above to search a
;;; list of reserved words quickly for keywords.  It does this by a case
;;; dispatch on the first character of the string and then processing
;;; the remaining characters wirh string=/list.  This would go a little
;;; faster with recursive char-case statements, but I'm a little too
;;; lazy at for this at the moment.  If a keyword is found is emitted
;;; as a symbol.  If not, the token string is emitted with the token
;;; type indicated.  Assume the string being scanned is a list of
;;; chars assigned to a var.  (Yeah - I know - I should add a gensym
;;; var for this argument!!).

(define-syntax (parse-reserved var token-type . reserved-words)
 (let ((sorted-rws (sort-list reserved-words (function string<?))))
  `(let ((thunk (lambda () (emit-token/string ',token-type ,var))))
    (char-case (car ,var)
     ,@(expand-parse-reserved var
        (group-by-first-char (list (car sorted-rws)) (cdr sorted-rws)))
      (else (funcall thunk))))))

(define (group-by-first-char group rest)
  (cond ((null? rest)
	 (list group))
	((char=? (string-ref (car group) 0)
		 (string-ref (car rest) 0))
	 (group-by-first-char (append group (list (car rest))) (cdr rest)))
	(else
	 (cons group (group-by-first-char (list (car rest)) (cdr rest))))))

(define (expand-parse-reserved var groups)
  (if (null? groups)
      '()
      `((,(string-ref (caar groups) 0)
	 (cond ,@(expand-parse-reserved/group var (car groups))
	       (else (funcall thunk))))
	,@(expand-parse-reserved var (cdr groups)))))

(define (expand-parse-reserved/group var group)
  (if (null? group)
      '()
      `(((string=/list? (cdr ,var)
	     ,(substring (car group) 1 (string-length (car group))))
	 (emit-token ',(string->symbol (car group))))
	,@(expand-parse-reserved/group var (cdr group)))))


;;; The following macros are used by the parser.

;;; The primary macro used by the parser is token-case, which dispatches
;;; on the type of the current token (this is always *token* - unlike the
;;; lexer, no lookahead is provided; however, some of these dispatches are
;;; procedures that do a limited lookahead.  The problem with lookahead is that
;;; the layout rule adds tokens which are not visible looking into the
;;; token stream directly.

;;; Unlike char-case, the token is normally advanced unless the selector
;;; includes `no-advance'.  The final else also avoids advancing the token.

;;; In addition to raw token types, more complex types can be used.  These
;;; are defined here.  The construct `satisfies fn' calls the indicated
;;; function to determine whether the current token matches.

;;; If the token type to be matched is not a constant, the construct
;;; `unquote var' matches the current token against the type in the var.

(define *predefined-syntactic-catagories* '(
  (+ satisfies at-varsym/+?)
  (- satisfies at-varsym/-?)
  (tycon no-advance conid)
  (tyvar no-advance varid)
  (var no-advance varid satisfies at-varsym/paren?)
  (con no-advance conid satisfies at-consym/paren?)
  (name no-advance var con)
  (consym/paren no-advance satisfies at-consym/paren?)
  (varsym? no-advance varsym)
  (consym? no-advance consym)
  (varid? no-advance varid)
  (conid? no-advance conid)
  (op no-advance varsym consym \`)
  (varop no-advance varsym satisfies at-varid/quoted?)
  (conop no-advance consym satisfies at-conid/quoted?)
  (modid no-advance conid)
  (literal no-advance integer float char string)
  (numeric no-advance integer float)
  (k no-advance integer)
  (+k no-advance satisfies at-+k?)
  (-n no-advance satisfies at--n?)
  (apat-start no-advance varid conid literal _ \( \[ \~)
  (pat-start no-advance - apat-start)
  (atype-start no-advance tycon tyvar \( \[)
  (aexp-start no-advance varid conid \( \[ literal)
  ))

;;; The format of token-case is
;;;  (token-case
;;;    (sel1 . e1) (sel2 . e2) ... [(else . en)])
;;; If the sel is a symbol it is the same as a singleton list: (@ x) = ((@) x)

;;; Warning: this generates rather poor code!  Should be fixed up someday.

(define-syntax (token-case . alts)
  `(cond ,@(map (function gen-token-case-alt) alts)))

(define (gen-token-case-alt alt)
  (let ((test (car alt))
	(code (cdr alt)))
    (cond ((eq? test 'else)
	   `(else ,@code))
	  ((symbol? test)
	   (gen-token-case-alt-1 (expand-catagories (list test)) code))
	  (else
	   (gen-token-case-alt-1 (expand-catagories test) code)))))

(define (expand-catagories terms)
  (if (null? terms)
      terms
      (let ((a (assq (car terms) *predefined-syntactic-catagories*))
	    (r (expand-catagories (cdr terms))))
	(if (null? a)
	    (cons (car terms) r)
	    (expand-catagories (append (cdr a) r))))))

(define (gen-token-case-alt-1 test code)
  `((or ,@(gen-token-test test))
    ,@(if (memq 'no-advance test) '() '((advance-token)))
    ,@code))

(define (gen-token-test test)
  (cond ((null? test)
	 '())
	((eq? (car test) 'no-advance)
	 (gen-token-test (cdr test)))
	((eq? (car test) 'unquote)
	 (cons `(eq? *token* ,(cadr test)) (gen-token-test (cddr test))))
	((eq? (car test) 'satisfies)
	 (cons (list (cadr test)) (gen-token-test (cddr test))))
	(else
	 (cons `(eq? *token* ',(car test)) (gen-token-test (cdr test))))))

;;; require-tok requires a specific token to be at the scanner.  If it
;;; is found, the token is advanced over.  Otherwise, the error
;;; routine is called.

(define-syntax (require-token tok error-handler)
  `(token-case
    (,tok '())
    (else ,error-handler)))

;;; The save-parser-context macro captures the current line & file and
;;; attaches it to the ast node generated.

(define-syntax (save-parser-context . body)
  (let ((temp1 (gensym))
	(temp2 (gensym)))
    `(let ((,temp1 (capture-current-line))
	   (,temp2 (begin ,@body)))
       (setf (ast-node-line-number ,temp2) ,temp1)
       ,temp2)))

(define (capture-current-line)
  (make source-pointer (line *current-line*) (file *current-file*)))

(define-syntax (push-decl-list decl place)
  `(setf ,place (nconc ,place (list ,decl))))