summaryrefslogtreecommitdiff
path: root/scripts/read-text-outline
blob: 7affb6348ed21be764f239377f6903c6b5b3dffa (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/bin/sh
# aside from this initial boilerplate, this is actually -*- scheme -*- code
main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')'
exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
!#
;;; read-text-outline --- Read a text outline and display it as a sexp

;; 	Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301 USA

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: read-text-outline OUTLINE
;;
;; Scan OUTLINE file and display a list of trees, the structure of
;; each reflecting the "levels" in OUTLINE.  The recognized outline
;; format (used to indicate outline headings) is zero or more pairs of
;; leading spaces followed by "-".  Something like:
;;
;;    - a                  0
;;      - b                1
;;        - c              2
;;      - d                1
;;    - e                  0
;;      - f                1
;;        - g              2
;;      - h                1
;;
;; In this example the levels are shown to the right.  The output for
;; such a file would be the single line:
;;
;;   (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
;;
;; Basically, anything at the beginning of a list is a parent, and the
;; remaining elements of that list are its children.
;;
;;
;; Usage from a Scheme program: These two procs are exported:
;;
;;   (read-text-outline . args)           ; only first arg is used
;;   (read-text-outline-silently port)
;;   (make-text-outline-reader re specs)
;;
;; `make-text-outline-reader' returns a proc that reads from PORT and
;; returns a list of trees (similar to `read-text-outline-silently').
;;
;; RE is a regular expression (string) that is used to identify a header
;; line of the outline (as opposed to a whitespace line or intervening
;; text).  RE must begin w/ a sub-expression to match the "level prefix"
;; of the line.  You can use `level-submatch-number' in SPECS (explained
;; below) to specify a number other than 1, the default.
;;
;; Normally, the level of the line is taken directly as the length of
;; its level prefix.  This often results in adjacent levels not mapping
;; to adjacent numbers, which confuses the tree-building portion of the
;; program, which expects top-level to be 0, first sub-level to be 1,
;; etc.  You can use `level-substring-divisor' or `compute-level' in
;; SPECS to specify a constant scaling factor or specify a completely
;; alternative procedure, respectively.
;;
;; SPECS is an alist which may contain the following key/value pairs:
;;
;; - level-submatch-number NUMBER
;; - level-substring-divisor NUMBER
;; - compute-level PROC
;; - body-submatch-number NUMBER
;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
;;
;; The PROC value associated with key `compute-level' should take a
;; Scheme match structure (as returned by `regexp-exec') and return a
;; number, the normalized level for that line.  If this is specified,
;; it takes precedence over other level-computation methods.
;;
;; Use `body-submatch-number' if RE specifies the whole body, or if you
;; want to make use of the extra fields parsing.  The `extra-fields'
;; value is a sub-alist, whose keys name additional fields that are to
;; be recognized.  These fields along with `level' are set as object
;; properties of the final string ("body") that is consed into the tree.
;; If a field name ends in "?" the field value is set to be #t if there
;; is a match and the result is not an empty string, and #f otherwise.
;;
;;
;; Bugs and caveats:
;;
;; (1) Only the first file specified on the command line is scanned.
;; (2) TAB characters at the beginnings of lines are not recognized.
;; (3) Outlines that "skip" levels signal an error.  In other words,
;;     this will fail:
;;
;;            - a               0
;;              - b             1
;;                  - c         3       <-- skipped 2 -- error!
;;              - d             1
;;
;;
;; TODO: Determine what's the right thing to do for skips.
;;       Handle TABs.
;;       Make line format customizable via longopts.

;;; Code:

(define-module (scripts read-text-outline)
  :export (read-text-outline
           read-text-outline-silently
           make-text-outline-reader)
  :use-module (ice-9 regex)
  :autoload (ice-9 rdelim) (read-line)
  :autoload (ice-9 getopt-long) (getopt-long))

(define (?? symbol)
  (let ((name (symbol->string symbol)))
    (string=? "?" (substring name (1- (string-length name))))))

(define (msub n)
  (lambda (m)
    (match:substring m n)))

(define (??-predicates pair)
  (cons (car pair)
        (if (?? (car pair))
            (lambda (m)
              (not (string=? "" (match:substring m (cdr pair)))))
            (msub (cdr pair)))))

(define (make-line-parser re specs)
  (let* ((rx (let ((fc (substring re 0 1)))
               (make-regexp (if (string=? "^" fc)
                                re
                                (string-append "^" re)))))
         (check (lambda (key)
                  (assq-ref specs key)))
         (level-substring (msub (or (check 'level-submatch-number) 1)))
         (extract-level (cond ((check 'compute-level)
                               => (lambda (proc)
                                    (lambda (m)
                                      (proc m))))
                              ((check 'level-substring-divisor)
                               => (lambda (n)
                                    (lambda (m)
                                      (/ (string-length (level-substring m))
                                         n))))
                              (else
                               (lambda (m)
                                 (string-length (level-substring m))))))
         (extract-body (cond ((check 'body-submatch-number)
                              => msub)
                             (else
                              (lambda (m) (match:suffix m)))))
         (misc-props! (cond ((check 'extra-fields)
                             => (lambda (alist)
                                  (let ((new (map ??-predicates alist)))
                                    (lambda (obj m)
                                      (for-each
                                       (lambda (pair)
                                         (set-object-property!
                                          obj (car pair)
                                          ((cdr pair) m)))
                                       new)))))
                            (else
                             (lambda (obj m) #t)))))
    ;; retval
    (lambda (line)
      (cond ((regexp-exec rx line)
             => (lambda (m)
                  (let ((level (extract-level m))
                        (body  (extract-body m)))
                    (set-object-property! body 'level level)
                    (misc-props! body m)
                    body)))
            (else #f)))))

(define (make-text-outline-reader re specs)
  (let ((parse-line (make-line-parser re specs)))
    ;; retval
    (lambda (port)
      (let* ((all '(start))
             (pchain (list)))           ; parents chain
        (let loop ((line (read-line port))
                   (prev-level -1)      ; how this relates to the first input
                                        ; level determines whether or not we
                                        ; start in "sibling" or "child" mode.
                                        ; in the end, `start' is ignored and
                                        ; it's much easier to ignore parents
                                        ; than siblings (sometimes).  this is
                                        ; not to encourage ignorance, however.
                   (tp all))            ; tail pointer
          (or (eof-object? line)
              (cond ((parse-line line)
                     => (lambda (w)
                          (let* ((words (list w))
                                 (level (object-property w 'level))
                                 (diff (- level prev-level)))
                            (cond

                             ;; sibling
                             ((zero? diff)
                              ;; just extend the chain
                              (set-cdr! tp words))

                             ;; child
                             ((positive? diff)
                              (or (= 1 diff)
                                  (error "unhandled diff not 1:" diff line))
                              ;; parent may be contacted by uncle later (kids
                              ;; these days!) so save its level
                              (set-object-property! tp 'level prev-level)
                              (set! pchain (cons tp pchain))
                              ;; "push down" car into hierarchy
                              (set-car! tp (cons (car tp) words)))

                             ;; uncle
                             ((negative? diff)
                              ;; prune back to where levels match
                              (do ((p pchain (cdr p)))
                                  ((= level (object-property (car p) 'level))
                                   (set! pchain p)))
                              ;; resume at this level
                              (set-cdr! (car pchain) words)
                              (set! pchain (cdr pchain))))

                            (loop (read-line port) level words))))
                    (else (loop (read-line port) prev-level tp)))))
        (set! all (car all))
        (if (eq? 'start all)
            '()                         ; wasteland
            (cdr all))))))

(define read-text-outline-silently
  (make-text-outline-reader "(([ ][ ])*)- *"
                            '((level-substring-divisor . 2))))

(define (read-text-outline . args)
  (write (read-text-outline-silently (open-file (car args) "r")))
  (newline)
  #t)                                   ; exit val

(define main read-text-outline)

;;; read-text-outline ends here