summaryrefslogtreecommitdiff
path: root/module/ice-9/ftw.scm
blob: 9c9694fd732ee5d69e644c9966f4354c4fa484b5 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
;;;; ftw.scm --- file system tree walk

;;;; 	Copyright (C) 2002, 2003, 2006, 2011, 2012 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Two procedures are provided: `ftw' and `nftw'.

;; NOTE: The following description was adapted from the GNU libc info page, w/
;; significant modifications for a more "Schemey" interface.  Most noticible
;; are the inlining of `struct FTW *' parameters `base' and `level' and the
;; omission of `descriptors' parameters.

;; * Types
;;
;;    The X/Open specification defines two procedures to process whole
;; hierarchies of directories and the contained files.  Both procedures
;; of this `ftw' family take as one of the arguments a callback procedure
;; which must be of these types.
;;
;;  - Data Type: __ftw_proc_t
;;           (lambda (filename statinfo flag) ...) => status
;;
;;      Type for callback procedures given to the `ftw' procedure.  The
;;      first parameter is a filename, the second parameter is the
;;      vector value as returned by calling `stat' on FILENAME.
;;
;;      The last parameter is a symbol giving more information about
;;      FILENAM.  It can have one of the following values:
;;
;;     `regular'
;;           The current item is a normal file or files which do not fit
;;           into one of the following categories.  This means
;;           especially special files, sockets etc.
;;
;;     `directory'
;;           The current item is a directory.
;;
;;     `invalid-stat'
;;           The `stat' call to fill the object pointed to by the second
;;           parameter failed and so the information is invalid.
;;
;;     `directory-not-readable'
;;           The item is a directory which cannot be read.
;;
;;     `symlink'
;;           The item is a symbolic link.  Since symbolic links are
;;           normally followed seeing this value in a `ftw' callback
;;           procedure means the referenced file does not exist.  The
;;           situation for `nftw' is different.
;;
;;  - Data Type: __nftw_proc_t
;;           (lambda (filename statinfo flag base level) ...) => status
;;
;;      The first three arguments have the same as for the
;;      `__ftw_proc_t' type.  A difference is that for the third
;;      argument some additional values are defined to allow finer
;;      differentiation:
;;
;;     `directory-processed'
;;           The current item is a directory and all subdirectories have
;;           already been visited and reported.  This flag is returned
;;           instead of `directory' if the `depth' flag is given to
;;           `nftw' (see below).
;;
;;     `stale-symlink'
;;           The current item is a stale symbolic link.  The file it
;;           points to does not exist.
;;
;;      The last two parameters are described below.  They contain
;;      information to help interpret FILENAME and give some information
;;      about current state of the traversal of the directory hierarchy.
;;
;;     `base'
;;           The value specifies which part of the filename argument
;;           given in the first parameter to the callback procedure is
;;           the name of the file.  The rest of the string is the path
;;           to locate the file.  This information is especially
;;           important if the `chdir' flag for `nftw' was set since then
;;           the current directory is the one the current item is found
;;           in.
;;
;;     `level'
;;           While processing the directory the procedures tracks how
;;           many directories have been examined to find the current
;;           item.  This nesting level is 0 for the item given starting
;;           item (file or directory) and is incremented by one for each
;;           entered directory.
;;
;; * Procedure: (ftw filename proc . options)
;;   Do a file system tree walk starting at FILENAME using PROC.
;;
;;   The `ftw' procedure calls the callback procedure given in the
;;   parameter PROC for every item which is found in the directory
;;   specified by FILENAME and all directories below.  The procedure
;;   follows symbolic links if necessary but does not process an item
;;   twice.  If FILENAME names no directory this item is the only
;;   object reported by calling the callback procedure.
;;
;;   The filename given to the callback procedure is constructed by
;;   taking the FILENAME parameter and appending the names of all
;;   passed directories and then the local file name.  So the
;;   callback procedure can use this parameter to access the file.
;;   Before the callback procedure is called `ftw' calls `stat' for
;;   this file and passes the information up to the callback
;;   procedure.  If this `stat' call was not successful the failure is
;;   indicated by setting the flag argument of the callback procedure
;;   to `invalid-stat'.  Otherwise the flag is set according to the
;;   description given in the description of `__ftw_proc_t' above.
;;
;;   The callback procedure is expected to return non-#f to indicate
;;   that no error occurred and the processing should be continued.
;;   If an error occurred in the callback procedure or the call to
;;   `ftw' shall return immediately the callback procedure can return
;;   #f.  This is the only correct way to stop the procedure.  The
;;   program must not use `throw' or similar techniques to continue
;;   the program in another place.  [Can we relax this? --ttn]
;;
;;   The return value of the `ftw' procedure is #t if all callback
;;   procedure calls returned #t and all actions performed by the
;;   `ftw' succeeded.  If some procedure call failed (other than
;;   calling `stat' on an item) the procedure returns #f.  If a
;;   callback procedure returns a value other than #t this value is
;;   returned as the return value of `ftw'.
;;
;; * Procedure: (nftw filename proc . control-flags)
;;   Do a new-style file system tree walk starting at FILENAME using PROC.
;;   Various optional CONTROL-FLAGS alter the default behavior.
;;
;;   The `nftw' procedures works like the `ftw' procedures.  It calls
;;   the callback procedure PROC for all items it finds in the
;;   directory FILENAME and below.
;;
;;   The differences are that for one the callback procedure is of a
;;   different type.  It takes also `base' and `level' parameters as
;;   described above.
;;
;;   The second difference is that `nftw' takes additional optional
;;   arguments which are zero or more of the following symbols:
;;
;;   physical'
;;        While traversing the directory symbolic links are not
;;        followed.  I.e., if this flag is given symbolic links are
;;        reported using the `symlink' value for the type parameter
;;        to the callback procedure.  Please note that if this flag is
;;        used the appearance of `symlink' in a callback procedure
;;        does not mean the referenced file does not exist.  To
;;        indicate this the extra value `stale-symlink' exists.
;;
;;   mount'
;;        The callback procedure is only called for items which are on
;;        the same mounted file system as the directory given as the
;;        FILENAME parameter to `nftw'.
;;
;;   chdir'
;;        If this flag is given the current working directory is
;;        changed to the directory containing the reported object
;;        before the callback procedure is called.
;;
;;   depth'
;;        If this option is given the procedure visits first all files
;;        and subdirectories before the callback procedure is called
;;        for the directory itself (depth-first processing).  This
;;        also means the type flag given to the callback procedure is
;;        `directory-processed' and not `directory'.
;;
;;   The return value is computed in the same way as for `ftw'.
;;   `nftw' returns #t if no failure occurred in `nftw' and all
;;   callback procedure call return values are also #t.  For internal
;;   errors such as memory problems the error `ftw-error' is thrown.
;;   If the return value of a callback invocation is not #t this
;;   very same value is returned.

;;; Code:

(define-module (ice-9 ftw)
  #:use-module (ice-9 match)
  #:use-module (ice-9 vlist)
  #:use-module (srfi srfi-1)
  #:autoload   (ice-9 i18n)   (string-locale<?)
  #:export (ftw nftw
            file-system-fold
            file-system-tree
            scandir))

(define (directory-files dir)
  (let ((dir-stream (opendir dir)))
    (let loop ((new (readdir dir-stream))
               (acc '()))
      (if (eof-object? new)
	  (begin
	    (closedir dir-stream)
	    acc)
          (loop (readdir dir-stream)
                (if (or (string=? "."  new)             ;;; ignore
                        (string=? ".." new))            ;;; ignore
                    acc
                    (cons new acc)))))))

(define (pathify . nodes)
  (let loop ((nodes nodes)
             (result ""))
    (if (null? nodes)
        (or (and (string=? "" result) "")
            (substring result 1 (string-length result)))
        (loop (cdr nodes) (string-append result "/" (car nodes))))))

(define (abs? filename)
  (char=? #\/ (string-ref filename 0)))

;; `visited?-proc' returns a test procedure VISITED? which when called as
;; (VISITED? stat-obj) returns #f the first time a distinct file is seen,
;; then #t on any subsequent sighting of it.
;;
;; stat:dev and stat:ino together uniquely identify a file (see "Attribute
;; Meanings" in the glibc manual).  Often there'll be just one dev, and
;; usually there's just a handful mounted, so the strategy here is a small
;; hash table indexed by dev, containing hash tables indexed by ino.
;;
;; It'd be possible to make a pair (dev . ino) and use that as the key to a
;; single hash table.  It'd use an extra pair for every file visited, but
;; might be a little faster if it meant less scheme code.
;;
(define (visited?-proc size)
  (let ((dev-hash (make-hash-table 7)))
    (lambda (s)
      (and s
	   (let ((ino-hash (hashv-ref dev-hash (stat:dev s)))
		 (ino      (stat:ino s)))
	     (or ino-hash
		 (begin
		   (set! ino-hash (make-hash-table size))
		   (hashv-set! dev-hash (stat:dev s) ino-hash)))
	     (or (hashv-ref ino-hash ino)
		 (begin
		   (hashv-set! ino-hash ino #t)
		   #f)))))))

(define (stat-dir-readable?-proc uid gid)
  (let ((uid (getuid))
        (gid (getgid)))
    (lambda (s)
      (let* ((perms (stat:perms s))
             (perms-bit-set? (lambda (mask)
                               (not (= 0 (logand mask perms))))))
        (or (and (= uid (stat:uid s))
                 (perms-bit-set? #o400))
            (and (= gid (stat:gid s))
                 (perms-bit-set? #o040))
            (perms-bit-set? #o004))))))

(define (stat&flag-proc dir-readable? . control-flags)
  (let* ((directory-flag (if (memq 'depth control-flags)
                             'directory-processed
                             'directory))
         (stale-symlink-flag (if (memq 'nftw-style control-flags)
                                 'stale-symlink
                                 'symlink))
         (physical? (memq 'physical control-flags))
         (easy-flag (lambda (s)
                      (let ((type (stat:type s)))
                        (if (eq? 'directory type)
                            (if (dir-readable? s)
                                directory-flag
                                'directory-not-readable)
                            'regular)))))
    (lambda (name)
      (let ((s (false-if-exception (lstat name))))
        (cond ((not s)
               (values s 'invalid-stat))
              ((eq? 'symlink (stat:type s))
               (let ((s-follow (false-if-exception (stat name))))
                 (cond ((not s-follow)
                        (values s stale-symlink-flag))
                       ((and s-follow physical?)
                        (values s 'symlink))
                       ((and s-follow (not physical?))
                        (values s-follow (easy-flag s-follow))))))
              (else (values s (easy-flag s))))))))

(define (clean name)
  (let ((last-char-index (1- (string-length name))))
    (if (char=? #\/ (string-ref name last-char-index))
        (substring name 0 last-char-index)
        name)))

(define (ftw filename proc . options)
  (let* ((visited? (visited?-proc (cond ((memq 'hash-size options) => cadr)
                                        (else 211))))
         (stat&flag (stat&flag-proc
                     (stat-dir-readable?-proc (getuid) (getgid)))))
    (letrec ((go (lambda (fullname)
                   (call-with-values (lambda () (stat&flag fullname))
                     (lambda (s flag)
                       (or (visited? s)
                           (let ((ret (proc fullname s flag))) ; callback
                             (or (eq? #t ret)
                                 (throw 'ftw-early-exit ret))
                             (and (eq? 'directory flag)
                                  (for-each
                                   (lambda (child)
                                     (go (pathify fullname child)))
                                   (directory-files fullname)))
                             #t)))))))
      (catch 'ftw-early-exit
             (lambda () (go (clean filename)))
             (lambda (key val) val)))))

(define (nftw filename proc . control-flags)
  (let* ((od (getcwd))                  ; orig dir
         (odev (let ((s (false-if-exception (lstat filename))))
                 (if s (stat:dev s) -1)))
         (same-dev? (if (memq 'mount control-flags)
                        (lambda (s) (= (stat:dev s) odev))
                        (lambda (s) #t)))
         (base-sub (lambda (name base) (substring name 0 base)))
         (maybe-cd (if (memq 'chdir control-flags)
                       (if (abs? filename)
                           (lambda (fullname base)
                             (or (= 0 base)
                                 (chdir (base-sub fullname base))))
                           (lambda (fullname base)
                             (chdir
                              (pathify od (base-sub fullname base)))))
                       (lambda (fullname base) #t)))
         (maybe-cd-back (if (memq 'chdir control-flags)
                            (lambda () (chdir od))
                            (lambda () #t)))
         (depth-first? (memq 'depth control-flags))
         (visited? (visited?-proc
                    (cond ((memq 'hash-size control-flags) => cadr)
                          (else 211))))
         (has-kids? (if depth-first?
                        (lambda (flag) (eq? flag 'directory-processed))
                        (lambda (flag) (eq? flag 'directory))))
         (stat&flag (apply stat&flag-proc
                           (stat-dir-readable?-proc (getuid) (getgid))
                           (cons 'nftw-style control-flags))))
    (letrec ((go (lambda (fullname base level)
                   (call-with-values (lambda () (stat&flag fullname))
                     (lambda (s flag)
                       (letrec ((self (lambda ()
                                        (maybe-cd fullname base)
                                        ;; the callback
                                        (let ((ret (proc fullname s flag
                                                         base level)))
                                          (maybe-cd-back)
                                          (or (eq? #t ret)
                                              (throw 'nftw-early-exit ret)))))
                                (kids (lambda ()
                                        (and (has-kids? flag)
                                             (for-each
                                              (lambda (child)
                                                (go (pathify fullname child)
                                                    (1+ (string-length
                                                         fullname))
                                                    (1+ level)))
                                              (directory-files fullname))))))
                         (or (visited? s)
                             (not (same-dev? s))
                             (if depth-first?
                                 (begin (kids) (self))
                                 (begin (self) (kids)))))))
                   #t)))
      (let ((ret (catch 'nftw-early-exit
                        (lambda () (go (clean filename) 0 0))
                        (lambda (key val) val))))
        (chdir od)
        ret))))


;;;
;;; `file-system-fold' & co.
;;;

(define-syntax-rule (errno-if-exception expr)
  (catch 'system-error
    (lambda ()
      expr)
    (lambda args
      (system-error-errno args))))

(define* (file-system-fold enter? leaf down up skip error init file-name
                           #:optional (stat lstat))
  "Traverse the directory at FILE-NAME, recursively.  Enter
sub-directories only when (ENTER? PATH STAT RESULT) returns true.  When
a sub-directory is entered, call (DOWN PATH STAT RESULT), where PATH is
the path of the sub-directory and STAT the result of (stat PATH); when
it is left, call (UP PATH STAT RESULT).  For each file in a directory,
call (LEAF PATH STAT RESULT).  When ENTER? returns false, call (SKIP
PATH STAT RESULT).  When an `opendir' or STAT call raises an exception,
call (ERROR PATH STAT ERRNO RESULT), with ERRNO being the operating
system error number that was raised.

Return the result of these successive applications.
When FILE-NAME names a flat file, (LEAF PATH STAT INIT) is returned.
The optional STAT parameter defaults to `lstat'."

  (define (mark v s)
    (vhash-cons (cons (stat:dev s) (stat:ino s)) #t v))

  (define (visited? v s)
    (vhash-assoc (cons (stat:dev s) (stat:ino s)) v))

  (let loop ((name     file-name)
             (path     "")
             (dir-stat (errno-if-exception (stat file-name)))
             (result   init)
             (visited  vlist-null))

    (define full-name
      (if (string=? path "")
          name
          (string-append path "/" name)))

    (cond
     ((integer? dir-stat)
      ;; FILE-NAME is not readable.
      (error full-name #f dir-stat result))
     ((visited? visited dir-stat)
      (values result visited))
     ((eq? 'directory (stat:type dir-stat)) ; true except perhaps the 1st time
      (if (enter? full-name dir-stat result)
          (let ((dir     (errno-if-exception (opendir full-name)))
                (visited (mark visited dir-stat)))
            (cond
             ((directory-stream? dir)
              (let liip ((entry   (readdir dir))
                         (result  (down full-name dir-stat result))
                         (subdirs '()))
                (cond ((eof-object? entry)
                       (begin
                         (closedir dir)
                         (let ((r+v
                                (fold (lambda (subdir result+visited)
                                        (call-with-values
                                            (lambda ()
                                              (loop (car subdir)
                                                    full-name
                                                    (cdr subdir)
                                                    (car result+visited)
                                                    (cdr result+visited)))
                                          cons))
                                      (cons result visited)
                                      subdirs)))
                           (values (up full-name dir-stat (car r+v))
                                   (cdr r+v)))))
                      ((or (string=? entry ".")
                           (string=? entry ".."))
                       (liip (readdir dir)
                             result
                             subdirs))
                      (else
                       (let* ((child (string-append full-name "/" entry))
                              (st    (errno-if-exception (stat child))))
                         (if (integer? st) ; CHILD is a dangling symlink?
                             (liip (readdir dir)
                                   (error child #f st result)
                                   subdirs)
                             (if (eq? (stat:type st) 'directory)
                                 (liip (readdir dir)
                                       result
                                       (alist-cons entry st subdirs))
                                 (liip (readdir dir)
                                       (leaf child st result)
                                       subdirs))))))))
             (else
              ;; Directory FULL-NAME not readable, but it is stat'able.
              (values (error full-name dir-stat dir result)
                      visited))))
          (values (skip full-name dir-stat result)
                  (mark visited dir-stat))))
     (else
      ;; Caller passed a FILE-NAME that names a flat file, not a directory.
      (leaf full-name dir-stat result)))))

(define* (file-system-tree file-name
                           #:optional (enter? (lambda (n s) #t))
                                      (stat lstat))
  "Return a tree of the form (FILE-NAME STAT CHILDREN ...) where STAT is
the result of (STAT FILE-NAME) and CHILDREN are similar structures for
each file contained in FILE-NAME when it designates a directory.  The
optional ENTER? predicate is invoked as (ENTER? NAME STAT) and should
return true to allow recursion into directory NAME; the default value is
a procedure that always returns #t.  When a directory does not match
ENTER?, it nonetheless appears in the resulting tree, only with zero
children.  The optional STAT parameter defaults to `lstat'.  Return #f
when FILE-NAME is not readable."
  (define (enter?* name stat result)
    (enter? name stat))
  (define (leaf name stat result)
    (match result
      (((siblings ...) rest ...)
       (cons (alist-cons (basename name) (cons stat '()) siblings)
             rest))))
  (define (down name stat result)
    (cons '() result))
  (define (up name stat result)
    (match result
      (((children ...) (siblings ...) rest ...)
       (cons (alist-cons (basename name) (cons stat children)
                         siblings)
             rest))))
  (define skip                   ; keep an entry for skipped directories
    leaf)
  (define (error name stat errno result)
    (if (string=? name file-name)
        result
        (leaf name stat result)))

  (match (file-system-fold enter?* leaf down up skip error '(())
                           file-name stat)
    (((tree)) tree)
    ((())     #f)))                            ; FILE-NAME is unreadable

(define* (scandir name #:optional (select? (const #t))
                                  (entry<? string-locale<?))
  "Return the list of the names of files contained in directory NAME
that match predicate SELECT? (by default, all files.)  The returned list
of file names is sorted according to ENTRY<?, which defaults to
`string-locale<?'.  Return #f when NAME is unreadable or is not a directory."
  (define (enter? dir stat result)
    (and stat (string=? dir name)))

  (define (visit basename result)
    (if (select? basename)
        (cons basename result)
        result))

  (define (leaf name stat result)
    (and result
         (visit (basename name) result)))

  (define (down name stat result)
    (visit "." '()))

  (define (up name stat result)
    (visit ".." result))

  (define (skip name stat result)
    ;; All the sub-directories are skipped.
    (visit (basename name) result))

  (define (error name* stat errno result)
    (if (string=? name name*)             ; top-level NAME is unreadable
        result
        (visit (basename name*) result)))

  (and=> (file-system-fold enter? leaf down up skip error #f name lstat)
         (lambda (files)
           (sort files entry<?))))

;;; ftw.scm ends here