summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2011-12-13 23:54:26 +0100
committerLudovic Courtès <ludo@gnu.org>2011-12-13 23:56:19 +0100
commit243db01e51297cf7e165ee91e96221426b12b345 (patch)
tree4e4d332578e01dfd7d4c8f0a49fe88a701c03721
parentac16263bc191f6b1f05b49d0f291f7fd938da72c (diff)
Add `file-system-fold' and `file-system-tree' to (ice-9 ftw).
* module/ice-9/ftw.scm (file-system-fold, file-system-tree): New procedures. * test-suite/tests/ftw.test (%top-srcdir, %test-dir): New variables. ("file-system-fold", "file-system-tree"): New test prefixes. * doc/ref/misc-modules.texi (File Tree Walk): Document `file-system-tree' and `file-system-fold'.
-rw-r--r--doc/ref/misc-modules.texi136
-rw-r--r--module/ice-9/ftw.scm130
-rw-r--r--test-suite/tests/ftw.test98
3 files changed, 357 insertions, 7 deletions
diff --git a/doc/ref/misc-modules.texi b/doc/ref/misc-modules.texi
index 3dbe981b8..ee124899a 100644
--- a/doc/ref/misc-modules.texi
+++ b/doc/ref/misc-modules.texi
@@ -1099,15 +1099,145 @@ try to use one of them. The reason for two versions is that the full
@cindex file tree walk
The functions in this section traverse a tree of files and
-directories, in a fashion similar to the C @code{ftw} and @code{nftw}
-routines (@pxref{Working with Directory Trees,,, libc, GNU C Library
-Reference Manual}).
+directories. They come in two flavors: the first one is a high-level
+functional interface, and the second one is similar to the C @code{ftw}
+and @code{nftw} routines (@pxref{Working with Directory Trees,,, libc,
+GNU C Library Reference Manual}).
@example
(use-modules (ice-9 ftw))
@end example
@sp 1
+@defun file-system-tree file-name [enter?]
+Return a tree of the form @code{(@var{file-name} @var{stat}
+@var{children} ...)} where @var{stat} is the result of @code{(lstat
+@var{file-name})} and @var{children} are similar structures for each
+file contained in @var{file-name} when it designates a directory.
+
+The optional @var{enter?} predicate is invoked as @code{(@var{enter?}
+@var{name} @var{stat})} and should return true to allow recursion into
+directory @var{name}; the default value is a procedure that always
+returns @code{#t}. When a directory does not match @var{enter?}, it
+nonetheless appears in the resulting tree, only with zero children.
+
+The example below shows how to obtain a hierarchical listing of the
+files under the @file{module/language} directory in the Guile source
+tree, discarding their @code{stat} info:
+
+@example
+(use-modules (ice-9 match))
+
+(define remove-stat
+ ;; Remove the `stat' object the `file-system-tree' provides
+ ;; for each file in the tree.
+ (match-lambda
+ ((name stat) ; flat file
+ name)
+ ((name stat children ...) ; directory
+ (list name (map remove-stat children)))))
+
+(let ((dir (string-append (assq-ref %guile-build-info 'top_srcdir)
+ "/module/language")))
+ (remove-stat (file-system-tree dir)))
+
+@result{}
+("language"
+ (("value" ("spec.go" "spec.scm"))
+ ("scheme"
+ ("spec.go"
+ "spec.scm"
+ "compile-tree-il.scm"
+ "decompile-tree-il.scm"
+ "decompile-tree-il.go"
+ "compile-tree-il.go"))
+ ("tree-il"
+ ("spec.go"
+ "fix-letrec.go"
+ "inline.go"
+ "fix-letrec.scm"
+ "compile-glil.go"
+ "spec.scm"
+ "optimize.scm"
+ "primitives.scm"
+ @dots{}))
+ @dots{}))
+@end example
+@end defun
+
+@cindex file system combinator
+
+It is often desirable to process directories entries directly, rather
+than building up a tree of entries in memory, like
+@code{file-system-tree} does. The following procedure, a
+@dfn{combinator}, is designed to allow directory entries to be processed
+directly as a directory tree is traversed; in fact,
+@code{file-system-tree} is implemented in terms of it.
+
+@defun file-system-fold enter? leaf down up skip init file-name
+Traverse the directory at @var{file-name}, recursively, and return the
+result of the successive applications of the @var{leaf}, @var{down},
+@var{up}, and @var{skip} procedures as described below.
+
+Enter sub-directories only when @code{(@var{enter?} @var{path}
+@var{stat} @var{result})} returns true. When a sub-directory is
+entered, call @code{(@var{down} @var{path} @var{stat} @var{result})},
+where @var{path} is the path of the sub-directory and @var{stat} the
+result of @code{(false-if-exception (lstat @var{path}))}; when it is
+left, call @code{(@var{up} @var{path} @var{stat} @var{result})}.
+
+For each file in a directory, call @code{(@var{leaf} @var{path}
+@var{stat} @var{result})}.
+
+When @var{enter?} returns @code{#f}, or when an unreadable directory is
+encountered, call @code{(@var{skip} @var{path} @var{stat}
+@var{result})}.
+
+When @var{file-name} names a flat file, @code{(@var{leaf} @var{path}
+@var{stat} @var{init})} is returned.
+
+The special @file{.} and @file{..} entries are not passed to these
+procedures. The @var{path} argument to the procedures is a full file
+name---e.g., @code{"../foo/bar/gnu"}; if @var{file-name} is an absolute
+file name, then @var{path} is also an absolute file name. Files and
+directories, as identified by their device/inode number pair, are
+traversed only once.
+
+The example below illustrates the use of @code{file-system-fold}:
+
+@example
+(define (total-file-size file-name)
+ "Return the size in bytes of the files under FILE-NAME (similar
+to `du --apparent-size' with GNU Coreutils.)"
+
+ (define (enter? name stat result)
+ ;; Skip version control directories.
+ (not (member (basename name) '(".git" ".svn" "CVS"))))
+ (define (leaf name stat result)
+ ;; Return RESULT plus the size of the file at NAME.
+ (+ result (stat:size stat)))
+
+ ;; Count zero bytes for directories.
+ (define (down name stat result) result)
+ (define (up name stat result) result)
+
+ ;; Likewise for skipped directories.
+ (define (skip name stat result) result)
+
+ (file-system-fold enter? leaf down up skip
+ 0 ; initial counter is zero bytes
+ file-name))
+
+(total-file-size ".")
+@result{} 8217554
+
+(total-file-size "/dev/null")
+@result{} 0
+@end example
+@end defun
+
+The alternative C-like functions are described below.
+
@defun ftw startname proc ['hash-size n]
Walk the file system tree descending from @var{startname}, calling
@var{proc} for each file and directory.
diff --git a/module/ice-9/ftw.scm b/module/ice-9/ftw.scm
index e6ac0b462..539d80b5f 100644
--- a/module/ice-9/ftw.scm
+++ b/module/ice-9/ftw.scm
@@ -1,6 +1,6 @@
;;;; ftw.scm --- file system tree walk
-;;;; Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc.
+;;;; Copyright (C) 2002, 2003, 2006, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -190,7 +190,12 @@
;;; Code:
(define-module (ice-9 ftw)
- :export (ftw nftw))
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 vlist)
+ #:use-module (srfi srfi-1)
+ #:export (ftw nftw
+ file-system-fold
+ file-system-tree))
(define (directory-files dir)
(let ((dir-stream (opendir dir)))
@@ -377,4 +382,125 @@
(chdir od)
ret))))
+
+;;;
+;;; `file-system-fold' & co.
+;;;
+
+(define (file-system-fold enter? leaf down up skip init file-name)
+ "Traverse the directory at FILE-NAME, recursively. Enter
+sub-directories only when (ENTER? PATH STAT RESULT) returns true. When
+a sub-directory is entered, call (DOWN PATH STAT RESULT), where PATH is
+the path of the sub-directory and STAT the result of (lstat PATH); when
+it is left, call (UP PATH STAT RESULT). For each file in a directory,
+call (LEAF PATH STAT RESULT). When ENTER? returns false, call (SKIP
+PATH STAT RESULT). Return the result of these successive applications.
+When FILE-NAME names a flat file,(LEAF PATH STAT INIT) is returned."
+
+ (define (mark v s)
+ (vhash-cons (cons (stat:dev s) (stat:ino s)) #t v))
+
+ (define (visited? v s)
+ (vhash-assoc (cons (stat:dev s) (stat:ino s)) v))
+
+ (let loop ((name file-name)
+ (path "")
+ (dir-stat (false-if-exception (lstat file-name)))
+ (result init)
+ (visited vlist-null))
+
+ (define full-name
+ (if (string=? path "")
+ name
+ (string-append path "/" name)))
+
+ (cond
+ ((not dir-stat)
+ ;; FILE-NAME is not readable.
+ (leaf full-name dir-stat result))
+ ((visited? visited dir-stat)
+ (values result visited))
+ ((eq? 'directory (stat:type dir-stat)) ; true except perhaps the 1st time
+ (if (enter? full-name dir-stat result)
+ (let ((dir (false-if-exception (opendir full-name)))
+ (visited (mark visited dir-stat)))
+ (if dir
+ (let liip ((entry (readdir dir))
+ (result (down full-name dir-stat result))
+ (subdirs '()))
+ (cond ((eof-object? entry)
+ (begin
+ (closedir dir)
+ (let ((r+v
+ (fold (lambda (subdir result+visited)
+ (call-with-values
+ (lambda ()
+ (loop (car subdir)
+ full-name
+ (cdr subdir)
+ (car result+visited)
+ (cdr result+visited)))
+ cons))
+ (cons result visited)
+ subdirs)))
+ (values (up full-name dir-stat (car r+v))
+ (cdr r+v)))))
+ ((or (string=? entry ".")
+ (string=? entry ".."))
+ (liip (readdir dir)
+ result
+ subdirs))
+ (else
+ (let* ((child (string-append full-name "/" entry))
+ (stat (lstat child))) ; cannot fail
+ (cond
+ ((eq? (stat:type stat) 'directory)
+ (liip (readdir dir)
+ result
+ (alist-cons entry stat subdirs)))
+ (else
+ (liip (readdir dir)
+ (leaf child stat result)
+ subdirs)))))))
+
+ ;; Directory FULL-NAME not readable.
+ ;; XXX: It's up to the user to distinguish between not
+ ;; readable and not ENTER?.
+ (values (skip full-name dir-stat result)
+ visited)))
+ (values (skip full-name dir-stat result)
+ (mark visited dir-stat))))
+ (else
+ ;; Caller passed a FILE-NAME that names a flat file, not a directory.
+ (leaf full-name dir-stat result)))))
+
+(define* (file-system-tree file-name #:optional (enter? (lambda (n s) #t)))
+ "Return a tree of the form (FILE-NAME STAT CHILDREN ...) where STAT is
+the result of (lstat FILE-NAME) and CHILDREN are similar structures for
+each file contained in FILE-NAME when it designates a directory. The
+optional ENTER? predicate is invoked as (ENTER? NAME STAT) and should
+return true to allow recursion into directory NAME; the default value is
+a procedure that always returns #t. When a directory does not match
+ENTER?, it nonetheless appears in the resulting tree, only with zero
+children."
+ (define (enter?* name stat result)
+ (enter? name stat))
+ (define (leaf name stat result)
+ (match result
+ (((siblings ...) rest ...)
+ (cons (alist-cons (basename name) (cons stat '()) siblings)
+ rest))))
+ (define (down name stat result)
+ (cons '() result))
+ (define (up name stat result)
+ (match result
+ (((children ...) (siblings ...) rest ...)
+ (cons (alist-cons (basename name) (cons stat children)
+ siblings)
+ rest))))
+ (define skip ; keep an entry for skipped directories
+ leaf)
+
+ (caar (file-system-fold enter?* leaf down up skip '(()) file-name)))
+
;;; ftw.scm ends here
diff --git a/test-suite/tests/ftw.test b/test-suite/tests/ftw.test
index 847fb9ff4..40e4c2a35 100644
--- a/test-suite/tests/ftw.test
+++ b/test-suite/tests/ftw.test
@@ -1,6 +1,6 @@
;;;; ftw.test --- exercise ice-9/ftw.scm -*- scheme -*-
;;;;
-;;;; Copyright 2006 Free Software Foundation, Inc.
+;;;; Copyright 2006, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,10 @@
(define-module (test-suite test-ice-9-ftw)
#:use-module (test-suite lib)
- #:use-module (ice-9 ftw))
+ #:use-module (ice-9 ftw)
+ #:use-module (ice-9 match)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-26))
;; the procedure-source checks here ensure the vector indexes we write match
@@ -72,3 +75,94 @@
(pass-if "5 7 - 2nd" (eq? #t (try-visited? 5 7)))
(pass-if "7 5 - 2nd" (eq? #t (try-visited? 7 5)))
(pass-if "7 7 - 2nd" (eq? #t (try-visited? 7 7)))))
+
+
+;;;
+;;; `file-system-fold' & co.
+;;;
+
+(define %top-srcdir
+ (assq-ref %guile-build-info 'top_srcdir))
+
+(define %test-dir
+ (string-append %top-srcdir "/test-suite"))
+
+(with-test-prefix "file-system-fold"
+
+ (pass-if "test-suite"
+ (let ((enter? (lambda (n s r)
+ ;; Enter only `test-suite/tests/'.
+ (if (member `(down ,%test-dir) r)
+ (string=? (basename n) "tests")
+ (string=? (basename n) "test-suite"))))
+ (leaf (lambda (n s r) (cons `(leaf ,n) r)))
+ (down (lambda (n s r) (cons `(down ,n) r)))
+ (up (lambda (n s r) (cons `(up ,n) r)))
+ (skip (lambda (n s r) (cons `(skip ,n) r))))
+ (define seq
+ (reverse
+ (file-system-fold enter? leaf down up skip '() %test-dir)))
+
+ (match seq
+ ((('down (? (cut string=? <> %test-dir)))
+ between ...
+ ('up (? (cut string=? <> %test-dir))))
+ (and (any (match-lambda (('leaf (= basename "lib.scm")) #t) (_ #f))
+ between)
+ (any (match-lambda (('down (= basename "tests")) #t) (_ #f))
+ between)
+ (any (match-lambda (('leaf (= basename "alist.test")) #t) (_ #f))
+ between)
+ (any (match-lambda (('up (= basename "tests")) #t) (_ #f))
+ between)
+ (any (match-lambda (('skip (= basename "vm")) #t) (_ #f))
+ between))))))
+
+ (pass-if "test-suite (never enter)"
+ (let ((enter? (lambda (n s r) #f))
+ (leaf (lambda (n s r) (cons `(leaf ,n) r)))
+ (down (lambda (n s r) (cons `(down ,n) r)))
+ (up (lambda (n s r) (cons `(up ,n) r)))
+ (skip (lambda (n s r) (cons `(skip ,n) r))))
+ (equal? (file-system-fold enter? leaf down up skip '() %test-dir)
+ `((skip , %test-dir)))))
+
+ (pass-if "test-suite/lib.scm (flat file)"
+ (let ((enter? (lambda (n s r) #t))
+ (leaf (lambda (n s r) (cons `(leaf ,n) r)))
+ (down (lambda (n s r) (cons `(down ,n) r)))
+ (up (lambda (n s r) (cons `(up ,n) r)))
+ (skip (lambda (n s r) (cons `(skip ,n) r)))
+ (name (string-append %test-dir "/lib.scm")))
+ (equal? (file-system-fold enter? leaf down up skip '() name)
+ `((leaf ,name))))))
+
+(with-test-prefix "file-system-tree"
+
+ (pass-if "test-suite (never enter)"
+ (match (file-system-tree %test-dir (lambda (n s) #f))
+ (("test-suite" (= stat:type 'directory)) ; no children
+ #t)))
+
+ (pass-if "test-suite/*"
+ (match (file-system-tree %test-dir (lambda (n s)
+ (string=? n %test-dir)))
+ (("test-suite" (= stat:type 'directory) children ...)
+ (any (match-lambda
+ (("tests" (= stat:type 'directory)) ; no children
+ #t)
+ (_ #f))
+ children))))
+
+ (pass-if "test-suite (recursive)"
+ (match (file-system-tree %test-dir)
+ (("test-suite" (= stat:type 'directory) children ...)
+ (any (match-lambda
+ (("tests" (= stat:type 'directory) (= car files) ...)
+ (let ((expected '("alist.test" "bytevectors.test"
+ "ftw.test" "gc.test" "vlist.test")))
+ (lset= string=?
+ (lset-intersection string=? files expected)
+ expected)))
+ (_ #f))
+ children)))))