diff options
author | Ludovic Courtès <ludo@gnu.org> | 2011-12-13 23:54:26 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2011-12-13 23:56:19 +0100 |
commit | 243db01e51297cf7e165ee91e96221426b12b345 (patch) | |
tree | 4e4d332578e01dfd7d4c8f0a49fe88a701c03721 | |
parent | ac16263bc191f6b1f05b49d0f291f7fd938da72c (diff) |
Add `file-system-fold' and `file-system-tree' to (ice-9 ftw).
* module/ice-9/ftw.scm (file-system-fold, file-system-tree): New
procedures.
* test-suite/tests/ftw.test (%top-srcdir, %test-dir): New variables.
("file-system-fold", "file-system-tree"): New test prefixes.
* doc/ref/misc-modules.texi (File Tree Walk): Document
`file-system-tree' and `file-system-fold'.
-rw-r--r-- | doc/ref/misc-modules.texi | 136 | ||||
-rw-r--r-- | module/ice-9/ftw.scm | 130 | ||||
-rw-r--r-- | test-suite/tests/ftw.test | 98 |
3 files changed, 357 insertions, 7 deletions
diff --git a/doc/ref/misc-modules.texi b/doc/ref/misc-modules.texi index 3dbe981b8..ee124899a 100644 --- a/doc/ref/misc-modules.texi +++ b/doc/ref/misc-modules.texi @@ -1099,15 +1099,145 @@ try to use one of them. The reason for two versions is that the full @cindex file tree walk The functions in this section traverse a tree of files and -directories, in a fashion similar to the C @code{ftw} and @code{nftw} -routines (@pxref{Working with Directory Trees,,, libc, GNU C Library -Reference Manual}). +directories. They come in two flavors: the first one is a high-level +functional interface, and the second one is similar to the C @code{ftw} +and @code{nftw} routines (@pxref{Working with Directory Trees,,, libc, +GNU C Library Reference Manual}). @example (use-modules (ice-9 ftw)) @end example @sp 1 +@defun file-system-tree file-name [enter?] +Return a tree of the form @code{(@var{file-name} @var{stat} +@var{children} ...)} where @var{stat} is the result of @code{(lstat +@var{file-name})} and @var{children} are similar structures for each +file contained in @var{file-name} when it designates a directory. + +The optional @var{enter?} predicate is invoked as @code{(@var{enter?} +@var{name} @var{stat})} and should return true to allow recursion into +directory @var{name}; the default value is a procedure that always +returns @code{#t}. When a directory does not match @var{enter?}, it +nonetheless appears in the resulting tree, only with zero children. + +The example below shows how to obtain a hierarchical listing of the +files under the @file{module/language} directory in the Guile source +tree, discarding their @code{stat} info: + +@example +(use-modules (ice-9 match)) + +(define remove-stat + ;; Remove the `stat' object the `file-system-tree' provides + ;; for each file in the tree. + (match-lambda + ((name stat) ; flat file + name) + ((name stat children ...) ; directory + (list name (map remove-stat children))))) + +(let ((dir (string-append (assq-ref %guile-build-info 'top_srcdir) + "/module/language"))) + (remove-stat (file-system-tree dir))) + +@result{} +("language" + (("value" ("spec.go" "spec.scm")) + ("scheme" + ("spec.go" + "spec.scm" + "compile-tree-il.scm" + "decompile-tree-il.scm" + "decompile-tree-il.go" + "compile-tree-il.go")) + ("tree-il" + ("spec.go" + "fix-letrec.go" + "inline.go" + "fix-letrec.scm" + "compile-glil.go" + "spec.scm" + "optimize.scm" + "primitives.scm" + @dots{})) + @dots{})) +@end example +@end defun + +@cindex file system combinator + +It is often desirable to process directories entries directly, rather +than building up a tree of entries in memory, like +@code{file-system-tree} does. The following procedure, a +@dfn{combinator}, is designed to allow directory entries to be processed +directly as a directory tree is traversed; in fact, +@code{file-system-tree} is implemented in terms of it. + +@defun file-system-fold enter? leaf down up skip init file-name +Traverse the directory at @var{file-name}, recursively, and return the +result of the successive applications of the @var{leaf}, @var{down}, +@var{up}, and @var{skip} procedures as described below. + +Enter sub-directories only when @code{(@var{enter?} @var{path} +@var{stat} @var{result})} returns true. When a sub-directory is +entered, call @code{(@var{down} @var{path} @var{stat} @var{result})}, +where @var{path} is the path of the sub-directory and @var{stat} the +result of @code{(false-if-exception (lstat @var{path}))}; when it is +left, call @code{(@var{up} @var{path} @var{stat} @var{result})}. + +For each file in a directory, call @code{(@var{leaf} @var{path} +@var{stat} @var{result})}. + +When @var{enter?} returns @code{#f}, or when an unreadable directory is +encountered, call @code{(@var{skip} @var{path} @var{stat} +@var{result})}. + +When @var{file-name} names a flat file, @code{(@var{leaf} @var{path} +@var{stat} @var{init})} is returned. + +The special @file{.} and @file{..} entries are not passed to these +procedures. The @var{path} argument to the procedures is a full file +name---e.g., @code{"../foo/bar/gnu"}; if @var{file-name} is an absolute +file name, then @var{path} is also an absolute file name. Files and +directories, as identified by their device/inode number pair, are +traversed only once. + +The example below illustrates the use of @code{file-system-fold}: + +@example +(define (total-file-size file-name) + "Return the size in bytes of the files under FILE-NAME (similar +to `du --apparent-size' with GNU Coreutils.)" + + (define (enter? name stat result) + ;; Skip version control directories. + (not (member (basename name) '(".git" ".svn" "CVS")))) + (define (leaf name stat result) + ;; Return RESULT plus the size of the file at NAME. + (+ result (stat:size stat))) + + ;; Count zero bytes for directories. + (define (down name stat result) result) + (define (up name stat result) result) + + ;; Likewise for skipped directories. + (define (skip name stat result) result) + + (file-system-fold enter? leaf down up skip + 0 ; initial counter is zero bytes + file-name)) + +(total-file-size ".") +@result{} 8217554 + +(total-file-size "/dev/null") +@result{} 0 +@end example +@end defun + +The alternative C-like functions are described below. + @defun ftw startname proc ['hash-size n] Walk the file system tree descending from @var{startname}, calling @var{proc} for each file and directory. diff --git a/module/ice-9/ftw.scm b/module/ice-9/ftw.scm index e6ac0b462..539d80b5f 100644 --- a/module/ice-9/ftw.scm +++ b/module/ice-9/ftw.scm @@ -1,6 +1,6 @@ ;;;; ftw.scm --- file system tree walk -;;;; Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc. +;;;; Copyright (C) 2002, 2003, 2006, 2011 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -190,7 +190,12 @@ ;;; Code: (define-module (ice-9 ftw) - :export (ftw nftw)) + #:use-module (ice-9 match) + #:use-module (ice-9 vlist) + #:use-module (srfi srfi-1) + #:export (ftw nftw + file-system-fold + file-system-tree)) (define (directory-files dir) (let ((dir-stream (opendir dir))) @@ -377,4 +382,125 @@ (chdir od) ret)))) + +;;; +;;; `file-system-fold' & co. +;;; + +(define (file-system-fold enter? leaf down up skip init file-name) + "Traverse the directory at FILE-NAME, recursively. Enter +sub-directories only when (ENTER? PATH STAT RESULT) returns true. When +a sub-directory is entered, call (DOWN PATH STAT RESULT), where PATH is +the path of the sub-directory and STAT the result of (lstat PATH); when +it is left, call (UP PATH STAT RESULT). For each file in a directory, +call (LEAF PATH STAT RESULT). When ENTER? returns false, call (SKIP +PATH STAT RESULT). Return the result of these successive applications. +When FILE-NAME names a flat file,(LEAF PATH STAT INIT) is returned." + + (define (mark v s) + (vhash-cons (cons (stat:dev s) (stat:ino s)) #t v)) + + (define (visited? v s) + (vhash-assoc (cons (stat:dev s) (stat:ino s)) v)) + + (let loop ((name file-name) + (path "") + (dir-stat (false-if-exception (lstat file-name))) + (result init) + (visited vlist-null)) + + (define full-name + (if (string=? path "") + name + (string-append path "/" name))) + + (cond + ((not dir-stat) + ;; FILE-NAME is not readable. + (leaf full-name dir-stat result)) + ((visited? visited dir-stat) + (values result visited)) + ((eq? 'directory (stat:type dir-stat)) ; true except perhaps the 1st time + (if (enter? full-name dir-stat result) + (let ((dir (false-if-exception (opendir full-name))) + (visited (mark visited dir-stat))) + (if dir + (let liip ((entry (readdir dir)) + (result (down full-name dir-stat result)) + (subdirs '())) + (cond ((eof-object? entry) + (begin + (closedir dir) + (let ((r+v + (fold (lambda (subdir result+visited) + (call-with-values + (lambda () + (loop (car subdir) + full-name + (cdr subdir) + (car result+visited) + (cdr result+visited))) + cons)) + (cons result visited) + subdirs))) + (values (up full-name dir-stat (car r+v)) + (cdr r+v))))) + ((or (string=? entry ".") + (string=? entry "..")) + (liip (readdir dir) + result + subdirs)) + (else + (let* ((child (string-append full-name "/" entry)) + (stat (lstat child))) ; cannot fail + (cond + ((eq? (stat:type stat) 'directory) + (liip (readdir dir) + result + (alist-cons entry stat subdirs))) + (else + (liip (readdir dir) + (leaf child stat result) + subdirs))))))) + + ;; Directory FULL-NAME not readable. + ;; XXX: It's up to the user to distinguish between not + ;; readable and not ENTER?. + (values (skip full-name dir-stat result) + visited))) + (values (skip full-name dir-stat result) + (mark visited dir-stat)))) + (else + ;; Caller passed a FILE-NAME that names a flat file, not a directory. + (leaf full-name dir-stat result))))) + +(define* (file-system-tree file-name #:optional (enter? (lambda (n s) #t))) + "Return a tree of the form (FILE-NAME STAT CHILDREN ...) where STAT is +the result of (lstat FILE-NAME) and CHILDREN are similar structures for +each file contained in FILE-NAME when it designates a directory. The +optional ENTER? predicate is invoked as (ENTER? NAME STAT) and should +return true to allow recursion into directory NAME; the default value is +a procedure that always returns #t. When a directory does not match +ENTER?, it nonetheless appears in the resulting tree, only with zero +children." + (define (enter?* name stat result) + (enter? name stat)) + (define (leaf name stat result) + (match result + (((siblings ...) rest ...) + (cons (alist-cons (basename name) (cons stat '()) siblings) + rest)))) + (define (down name stat result) + (cons '() result)) + (define (up name stat result) + (match result + (((children ...) (siblings ...) rest ...) + (cons (alist-cons (basename name) (cons stat children) + siblings) + rest)))) + (define skip ; keep an entry for skipped directories + leaf) + + (caar (file-system-fold enter?* leaf down up skip '(()) file-name))) + ;;; ftw.scm ends here diff --git a/test-suite/tests/ftw.test b/test-suite/tests/ftw.test index 847fb9ff4..40e4c2a35 100644 --- a/test-suite/tests/ftw.test +++ b/test-suite/tests/ftw.test @@ -1,6 +1,6 @@ ;;;; ftw.test --- exercise ice-9/ftw.scm -*- scheme -*- ;;;; -;;;; Copyright 2006 Free Software Foundation, Inc. +;;;; Copyright 2006, 2011 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -18,7 +18,10 @@ (define-module (test-suite test-ice-9-ftw) #:use-module (test-suite lib) - #:use-module (ice-9 ftw)) + #:use-module (ice-9 ftw) + #:use-module (ice-9 match) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26)) ;; the procedure-source checks here ensure the vector indexes we write match @@ -72,3 +75,94 @@ (pass-if "5 7 - 2nd" (eq? #t (try-visited? 5 7))) (pass-if "7 5 - 2nd" (eq? #t (try-visited? 7 5))) (pass-if "7 7 - 2nd" (eq? #t (try-visited? 7 7))))) + + +;;; +;;; `file-system-fold' & co. +;;; + +(define %top-srcdir + (assq-ref %guile-build-info 'top_srcdir)) + +(define %test-dir + (string-append %top-srcdir "/test-suite")) + +(with-test-prefix "file-system-fold" + + (pass-if "test-suite" + (let ((enter? (lambda (n s r) + ;; Enter only `test-suite/tests/'. + (if (member `(down ,%test-dir) r) + (string=? (basename n) "tests") + (string=? (basename n) "test-suite")))) + (leaf (lambda (n s r) (cons `(leaf ,n) r))) + (down (lambda (n s r) (cons `(down ,n) r))) + (up (lambda (n s r) (cons `(up ,n) r))) + (skip (lambda (n s r) (cons `(skip ,n) r)))) + (define seq + (reverse + (file-system-fold enter? leaf down up skip '() %test-dir))) + + (match seq + ((('down (? (cut string=? <> %test-dir))) + between ... + ('up (? (cut string=? <> %test-dir)))) + (and (any (match-lambda (('leaf (= basename "lib.scm")) #t) (_ #f)) + between) + (any (match-lambda (('down (= basename "tests")) #t) (_ #f)) + between) + (any (match-lambda (('leaf (= basename "alist.test")) #t) (_ #f)) + between) + (any (match-lambda (('up (= basename "tests")) #t) (_ #f)) + between) + (any (match-lambda (('skip (= basename "vm")) #t) (_ #f)) + between)))))) + + (pass-if "test-suite (never enter)" + (let ((enter? (lambda (n s r) #f)) + (leaf (lambda (n s r) (cons `(leaf ,n) r))) + (down (lambda (n s r) (cons `(down ,n) r))) + (up (lambda (n s r) (cons `(up ,n) r))) + (skip (lambda (n s r) (cons `(skip ,n) r)))) + (equal? (file-system-fold enter? leaf down up skip '() %test-dir) + `((skip , %test-dir))))) + + (pass-if "test-suite/lib.scm (flat file)" + (let ((enter? (lambda (n s r) #t)) + (leaf (lambda (n s r) (cons `(leaf ,n) r))) + (down (lambda (n s r) (cons `(down ,n) r))) + (up (lambda (n s r) (cons `(up ,n) r))) + (skip (lambda (n s r) (cons `(skip ,n) r))) + (name (string-append %test-dir "/lib.scm"))) + (equal? (file-system-fold enter? leaf down up skip '() name) + `((leaf ,name)))))) + +(with-test-prefix "file-system-tree" + + (pass-if "test-suite (never enter)" + (match (file-system-tree %test-dir (lambda (n s) #f)) + (("test-suite" (= stat:type 'directory)) ; no children + #t))) + + (pass-if "test-suite/*" + (match (file-system-tree %test-dir (lambda (n s) + (string=? n %test-dir))) + (("test-suite" (= stat:type 'directory) children ...) + (any (match-lambda + (("tests" (= stat:type 'directory)) ; no children + #t) + (_ #f)) + children)))) + + (pass-if "test-suite (recursive)" + (match (file-system-tree %test-dir) + (("test-suite" (= stat:type 'directory) children ...) + (any (match-lambda + (("tests" (= stat:type 'directory) (= car files) ...) + (let ((expected '("alist.test" "bytevectors.test" + "ftw.test" "gc.test" "vlist.test"))) + (lset= string=? + (lset-intersection string=? files expected) + expected))) + (_ #f)) + children))))) |