diff options
author | Ludovic Courtès <ludo@gnu.org> | 2020-12-10 15:12:34 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2020-12-15 17:32:10 +0100 |
commit | 6a060ff27ff68384d7c90076baa36c349fff689d (patch) | |
tree | f7b1f9c7a52e84848fbcaa90d4dc38c25d7d65eb | |
parent | dea1ee1fd740248307f74ca4cb70b94742264098 (diff) |
store-copy: 'populate-store' can optionally deduplicate files.
Until now deduplication was performed as an additional pass after
copying files, which involve re-traversing all the files that had just
been copied.
* guix/store/deduplication.scm (copy-file/deduplicate): New procedure.
* tests/store-deduplication.scm ("copy-file/deduplicate"): New test.
* guix/build/store-copy.scm (populate-store): Add #:deduplicate?
parameter and honor it.
* tests/gexp.scm ("gexp->derivation, store copy"): Pass #:deduplicate? #f
to 'populate-store'.
* gnu/build/image.scm (initialize-root-partition): Pass #:deduplicate?
to 'populate-store'. Pass #:deduplicate? #f to 'register-closure'.
* gnu/build/vm.scm (root-partition-initializer): Likewise.
* gnu/build/install.scm (populate-single-profile-directory): Pass
#:deduplicate? #f to 'populate-store'.
* gnu/build/linux-initrd.scm (build-initrd): Likewise.
* guix/scripts/pack.scm (self-contained-tarball)[import-module?]: New
procedure.
[build]: Pass it as an argument to 'source-module-closure'.
* guix/scripts/pack.scm (squashfs-image)[build]: Wrap in
'with-extensions'.
* gnu/system/linux-initrd.scm (expression->initrd)[import-module?]: New
procedure.
[builder]: Pass it to 'source-module-closure'.
* gnu/system/install.scm (cow-store-service-type)[import-module?]: New
procedure. Pass it to 'source-module-closure'.
-rw-r--r-- | gnu/build/image.scm | 5 | ||||
-rw-r--r-- | gnu/build/install.scm | 3 | ||||
-rw-r--r-- | gnu/build/linux-initrd.scm | 3 | ||||
-rw-r--r-- | gnu/build/vm.scm | 5 | ||||
-rw-r--r-- | gnu/system/install.scm | 12 | ||||
-rw-r--r-- | gnu/system/linux-initrd.scm | 10 | ||||
-rw-r--r-- | guix/build/store-copy.scm | 13 | ||||
-rw-r--r-- | guix/scripts/pack.scm | 258 | ||||
-rw-r--r-- | guix/store/deduplication.scm | 16 | ||||
-rw-r--r-- | tests/gexp.scm | 3 | ||||
-rw-r--r-- | tests/store-deduplication.scm | 18 |
11 files changed, 207 insertions, 139 deletions
diff --git a/gnu/build/image.scm b/gnu/build/image.scm index 0deea10a9d..8f50f27f78 100644 --- a/gnu/build/image.scm +++ b/gnu/build/image.scm @@ -186,7 +186,8 @@ rest of the store when registering the closures. SYSTEM-DIRECTORY is the name of the directory of the 'system' derivation. Pass WAL-MODE? to register-closure." (populate-root-file-system system-directory root) - (populate-store references-graphs root) + (populate-store references-graphs root + #:deduplicate? deduplicate?) ;; Populate /dev. (when make-device-nodes @@ -195,7 +196,7 @@ register-closure." (when register-closures? (for-each (lambda (closure) (register-closure root closure - #:deduplicate? deduplicate? + #:deduplicate? #f #:wal-mode? wal-mode?)) references-graphs)) diff --git a/gnu/build/install.scm b/gnu/build/install.scm index 63995e1d09..f5c8407b89 100644 --- a/gnu/build/install.scm +++ b/gnu/build/install.scm @@ -214,7 +214,8 @@ This is used to create the self-contained tarballs with 'guix pack'." (symlink old (scope new))) ;; Populate the store. - (populate-store (list closure) directory) + (populate-store (list closure) directory + #:deduplicate? #f) (when database (install-database-and-gc-roots directory database profile diff --git a/gnu/build/linux-initrd.scm b/gnu/build/linux-initrd.scm index 99796adba6..bb2ed0db0c 100644 --- a/gnu/build/linux-initrd.scm +++ b/gnu/build/linux-initrd.scm @@ -127,7 +127,8 @@ REFERENCES-GRAPHS." (mkdir "contents") ;; Copy the closures of all the items referenced in REFERENCES-GRAPHS. - (populate-store references-graphs "contents") + (populate-store references-graphs "contents" + #:deduplicate? #f) (with-directory-excursion "contents" ;; Make '/init'. diff --git a/gnu/build/vm.scm b/gnu/build/vm.scm index abb0317faf..03be5697b7 100644 --- a/gnu/build/vm.scm +++ b/gnu/build/vm.scm @@ -395,7 +395,8 @@ system that is passed to 'populate-root-file-system'." (when copy-closures? ;; Populate the store. (populate-store (map (cut string-append "/xchg/" <>) closures) - target)) + target + #:deduplicate? deduplicate?)) ;; Populate /dev. (make-device-nodes target) @@ -412,7 +413,7 @@ system that is passed to 'populate-root-file-system'." (for-each (lambda (closure) (register-closure target (string-append "/xchg/" closure) - #:deduplicate? deduplicate?)) + #:deduplicate? #f)) closures) (unless copy-closures? (umount target-store))) diff --git a/gnu/system/install.scm b/gnu/system/install.scm index a6b9e3d952..e753463473 100644 --- a/gnu/system/install.scm +++ b/gnu/system/install.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès <ludo@gnu.org> +;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org> ;;; Copyright © 2016 Andreas Enge <andreas@enge.fr> ;;; Copyright © 2017 Marius Bakke <mbakke@fastmail.com> @@ -176,6 +176,13 @@ manual." (shepherd-service-type 'cow-store (lambda _ + (define (import-module? module) + ;; Since we don't use deduplication support in 'populate-store', don't + ;; import (guix store deduplication) and its dependencies, which + ;; includes Guile-Gcrypt. + (and (guix-module-name? module) + (not (equal? module '(guix store deduplication))))) + (shepherd-service (requirement '(root-file-system user-processes)) (provision '(cow-store)) @@ -190,7 +197,8 @@ the given target.") ,@%default-modules)) (start (with-imported-modules (source-module-closure - '((gnu build install))) + '((gnu build install)) + #:select? import-module?) #~(case-lambda ((target) (mount-cow-store target #$%backing-directory) diff --git a/gnu/system/linux-initrd.scm b/gnu/system/linux-initrd.scm index 4fb1d863c9..c6ba9bb560 100644 --- a/gnu/system/linux-initrd.scm +++ b/gnu/system/linux-initrd.scm @@ -76,12 +76,20 @@ the derivations referenced by EXP are automatically copied to the initrd." (define init (program-file "init" exp #:guile guile)) + (define (import-module? module) + ;; Since we don't use deduplication support in 'populate-store', don't + ;; import (guix store deduplication) and its dependencies, which includes + ;; Guile-Gcrypt. That way we can run tests with '--bootstrap'. + (and (guix-module-name? module) + (not (equal? module '(guix store deduplication))))) + (define builder ;; Do not use "guile-zlib" extension here, otherwise it would drag the ;; non-static "zlib" package to the initrd closure. It is not needed ;; anyway because the modules are stored uncompressed within the initrd. (with-imported-modules (source-module-closure - '((gnu build linux-initrd))) + '((gnu build linux-initrd)) + #:select? import-module?) #~(begin (use-modules (gnu build linux-initrd)) diff --git a/guix/build/store-copy.scm b/guix/build/store-copy.scm index 95dcb8e114..7f0672cd9d 100644 --- a/guix/build/store-copy.scm +++ b/guix/build/store-copy.scm @@ -20,6 +20,7 @@ #:use-module ((guix build utils) #:hide (copy-recursively)) #:use-module (guix sets) #:use-module (guix progress) + #:autoload (guix store deduplication) (copy-file/deduplicate) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-26) @@ -242,10 +243,13 @@ permissions. Write verbose output to the LOG port." lstat))) (define* (populate-store reference-graphs target - #:key (log-port (current-error-port))) + #:key + (deduplicate? #t) + (log-port (current-error-port))) "Populate the store under directory TARGET with the items specified in REFERENCE-GRAPHS, a list of reference-graph files. Items copied to TARGET -maintain timestamps and permissions." +maintain timestamps and permissions. When DEDUPLICATE? is true, deduplicate +regular files as they are copied to TARGET." (define store (string-append target (%store-directory))) @@ -273,6 +277,11 @@ maintain timestamps and permissions." (string-append target thing) #:keep-mtime? #t #:keep-permissions? #t + #:copy-file + (if deduplicate? + (cut copy-file/deduplicate <> <> + #:store store) + copy-file) #:log (%make-void-port "w")) (report)) things))))) diff --git a/guix/scripts/pack.scm b/guix/scripts/pack.scm index 1612ec8f04..440c4b0903 100644 --- a/guix/scripts/pack.scm +++ b/guix/scripts/pack.scm @@ -203,12 +203,19 @@ added to the pack." #+(file-append glibc-utf8-locales "/lib/locale")) (setlocale LC_ALL "en_US.utf8")))) + (define (import-module? module) + ;; Since we don't use deduplication support in 'populate-store', don't + ;; import (guix store deduplication) and its dependencies, which includes + ;; Guile-Gcrypt. That way we can run tests with '--bootstrap'. + (and (not-config? module) + (not (equal? '(guix store deduplication) module)))) + (define build (with-imported-modules (source-module-closure `((guix build utils) (guix build union) (gnu build install)) - #:select? not-config?) + #:select? import-module?) #~(begin (use-modules (guix build utils) ((guix build union) #:select (relative-file-name)) @@ -382,138 +389,139 @@ added to the pack." `(("/bin" -> "bin") ,@symlinks))) (define build - (with-imported-modules (source-module-closure - '((guix build utils) - (guix build store-copy) - (guix build union) - (gnu build install)) - #:select? not-config?) - #~(begin - (use-modules (guix build utils) - (guix build store-copy) - ((guix build union) #:select (relative-file-name)) - (gnu build install) - (srfi srfi-1) - (srfi srfi-26) - (ice-9 match)) + (with-extensions (list guile-gcrypt) + (with-imported-modules (source-module-closure + '((guix build utils) + (guix build store-copy) + (guix build union) + (gnu build install)) + #:select? not-config?) + #~(begin + (use-modules (guix build utils) + (guix build store-copy) + ((guix build union) #:select (relative-file-name)) + (gnu build install) + (srfi srfi-1) + (srfi srfi-26) + (ice-9 match)) - (define database #+database) - (define entry-point #$entry-point) + (define database #+database) + (define entry-point #$entry-point) - (define (mksquashfs args) - (apply invoke "mksquashfs" - `(,@args + (define (mksquashfs args) + (apply invoke "mksquashfs" + `(,@args - ;; Do not create a "recovery file" when appending to the - ;; file system since it's useless in this case. - "-no-recovery" + ;; Do not create a "recovery file" when appending to the + ;; file system since it's useless in this case. + "-no-recovery" - ;; Do not attempt to store extended attributes. - ;; See <https://bugs.gnu.org/40043>. - "-no-xattrs" + ;; Do not attempt to store extended attributes. + ;; See <https://bugs.gnu.org/40043>. + "-no-xattrs" - ;; Set file times and the file system creation time to - ;; one second after the Epoch. - "-all-time" "1" "-mkfs-time" "1" + ;; Set file times and the file system creation time to + ;; one second after the Epoch. + "-all-time" "1" "-mkfs-time" "1" - ;; Reset all UIDs and GIDs. - "-force-uid" "0" "-force-gid" "0"))) + ;; Reset all UIDs and GIDs. + "-force-uid" "0" "-force-gid" "0"))) - (setenv "PATH" #+(file-append archiver "/bin")) + (setenv "PATH" #+(file-append archiver "/bin")) - ;; We need an empty file in order to have a valid file argument when - ;; we reparent the root file system. Read on for why that's - ;; necessary. - (with-output-to-file ".empty" (lambda () (display ""))) - - ;; Create the squashfs image in several steps. - ;; Add all store items. Unfortunately mksquashfs throws away all - ;; ancestor directories and only keeps the basename. We fix this - ;; in the following invocations of mksquashfs. - (mksquashfs `(,@(map store-info-item - (call-with-input-file "profile" - read-reference-graph)) - #$environment - ,#$output - - ;; Do not perform duplicate checking because we - ;; don't have any dupes. - "-no-duplicates" - "-comp" - ,#+(compressor-name compressor))) - - ;; Here we reparent the store items. For each sub-directory of - ;; the store prefix we need one invocation of "mksquashfs". - (for-each (lambda (dir) - (mksquashfs `(".empty" - ,#$output - "-root-becomes" ,dir))) - (reverse (string-tokenize (%store-directory) - (char-set-complement (char-set #\/))))) - - ;; Add symlinks and mount points. - (mksquashfs - `(".empty" - ,#$output - ;; Create SYMLINKS via pseudo file definitions. - ,@(append-map - (match-lambda - ((source '-> target) - ;; Create relative symlinks to work around a bug in - ;; Singularity 2.x: - ;; https://bugs.gnu.org/34913 - ;; https://github.com/sylabs/singularity/issues/1487 - (let ((target (string-append #$profile "/" target))) - (list "-p" - (string-join - ;; name s mode uid gid symlink - (list source - "s" "777" "0" "0" - (relative-file-name (dirname source) - target))))))) - '#$symlinks*) - - "-p" "/.singularity.d d 555 0 0" - - ;; Create the environment file. - "-p" "/.singularity.d/env d 555 0 0" - "-p" ,(string-append - "/.singularity.d/env/90-environment.sh s 777 0 0 " - (relative-file-name "/.singularity.d/env" - #$environment)) - - ;; Create /.singularity.d/actions, and optionally the 'run' - ;; script, used by 'singularity run'. - "-p" "/.singularity.d/actions d 555 0 0" - - ,@(if entry-point - `(;; This one if for Singularity 2.x. - "-p" - ,(string-append - "/.singularity.d/actions/run s 777 0 0 " - (relative-file-name "/.singularity.d/actions" - (string-append #$profile "/" - entry-point))) - - ;; This one is for Singularity 3.x. - "-p" - ,(string-append - "/.singularity.d/runscript s 777 0 0 " - (relative-file-name "/.singularity.d" - (string-append #$profile "/" - entry-point)))) - '()) - - ;; Create empty mount points. - "-p" "/proc d 555 0 0" - "-p" "/sys d 555 0 0" - "-p" "/dev d 555 0 0" - "-p" "/home d 555 0 0")) - - (when database - ;; Initialize /var/guix. - (install-database-and-gc-roots "var-etc" database #$profile) - (mksquashfs `("var-etc" ,#$output)))))) + ;; We need an empty file in order to have a valid file argument when + ;; we reparent the root file system. Read on for why that's + ;; necessary. + (with-output-to-file ".empty" (lambda () (display ""))) + + ;; Create the squashfs image in several steps. + ;; Add all store items. Unfortunately mksquashfs throws away all + ;; ancestor directories and only keeps the basename. We fix this + ;; in the following invocations of mksquashfs. + (mksquashfs `(,@(map store-info-item + (call-with-input-file "profile" + read-reference-graph)) + #$environment + ,#$output + + ;; Do not perform duplicate checking because we + ;; don't have any dupes. + "-no-duplicates" + "-comp" + ,#+(compressor-name compressor))) + + ;; Here we reparent the store items. For each sub-directory of + ;; the store prefix we need one invocation of "mksquashfs". + (for-each (lambda (dir) + (mksquashfs `(".empty" + ,#$output + "-root-becomes" ,dir))) + (reverse (string-tokenize (%store-directory) + (char-set-complement (char-set #\/))))) + + ;; Add symlinks and mount points. + (mksquashfs + `(".empty" + ,#$output + ;; Create SYMLINKS via pseudo file definitions. + ,@(append-map + (match-lambda + ((source '-> target) + ;; Create relative symlinks to work around a bug in + ;; Singularity 2.x: + ;; https://bugs.gnu.org/34913 + ;; https://github.com/sylabs/singularity/issues/1487 + (let ((target (string-append #$profile "/" target))) + (list "-p" + (string-join + ;; name s mode uid gid symlink + (list source + "s" "777" "0" "0" + (relative-file-name (dirname source) + target))))))) + '#$symlinks*) + + "-p" "/.singularity.d d 555 0 0" + + ;; Create the environment file. + "-p" "/.singularity.d/env d 555 0 0" + "-p" ,(string-append + "/.singularity.d/env/90-environment.sh s 777 0 0 " + (relative-file-name "/.singularity.d/env" + #$environment)) + + ;; Create /.singularity.d/actions, and optionally the 'run' + ;; script, used by 'singularity run'. + "-p" "/.singularity.d/actions d 555 0 0" + + ,@(if entry-point + `( ;; This one if for Singularity 2.x. + "-p" + ,(string-append + "/.singularity.d/actions/run s 777 0 0 " + (relative-file-name "/.singularity.d/actions" + (string-append #$profile "/" + entry-point))) + + ;; This one is for Singularity 3.x. + "-p" + ,(string-append + "/.singularity.d/runscript s 777 0 0 " + (relative-file-name "/.singularity.d" + (string-append #$profile "/" + entry-point)))) + '()) + + ;; Create empty mount points. + "-p" "/proc d 555 0 0" + "-p" "/sys d 555 0 0" + "-p" "/dev d 555 0 0" + "-p" "/home d 555 0 0")) + + (when database + ;; Initialize /var/guix. + (install-database-and-gc-roots "var-etc" database #$profile) + (mksquashfs `("var-etc" ,#$output))))))) (gexp->derivation (string-append name (compressor-extension compressor) diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index b4d37d4525..8564f12107 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -34,7 +34,8 @@ #:use-module (guix serialization) #:export (nar-sha256 deduplicate - dump-file/deduplicate)) + dump-file/deduplicate + copy-file/deduplicate)) ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where ;; 'port-position' throws to 'out-of-range' when the offset is great than or @@ -256,3 +257,16 @@ down the road." (get-hash))))) (deduplicate file hash #:store store)) + +(define* (copy-file/deduplicate source target + #:key (store (%store-directory))) + "Like 'copy-file', but additionally deduplicate TARGET in STORE." + (call-with-input-file source + (lambda (input) + (let ((stat (stat input))) + (dump-file/deduplicate target input (stat:size stat) + (if (zero? (logand (stat:mode stat) + #o100)) + 'regular + 'executable) + #:store store))))) diff --git a/tests/gexp.scm b/tests/gexp.scm index a0e55178fa..6e92f0e4b3 100644 --- a/tests/gexp.scm +++ b/tests/gexp.scm @@ -736,7 +736,8 @@ (zero? (logand #o222 (stat:mode st))))))) (mkdir #$output) - (populate-store '("graph") #$output) + (populate-store '("graph") #$output + #:deduplicate? #f) ;; Check whether 'populate-store' canonicalizes ;; permissions and timestamps. diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm index e2870a363d..7b01acae24 100644 --- a/tests/store-deduplication.scm +++ b/tests/store-deduplication.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org> +;;; Copyright © 2018, 2020 Ludovic Courtès <ludo@gnu.org> ;;; ;;; This file is part of GNU Guix. ;;; @@ -25,6 +25,7 @@ #:use-module (rnrs bytevectors) #:use-module (ice-9 binary-ports) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) #:use-module (srfi srfi-64)) (test-begin "store-deduplication") @@ -106,4 +107,19 @@ (cons (apply = (map (compose stat:ino stat) identical)) (map (compose stat:nlink stat) identical)))))) +(test-assert "copy-file/deduplicate" + (call-with-temporary-directory + (lambda (store) + (let ((source (search-path %load-path "gnu/packages/emacs-xyz.scm"))) + (for-each (lambda (target) + (copy-file/deduplicate source + (string-append store target) + #:store store)) + '("/a" "/b" "/c")) + (and (directory-exists? (string-append store "/.links")) + (file=? source (string-append store "/a")) + (apply = (map (compose stat:ino stat + (cut string-append store <>)) + '("/a" "/b" "/c")))))))) + (test-end "store-deduplication") |