diff options
author | Timothy Sample <samplet@ngyro.com> | 2019-06-02 14:41:20 -0400 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2019-06-04 21:24:02 +0200 |
commit | 420c2632bb1f48e492a035c1d216f209734f45e6 (patch) | |
tree | 9de4bc43ee02f3933f4c18dac77283af08ac76a5 | |
parent | 36ad1d24b3d2c174a64c445502a36f19605dbd65 (diff) |
Make URI handling locale independent.
Fixes <https://bugs.gnu.org/35785>.
* module/web/uri.scm (digits, hex-digits, letters): New variables.
(ipv4-regexp, ipv6-regexp, domain-label-regexp, top-label-regexp,
userinfo-pat, host-pat, ipv6-host-pat, port-pat, scheme-pat): Explicitly
list each character instead of using character ranges.
* test-suite/tests/web-uri.test: Add corresponding tests.
Signed-off-by: Ludovic Courtès <ludo@gnu.org>
-rw-r--r-- | module/web/uri.scm | 31 | ||||
-rw-r--r-- | test-suite/tests/web-uri.test | 33 |
2 files changed, 51 insertions, 13 deletions
diff --git a/module/web/uri.scm b/module/web/uri.scm index 5b01aa41f..f5291b80b 100644 --- a/module/web/uri.scm +++ b/module/web/uri.scm @@ -1,6 +1,6 @@ ;;;; (web uri) --- URI manipulation tools ;;;; -;;;; Copyright (C) 1997,2001,2002,2010,2011,2012,2013,2014 Free Software Foundation, Inc. +;;;; Copyright (C) 1997,2001,2002,2010,2011,2012,2013,2014,2019 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -182,17 +182,28 @@ for ‘build-uri’ except there is no scheme." ;;; Converters. ;;; +;; Since character ranges in regular expressions may depend on the +;; current locale, we use explicit lists of characters instead. See +;; <https://bugs.gnu.org/35785> for details. +(define digits "0123456789") +(define hex-digits "0123456789ABCDEFabcdef") +(define letters "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") + ;; See RFC 3986 #3.2.2 for comments on percent-encodings, IDNA (RFC ;; 3490), and non-ASCII host names. ;; (define ipv4-regexp - (make-regexp "^([0-9.]+)$")) + (make-regexp (string-append "^([" digits ".]+)$"))) (define ipv6-regexp - (make-regexp "^([0-9a-fA-F:.]+)$")) + (make-regexp (string-append "^([" hex-digits ":.]+)$"))) (define domain-label-regexp - (make-regexp "^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?$")) + (make-regexp + (string-append "^[" letters digits "]" + "([" letters digits "-]*[" letters digits "])?$"))) (define top-label-regexp - (make-regexp "^[a-zA-Z]([a-zA-Z0-9-]*[a-zA-Z0-9])?$")) + (make-regexp + (string-append "^[" letters "]" + "([" letters digits "-]*[" letters digits "])?$"))) (define (valid-host? host) (cond @@ -210,13 +221,13 @@ for ‘build-uri’ except there is no scheme." (regexp-exec top-label-regexp host start))))))) (define userinfo-pat - "[a-zA-Z0-9_.!~*'();:&=+$,-]+") + (string-append "[" letters digits "_.!~*'();:&=+$,-]+")) (define host-pat - "[a-zA-Z0-9.-]+") + (string-append "[" letters digits ".-]+")) (define ipv6-host-pat - "[0-9a-fA-F:.]+") + (string-append "[" hex-digits ":.]+")) (define port-pat - "[0-9]*") + (string-append "[" digits "]*")) (define authority-regexp (make-regexp (format #f "^//((~a)@)?((~a)|(\\[(~a)\\]))(:(~a))?$" @@ -253,7 +264,7 @@ for ‘build-uri’ except there is no scheme." ;;; either. (define scheme-pat - "[a-zA-Z][a-zA-Z0-9+.-]*") + (string-append "[" letters "][" letters digits "+.-]*")) (define authority-pat "[^/?#]*") (define path-pat diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test index 73391898c..94778acac 100644 --- a/test-suite/tests/web-uri.test +++ b/test-suite/tests/web-uri.test @@ -1,6 +1,6 @@ ;;;; web-uri.test --- URI library -*- mode: scheme; coding: utf-8; -*- ;;;; -;;;; Copyright (C) 2010-2012, 2014, 2017 Free Software Foundation, Inc. +;;;; Copyright (C) 2010-2012, 2014, 2017, 2019 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -121,7 +121,21 @@ (pass-if-uri-exception "http://foo@" "Expected.*host" - (build-uri 'http #:userinfo "foo"))) + (build-uri 'http #:userinfo "foo")) + + ;; In this test, we need to reload the '(web uri)' module with a + ;; different locale. This is because some locale-dependent things + ;; (e.g., compiled regexes) are computed when the module is loaded. + (pass-if-uri-exception "http://illégal.com" + "Expected.*host" + (dynamic-wind + (lambda () #t) + (lambda () + (with-locale "en_US.utf8" + (reload-module (resolve-module '(web uri))) + (build-uri 'http #:host "illégal.com"))) + (lambda () + (reload-module (resolve-module '(web uri))))))) (with-test-prefix "build-uri-reference" (pass-if "//host/etc/foo" @@ -290,7 +304,20 @@ #:port 100 #:path "/" #:query "q" - #:fragment "bar"))) + #:fragment "bar")) + + ;; This test reproduces bug #35785. See the 'illégal' test above for + ;; why we reload the module. + (pass-if "http://www.example.com (sv_SE)" + (dynamic-wind + (lambda () #t) + (lambda () + (with-locale "sv_SE.utf8" + (reload-module (resolve-module '(web uri))) + (uri=? (string->uri "http://www.example.com") + #:scheme 'http #:host "www.example.com" #:path ""))) + (lambda () + (reload-module (resolve-module '(web uri))))))) (with-test-prefix "string->uri-reference" (pass-if "/foo" |