summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimothy Sample <samplet@ngyro.com>2019-06-02 14:41:20 -0400
committerLudovic Courtès <ludo@gnu.org>2019-06-04 21:24:02 +0200
commit420c2632bb1f48e492a035c1d216f209734f45e6 (patch)
tree9de4bc43ee02f3933f4c18dac77283af08ac76a5
parent36ad1d24b3d2c174a64c445502a36f19605dbd65 (diff)
Make URI handling locale independent.
Fixes <https://bugs.gnu.org/35785>. * module/web/uri.scm (digits, hex-digits, letters): New variables. (ipv4-regexp, ipv6-regexp, domain-label-regexp, top-label-regexp, userinfo-pat, host-pat, ipv6-host-pat, port-pat, scheme-pat): Explicitly list each character instead of using character ranges. * test-suite/tests/web-uri.test: Add corresponding tests. Signed-off-by: Ludovic Courtès <ludo@gnu.org>
-rw-r--r--module/web/uri.scm31
-rw-r--r--test-suite/tests/web-uri.test33
2 files changed, 51 insertions, 13 deletions
diff --git a/module/web/uri.scm b/module/web/uri.scm
index 5b01aa41f..f5291b80b 100644
--- a/module/web/uri.scm
+++ b/module/web/uri.scm
@@ -1,6 +1,6 @@
;;;; (web uri) --- URI manipulation tools
;;;;
-;;;; Copyright (C) 1997,2001,2002,2010,2011,2012,2013,2014 Free Software Foundation, Inc.
+;;;; Copyright (C) 1997,2001,2002,2010,2011,2012,2013,2014,2019 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -182,17 +182,28 @@ for ‘build-uri’ except there is no scheme."
;;; Converters.
;;;
+;; Since character ranges in regular expressions may depend on the
+;; current locale, we use explicit lists of characters instead. See
+;; <https://bugs.gnu.org/35785> for details.
+(define digits "0123456789")
+(define hex-digits "0123456789ABCDEFabcdef")
+(define letters "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
+
;; See RFC 3986 #3.2.2 for comments on percent-encodings, IDNA (RFC
;; 3490), and non-ASCII host names.
;;
(define ipv4-regexp
- (make-regexp "^([0-9.]+)$"))
+ (make-regexp (string-append "^([" digits ".]+)$")))
(define ipv6-regexp
- (make-regexp "^([0-9a-fA-F:.]+)$"))
+ (make-regexp (string-append "^([" hex-digits ":.]+)$")))
(define domain-label-regexp
- (make-regexp "^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?$"))
+ (make-regexp
+ (string-append "^[" letters digits "]"
+ "([" letters digits "-]*[" letters digits "])?$")))
(define top-label-regexp
- (make-regexp "^[a-zA-Z]([a-zA-Z0-9-]*[a-zA-Z0-9])?$"))
+ (make-regexp
+ (string-append "^[" letters "]"
+ "([" letters digits "-]*[" letters digits "])?$")))
(define (valid-host? host)
(cond
@@ -210,13 +221,13 @@ for ‘build-uri’ except there is no scheme."
(regexp-exec top-label-regexp host start)))))))
(define userinfo-pat
- "[a-zA-Z0-9_.!~*'();:&=+$,-]+")
+ (string-append "[" letters digits "_.!~*'();:&=+$,-]+"))
(define host-pat
- "[a-zA-Z0-9.-]+")
+ (string-append "[" letters digits ".-]+"))
(define ipv6-host-pat
- "[0-9a-fA-F:.]+")
+ (string-append "[" hex-digits ":.]+"))
(define port-pat
- "[0-9]*")
+ (string-append "[" digits "]*"))
(define authority-regexp
(make-regexp
(format #f "^//((~a)@)?((~a)|(\\[(~a)\\]))(:(~a))?$"
@@ -253,7 +264,7 @@ for ‘build-uri’ except there is no scheme."
;;; either.
(define scheme-pat
- "[a-zA-Z][a-zA-Z0-9+.-]*")
+ (string-append "[" letters "][" letters digits "+.-]*"))
(define authority-pat
"[^/?#]*")
(define path-pat
diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test
index 73391898c..94778acac 100644
--- a/test-suite/tests/web-uri.test
+++ b/test-suite/tests/web-uri.test
@@ -1,6 +1,6 @@
;;;; web-uri.test --- URI library -*- mode: scheme; coding: utf-8; -*-
;;;;
-;;;; Copyright (C) 2010-2012, 2014, 2017 Free Software Foundation, Inc.
+;;;; Copyright (C) 2010-2012, 2014, 2017, 2019 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -121,7 +121,21 @@
(pass-if-uri-exception "http://foo@"
"Expected.*host"
- (build-uri 'http #:userinfo "foo")))
+ (build-uri 'http #:userinfo "foo"))
+
+ ;; In this test, we need to reload the '(web uri)' module with a
+ ;; different locale. This is because some locale-dependent things
+ ;; (e.g., compiled regexes) are computed when the module is loaded.
+ (pass-if-uri-exception "http://illégal.com"
+ "Expected.*host"
+ (dynamic-wind
+ (lambda () #t)
+ (lambda ()
+ (with-locale "en_US.utf8"
+ (reload-module (resolve-module '(web uri)))
+ (build-uri 'http #:host "illégal.com")))
+ (lambda ()
+ (reload-module (resolve-module '(web uri)))))))
(with-test-prefix "build-uri-reference"
(pass-if "//host/etc/foo"
@@ -290,7 +304,20 @@
#:port 100
#:path "/"
#:query "q"
- #:fragment "bar")))
+ #:fragment "bar"))
+
+ ;; This test reproduces bug #35785. See the 'illégal' test above for
+ ;; why we reload the module.
+ (pass-if "http://www.example.com (sv_SE)"
+ (dynamic-wind
+ (lambda () #t)
+ (lambda ()
+ (with-locale "sv_SE.utf8"
+ (reload-module (resolve-module '(web uri)))
+ (uri=? (string->uri "http://www.example.com")
+ #:scheme 'http #:host "www.example.com" #:path "")))
+ (lambda ()
+ (reload-module (resolve-module '(web uri)))))))
(with-test-prefix "string->uri-reference"
(pass-if "/foo"