diff options
author | Mark H Weaver <mhw@netris.org> | 2012-10-30 23:46:31 -0400 |
---|---|---|
committer | Mark H Weaver <mhw@netris.org> | 2012-10-30 23:46:31 -0400 |
commit | fa980bcc0f5b186b98d84fc5d165d35fcbb5d5ec (patch) | |
tree | 411ee841f7526fe7138e42cf399911518df06309 | |
parent | e088b09d7dce5d78c96288778969876b6d25d726 (diff) | |
parent | 10744b7c5007ccac19ea9654be6e749fe6a60992 (diff) |
Merge remote-tracking branch 'origin/stable-2.0'
Moved scm_i_struct_hash from struct.c to hash.c and made it static.
The port's alist is now a field of 'scm_t_port'.
Conflicts:
libguile/arrays.c
libguile/hash.c
libguile/ports.c
libguile/print.h
libguile/read.c
53 files changed, 1676 insertions, 530 deletions
@@ -6,6 +6,7 @@ Contributors since the last release: Volker Grabsch Julian Graham Michael Gran + Daniel Hartwig No Itisnt Neil Jerram Daniel Kraft diff --git a/configure.ac b/configure.ac index ab4e147aa..8adfd471c 100644 --- a/configure.ac +++ b/configure.ac @@ -36,7 +36,7 @@ AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR(GUILE-VERSION) dnl `AM_SUBST_NOTMAKE' was introduced in Automake 1.11. -AM_INIT_AUTOMAKE([1.11 gnu no-define -Wall -Wno-override dist-xz]) +AM_INIT_AUTOMAKE([1.11 gnu no-define -Wall -Wno-override color-tests dist-xz]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])], [AC_SUBST([AM_DEFAULT_VERBOSITY],1)]) AC_COPYRIGHT(GUILE_CONFIGURE_COPYRIGHT) diff --git a/doc/ref/Makefile.am b/doc/ref/Makefile.am index abe9cb9ee..201ab6b3f 100644 --- a/doc/ref/Makefile.am +++ b/doc/ref/Makefile.am @@ -62,6 +62,7 @@ guile_TEXINFOS = preface.texi \ web.texi \ expect.texi \ scsh.texi \ + curried.texi \ sxml-match.texi \ scheme-scripts.texi \ api-overview.texi \ diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 39c97909a..6d8de2bd6 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -3152,12 +3152,24 @@ These procedures are useful for similar tasks. Convert the string @var{str} into a list of characters. @end deffn -@deffn {Scheme Procedure} string-split str chr -@deffnx {C Function} scm_string_split (str, chr) +@deffn {Scheme Procedure} string-split str char_pred +@deffnx {C Function} scm_string_split (str, char_pred) Split the string @var{str} into a list of substrings delimited -by appearances of the character @var{chr}. Note that an empty substring -between separator characters will result in an empty string in the -result list. +by appearances of characters that + +@itemize @bullet +@item +equal @var{char_pred}, if it is a character, + +@item +satisfy the predicate @var{char_pred}, if it is a procedure, + +@item +are in the set @var{char_pred}, if it is a character set. +@end itemize + +Note that an empty substring between separator characters will result in +an empty string in the result list. @lisp (string-split "root:x:0:0:root:/root:/bin/bash" #\:) diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 611283225..c471f643b 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -254,6 +254,8 @@ Encoding of Source Files}. @node Case Sensitivity @subsubsection Case Sensitivity +@cindex fold-case +@cindex no-fold-case @c FIXME::martin: Review me! @@ -275,9 +277,9 @@ options, @xref{Scheme Read}. (read-enable 'case-insensitive) @end lisp -Note that this is seldom a problem, because Scheme programmers tend not -to use uppercase letters in their identifiers anyway. - +It is also possible to disable (or enable) case sensitivity within a +single file by placing the reader directives @code{#!fold-case} (or +@code{#!no-fold-case}) within the file itself. @node Keyword Syntax @subsubsection Keyword Syntax @@ -315,10 +317,10 @@ its read options. @cindex options - read @cindex read options @deffn {Scheme Procedure} read-options [setting] -Display the current settings of the read options. If @var{setting} is -omitted, only a short form of the current read options is printed. -Otherwise if @var{setting} is the symbol @code{help}, a complete options -description is displayed. +Display the current settings of the global read options. If +@var{setting} is omitted, only a short form of the current read options +is printed. Otherwise if @var{setting} is the symbol @code{help}, a +complete options description is displayed. @end deffn The set of available options, and their default values, may be had by @@ -336,8 +338,19 @@ r6rs-hex-escapes no Use R6RS variable-length character and string hex escape square-brackets yes Treat `[' and `]' as parentheses, for R6RS compatibility. hungry-eol-escapes no In strings, consume leading whitespace after an escaped end-of-line. +curly-infix no Support SRFI-105 curly infix expressions. @end smalllisp +Note that Guile also includes a preliminary mechanism for setting read +options on a per-port basis. For instance, the @code{case-insensitive} +read option is set (or unset) on the port when the reader encounters the +@code{#!fold-case} or @code{#!no-fold-case} reader directives. +Similarly, the @code{#!curly-infix} reader directive sets the +@code{curly-infix} read option on the port, and +@code{#!curly-infix-and-bracket-lists} sets @code{curly-infix} and +unsets @code{square-brackets} on the port (@pxref{SRFI-105}). There is +currently no other way to access or set the per-port read options. + The boolean options may be toggled with @code{read-enable} and @code{read-disable}. The non-boolean @code{keywords} option must be set using @code{read-set!}. diff --git a/doc/ref/api-options.texi b/doc/ref/api-options.texi index f63597824..173431890 100644 --- a/doc/ref/api-options.texi +++ b/doc/ref/api-options.texi @@ -390,6 +390,7 @@ r6rs-hex-escapes no Use R6RS variable-length character and string hex escape square-brackets yes Treat `[' and `]' as parentheses, for R6RS compatibility. hungry-eol-escapes no In strings, consume leading whitespace after an escaped end-of-line. +curly-infix no Support SRFI-105 curly infix expressions. scheme@@(guile-user) [1]> (read-enable 'case-insensitive) $2 = (square-brackets keywords #f case-insensitive positions) scheme@@(guile-user) [1]> ,q diff --git a/doc/ref/api-regex.texi b/doc/ref/api-regex.texi index 11a31fca0..082fb874d 100644 --- a/doc/ref/api-regex.texi +++ b/doc/ref/api-regex.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2007, 2009, 2010 +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2007, 2009, 2010, 2012 @c Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @@ -54,11 +54,12 @@ Zero bytes (@code{#\nul}) cannot be used in regex patterns or input strings, since the underlying C functions treat that as the end of string. If there's a zero byte an error is thrown. -Patterns and input strings are treated as being in the locale -character set if @code{setlocale} has been called (@pxref{Locales}), -and in a multibyte locale this includes treating multi-byte sequences -as a single character. (Guile strings are currently merely bytes, -though this may change in the future, @xref{Conversion to/from C}.) +Internally, patterns and input strings are converted to the current +locale's encoding, and then passed to the C library's regular expression +routines (@pxref{Regular Expressions,,, libc, The GNU C Library +Reference Manual}). The returned match structures always point to +characters in the strings, not to individual bytes, even in the case of +multi-byte encodings. @deffn {Scheme Procedure} string-match pattern str [start] Compile the string @var{pattern} into a regular expression and compare diff --git a/doc/ref/curried.texi b/doc/ref/curried.texi new file mode 100644 index 000000000..25430b4f0 --- /dev/null +++ b/doc/ref/curried.texi @@ -0,0 +1,56 @@ +@c -*-texinfo-*- +@c This is part of the GNU Guile Reference Manual. +@c Copyright (C) 2012 Free Software Foundation, Inc. +@c See the file guile.texi for copying conditions. + +@node Curried Definitions +@section Curried Definitions + +The macros in this section are provided by +@lisp +(use-modules (ice-9 curried-definitions)) +@end lisp +@noindent +and replace those provided by default. + +Prior to Guile 2.0, Guile provided a type of definition known colloquially +as a ``curried definition''. The idea is to extend the syntax of +@code{define} so that you can conveniently define procedures that return +procedures, up to any desired depth. + +For example, +@example +(define ((foo x) y) + (list x y)) +@end example +is a convenience form of +@example +(define foo + (lambda (x) + (lambda (y) + (list x y)))) +@end example + +@deffn {Scheme Syntax} define (@dots{} (name args @dots{}) @dots{}) body @dots{} +@deffnx {Scheme Syntax} define* (@dots{} (name args @dots{}) @dots{}) body @dots{} +@deffnx {Scheme Syntax} define-public (@dots{} (name args @dots{}) @dots{}) body @dots{} + +Create a top level variable @var{name} bound to the procedure with +parameter list @var{args}. If @var{name} is itself a formal parameter +list, then a higher order procedure is created using that +formal-parameter list, and returning a procedure that has parameter list +@var{args}. This nesting may occur to arbitrary depth. + +@code{define*} is similar but the formal parameter lists take additional +options as described in @ref{lambda* and define*}. For example, +@example +(define* ((foo #:keys (bar 'baz) (quux 'zot)) frotz #:rest rest) + (list bar quux frotz rest)) + +((foo #:quux 'foo) 1 2 3 4 5) +@result{} (baz foo 1 (2 3 4 5)) +@end example + +@code{define-public} is similar to @code{define} but it also adds +@var{name} to the list of exported bindings of the current module. +@end deffn diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi index c3da0c36d..a1b3fe60c 100644 --- a/doc/ref/guile.texi +++ b/doc/ref/guile.texi @@ -370,6 +370,7 @@ available through both Scheme and C interfaces. * Expect:: Controlling interactive programs with Guile. * sxml-match:: Pattern matching of SXML. * The Scheme shell (scsh):: Using scsh interfaces in Guile. +* Curried Definitions:: Extended @code{define} syntax. @end menu @include slib.texi @@ -387,6 +388,7 @@ available through both Scheme and C interfaces. @include sxml-match.texi @include scsh.texi +@include curried.texi @node Standard Library @chapter Standard Library diff --git a/doc/ref/scheme-ideas.texi b/doc/ref/scheme-ideas.texi index 53f7b6132..15cf6640d 100644 --- a/doc/ref/scheme-ideas.texi +++ b/doc/ref/scheme-ideas.texi @@ -476,6 +476,11 @@ The corresponding forms of the alternative @code{define} syntax are: @noindent For details on how these forms work, see @xref{Lambda}. +Prior to Guile 2.0, Guile provided an extension to @code{define} syntax +that allowed you to nest the previous extension up to an arbitrary +depth. These are no longer provided by default, and instead have been +moved to @ref{Curried Definitions} + (It could be argued that the alternative @code{define} forms are rather confusing, especially for newcomers to the Scheme language, as they hide both the role of @code{lambda} and the fact that procedures are values diff --git a/doc/ref/scheme-using.texi b/doc/ref/scheme-using.texi index 3d439132b..7eb84de0a 100644 --- a/doc/ref/scheme-using.texi +++ b/doc/ref/scheme-using.texi @@ -457,7 +457,7 @@ show a short error printout. Default values for REPL options may be set using @code{repl-default-option-set!} from @code{(system repl common)}: -@deffn {Scheme Procedure} repl-set-default-option! key value +@deffn {Scheme Procedure} repl-default-option-set! key value Set the default value of a REPL option. This function is particularly useful in a user's init file. @xref{Init File}. @end deffn diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi index 29c1e06f5..da1b86fe0 100644 --- a/doc/ref/srfi-modules.texi +++ b/doc/ref/srfi-modules.texi @@ -54,6 +54,7 @@ get the relevant SRFI documents from the SRFI home page * SRFI-69:: Basic hash tables. * SRFI-88:: Keyword objects. * SRFI-98:: Accessing environment variables. +* SRFI-105:: Curly-infix expressions. @end menu @@ -3003,10 +3004,10 @@ with locale decimal point, eg.@: @samp{5.2} @item @nicode{~z} @tab time zone, RFC-822 style @item @nicode{~Z} @tab time zone symbol (not currently implemented) @item @nicode{~1} @tab ISO-8601 date, @samp{~Y-~m-~d} -@item @nicode{~2} @tab ISO-8601 time+zone, @samp{~k:~M:~S~z} -@item @nicode{~3} @tab ISO-8601 time, @samp{~k:~M:~S} -@item @nicode{~4} @tab ISO-8601 date/time+zone, @samp{~Y-~m-~dT~k:~M:~S~z} -@item @nicode{~5} @tab ISO-8601 date/time, @samp{~Y-~m-~dT~k:~M:~S} +@item @nicode{~2} @tab ISO-8601 time+zone, @samp{~H:~M:~S~z} +@item @nicode{~3} @tab ISO-8601 time, @samp{~H:~M:~S} +@item @nicode{~4} @tab ISO-8601 date/time+zone, @samp{~Y-~m-~dT~H:~M:~S~z} +@item @nicode{~5} @tab ISO-8601 date/time, @samp{~Y-~m-~dT~H:~M:~S} @end multitable @end defun @@ -4469,6 +4470,56 @@ Returns the names and values of all the environment variables as an association list in which both the keys and the values are strings. @end deffn +@node SRFI-105 +@subsection SRFI-105 Curly-infix expressions. +@cindex SRFI-105 +@cindex curly-infix +@cindex curly-infix-and-bracket-lists + +Guile's built-in reader includes support for SRFI-105 curly-infix +expressions. See @uref{http://srfi.schemers.org/srfi-105/srfi-105.html, +the specification of SRFI-105}. Some examples: + +@example +@{n <= 5@} @result{} (<= n 5) +@{a + b + c@} @result{} (+ a b c) +@{a * @{b + c@}@} @result{} (* a (+ b c)) +@{(- a) / b@} @result{} (/ (- a) b) +@{-(a) / b@} @result{} (/ (- a) b) as well +@{(f a b) + (g h)@} @result{} (+ (f a b) (g h)) +@{f(a b) + g(h)@} @result{} (+ (f a b) (g h)) as well +@{f[a b] + g(h)@} @result{} (+ ($bracket-apply$ f a b) (g h)) +'@{a + f(b) + x@} @result{} '(+ a (f b) x) +@{length(x) >= 6@} @result{} (>= (length x) 6) +@{n-1 + n-2@} @result{} (+ n-1 n-2) +@{n * factorial@{n - 1@}@} @result{} (* n (factorial (- n 1))) +@{@{a > 0@} and @{b >= 1@}@} @result{} (and (> a 0) (>= b 1)) +@{f@{n - 1@}(x)@} @result{} ((f (- n 1)) x) +@{a . z@} @result{} ($nfx$ a . z) +@{a + b - c@} @result{} ($nfx$ a + b - c) +@end example + +To enable curly-infix expressions within a file, place the reader +directive @code{#!curly-infix} before the first use of curly-infix +notation. To globally enable curly-infix expressions in Guile's reader, +set the @code{curly-infix} read option. + +Guile also implements the following non-standard extension to SRFI-105: +if @code{curly-infix} is enabled and there is no other meaning assigned +to square brackets (i.e. the @code{square-brackets} read option is +turned off), then lists within square brackets are read as normal lists +but with the special symbol @code{$bracket-list$} added to the front. +To enable this combination of read options within a file, use the reader +directive @code{#!curly-infix-and-bracket-lists}. For example: + +@example +[a b] @result{} ($bracket-list$ a b) +[a . b] @result{} ($bracket-list$ a . b) +@end example + + +For more information on reader options, @xref{Scheme Read}. + @c srfi-modules.texi ends here @c Local Variables: diff --git a/doc/ref/tour.texi b/doc/ref/tour.texi index 3e612692d..0924216aa 100644 --- a/doc/ref/tour.texi +++ b/doc/ref/tour.texi @@ -1,7 +1,7 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2010, 2011 -@c Free Software Foundation, Inc. +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2010, 2011, +@c 2012 Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @raisesections @@ -149,7 +149,7 @@ that makes the @code{j0} function available to Scheme code. SCM j0_wrapper (SCM x) @{ - return scm_make_real (j0 (scm_num2dbl (x, "j0"))); + return scm_from_double (j0 (scm_to_double (x))); @} void diff --git a/libguile/arrays.c b/libguile/arrays.c index f0f901239..83d7db2b9 100644 --- a/libguile/arrays.c +++ b/libguile/arrays.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1995,1996,1997,1998,2000,2001,2002,2003,2004, 2005, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001,2002,2003,2004,2005, + * 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -814,178 +815,6 @@ scm_i_print_array (SCM array, SCM port, scm_print_state *pstate) return scm_i_print_array_dimension (&h, 0, 0, port, pstate); } -/* Read an array. This function can also read vectors and uniform - vectors. Also, the conflict between '#f' and '#f32' and '#f64' is - handled here. - - C is the first character read after the '#'. -*/ - -static int -read_decimal_integer (SCM port, int c, ssize_t *resp) -{ - ssize_t sign = 1; - ssize_t res = 0; - int got_it = 0; - - if (c == '-') - { - sign = -1; - c = scm_getc_unlocked (port); - } - - while ('0' <= c && c <= '9') - { - res = 10*res + c-'0'; - got_it = 1; - c = scm_getc_unlocked (port); - } - - if (got_it) - *resp = sign * res; - return c; -} - -SCM -scm_i_read_array (SCM port, int c) -{ - ssize_t rank; - scm_t_wchar tag_buf[8]; - int tag_len; - - SCM tag, shape = SCM_BOOL_F, elements; - - /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but - the array code can not deal with zero-length dimensions yet, and - we want to allow zero-length vectors, of course. - */ - if (c == '(') - { - scm_ungetc_unlocked (c, port); - return scm_vector (scm_read (port)); - } - - /* Disambiguate between '#f' and uniform floating point vectors. - */ - if (c == 'f') - { - c = scm_getc_unlocked (port); - if (c != '3' && c != '6') - { - if (c != EOF) - scm_ungetc_unlocked (c, port); - return SCM_BOOL_F; - } - rank = 1; - tag_buf[0] = 'f'; - tag_len = 1; - goto continue_reading_tag; - } - - /* Read rank. - */ - rank = 1; - c = read_decimal_integer (port, c, &rank); - if (rank < 0) - scm_i_input_error (NULL, port, "array rank must be non-negative", - SCM_EOL); - - /* Read tag. - */ - tag_len = 0; - continue_reading_tag: - while (c != EOF && c != '(' && c != '@' && c != ':' - && tag_len < sizeof tag_buf / sizeof tag_buf[0]) - { - tag_buf[tag_len++] = c; - c = scm_getc_unlocked (port); - } - if (tag_len == 0) - tag = SCM_BOOL_T; - else - { - tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len)); - if (tag_len == sizeof tag_buf / sizeof tag_buf[0]) - scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a", - scm_list_1 (tag)); - } - - /* Read shape. - */ - if (c == '@' || c == ':') - { - shape = SCM_EOL; - - do - { - ssize_t lbnd = 0, len = 0; - SCM s; - - if (c == '@') - { - c = scm_getc_unlocked (port); - c = read_decimal_integer (port, c, &lbnd); - } - - s = scm_from_ssize_t (lbnd); - - if (c == ':') - { - c = scm_getc_unlocked (port); - c = read_decimal_integer (port, c, &len); - if (len < 0) - scm_i_input_error (NULL, port, - "array length must be non-negative", - SCM_EOL); - - s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1)); - } - - shape = scm_cons (s, shape); - } while (c == '@' || c == ':'); - - shape = scm_reverse_x (shape, SCM_EOL); - } - - /* Read nested lists of elements. - */ - if (c != '(') - scm_i_input_error (NULL, port, - "missing '(' in vector or array literal", - SCM_EOL); - scm_ungetc_unlocked (c, port); - elements = scm_read (port); - - if (scm_is_false (shape)) - shape = scm_from_ssize_t (rank); - else if (scm_ilength (shape) != rank) - scm_i_input_error - (NULL, port, - "the number of shape specifications must match the array rank", - SCM_EOL); - - /* Handle special print syntax of rank zero arrays; see - scm_i_print_array for a rationale. - */ - if (rank == 0) - { - if (!scm_is_pair (elements)) - scm_i_input_error (NULL, port, - "too few elements in array literal, need 1", - SCM_EOL); - if (!scm_is_null (SCM_CDR (elements))) - scm_i_input_error (NULL, port, - "too many elements in array literal, want 1", - SCM_EOL); - elements = SCM_CAR (elements); - } - - /* Construct array. - */ - return scm_list_to_typed_array (tag, shape, elements); -} - - static SCM array_handle_ref (scm_t_array_handle *h, size_t pos) { diff --git a/libguile/arrays.h b/libguile/arrays.h index 5ea604d6a..6045ab65d 100644 --- a/libguile/arrays.h +++ b/libguile/arrays.h @@ -3,7 +3,8 @@ #ifndef SCM_ARRAY_H #define SCM_ARRAY_H -/* Copyright (C) 1995,1996,1997,1999,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1999,2000,2001, 2004, 2006, 2008, 2009, + * 2010, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -73,7 +74,6 @@ typedef struct scm_i_t_array SCM_INTERNAL SCM scm_i_make_array (int ndim); SCM_INTERNAL int scm_i_print_array (SCM array, SCM port, scm_print_state *pstate); -SCM_INTERNAL SCM scm_i_read_array (SCM port, int c); SCM_INTERNAL void scm_init_arrays (void); diff --git a/libguile/hash.c b/libguile/hash.c index d47c7e054..740dac11f 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008, + * 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -223,6 +224,53 @@ scm_i_utf8_string_hash (const char *str, size_t len) return ret; } +static unsigned long scm_raw_ihashq (scm_t_bits key); +static unsigned long scm_raw_ihash (SCM obj, size_t depth); + +/* Return the hash of struct OBJ. Traverse OBJ's fields to compute the + result, unless DEPTH is zero. Assumes that OBJ is a struct. */ +static unsigned long +scm_i_struct_hash (SCM obj, size_t depth) +{ + SCM layout; + scm_t_bits *data; + size_t struct_size, field_num; + unsigned long hash; + + layout = SCM_STRUCT_LAYOUT (obj); + struct_size = scm_i_symbol_length (layout) / 2; + data = SCM_STRUCT_DATA (obj); + + hash = scm_raw_ihashq (SCM_UNPACK (SCM_STRUCT_VTABLE (obj))); + if (depth > 0) + for (field_num = 0; field_num < struct_size; field_num++) + { + int protection; + + protection = scm_i_symbol_ref (layout, field_num * 2 + 1); + if (protection != 'h' && protection != 'o') + { + int type; + type = scm_i_symbol_ref (layout, field_num * 2); + switch (type) + { + case 'p': + hash ^= scm_raw_ihash (SCM_PACK (data[field_num]), + depth / 2); + break; + case 'u': + hash ^= scm_raw_ihashq (data[field_num]); + break; + default: + /* Ignore 's' fields. */; + } + } + } + + /* FIXME: Tail elements should be taken into account. */ + + return hash; +} /* Thomas Wang's integer hasher, from http://www.cris.com/~Ttwang/tech/inthash.htm. */ @@ -298,6 +346,8 @@ scm_raw_ihash (SCM obj, size_t depth) ^ scm_raw_ihash (SCM_CDR (obj), depth / 2)); else return scm_raw_ihashq (scm_tc3_cons); + case scm_tcs_struct: + return scm_i_struct_hash (obj, depth); default: return scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); } diff --git a/libguile/list.c b/libguile/list.c index 8297b17c5..627640334 100644 --- a/libguile/list.c +++ b/libguile/list.c @@ -267,7 +267,7 @@ SCM_DEFINE (scm_append, "append", 0, 0, 1, SCM_DEFINE (scm_append_x, "append!", 0, 0, 1, - (SCM lists), + (SCM args), "A destructive version of @code{append} (@pxref{Pairs and\n" "Lists,,,r5rs, The Revised^5 Report on Scheme}). The cdr field\n" "of each list's final pair is changed to point to the head of\n" @@ -276,26 +276,29 @@ SCM_DEFINE (scm_append_x, "append!", 0, 0, 1, #define FUNC_NAME s_scm_append_x { SCM ret, *loc; - SCM_VALIDATE_REST_ARGUMENT (lists); + int argnum = 1; + SCM_VALIDATE_REST_ARGUMENT (args); - if (scm_is_null (lists)) + if (scm_is_null (args)) return SCM_EOL; loc = &ret; for (;;) { - SCM arg = SCM_CAR (lists); + SCM arg = SCM_CAR (args); *loc = arg; - lists = SCM_CDR (lists); - if (scm_is_null (lists)) + args = SCM_CDR (args); + if (scm_is_null (args)) return ret; if (!SCM_NULL_OR_NIL_P (arg)) { - SCM_VALIDATE_CONS (SCM_ARG1, arg); + SCM_VALIDATE_CONS (argnum, arg); loc = SCM_CDRLOC (scm_last_pair (arg)); + SCM_VALIDATE_NULL_OR_NIL (argnum, *loc); } + argnum++; } } #undef FUNC_NAME diff --git a/libguile/ports.c b/libguile/ports.c index 11142ba65..e7187d35d 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -613,6 +613,8 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, entry->ilseq_handler = handler; entry->iconv_descriptors = NULL; + entry->alist = SCM_EOL; + if (SCM_PORT_DESCRIPTOR (ret)->free) scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL); @@ -2370,7 +2372,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) if (end == (size_t) -1) end = scm_i_string_length (str); - scm_display (scm_c_substring (str, start, end), port); + scm_i_display_substring (str, start, end, port); if (pt->rw_random) pt->rw_active = SCM_PORT_WRITE; diff --git a/libguile/ports.h b/libguile/ports.h index 92e388e82..c1ba71921 100644 --- a/libguile/ports.h +++ b/libguile/ports.h @@ -132,6 +132,10 @@ typedef struct scm_t_port_encoding_mode encoding_mode; scm_t_string_failed_conversion_handler ilseq_handler; scm_t_iconv_descriptors *iconv_descriptors; + + /* an alist for storing additional information + (e.g. used to store per-port read options) */ + SCM alist; } scm_t_port; diff --git a/libguile/print.c b/libguile/print.c index 90bc9adf8..5d5c56d2f 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1229,6 +1229,29 @@ write_character (scm_t_wchar ch, SCM port, int string_escapes_p) write_character_escaped (ch, string_escapes_p, port); } +/* Display STR to PORT from START inclusive to END exclusive. */ +void +scm_i_display_substring (SCM str, size_t start, size_t end, SCM port) +{ + int narrow_p; + const char *buf; + size_t len, printed; + + buf = scm_i_string_data (str); + len = end - start; + narrow_p = scm_i_is_narrow_string (str); + buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar)); + + printed = display_string (buf, narrow_p, end - start, port, + PORT_CONVERSION_HANDLER (port)); + + if (SCM_UNLIKELY (printed < len)) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, scm_c_string_ref (str, printed + start)); +} + + /* Print an integer. */ diff --git a/libguile/print.h b/libguile/print.h index 4a3c2f5c0..80a9922f2 100644 --- a/libguile/print.h +++ b/libguile/print.h @@ -3,7 +3,8 @@ #ifndef SCM_PRINT_H #define SCM_PRINT_H -/* Copyright (C) 1995,1996,1998,2000,2001, 2003, 2004, 2006, 2008, 2010, 2012 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1998, 2000, 2001, 2003, 2004, 2006, 2008, + * 2010, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -78,6 +79,8 @@ SCM_API SCM scm_print_options (SCM setting); SCM_API SCM scm_make_print_state (void); SCM_API void scm_free_print_state (SCM print_state); SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state); +SCM_INTERNAL void scm_i_display_substring (SCM str, size_t start, size_t end, + SCM port); SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port); SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port); SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port); diff --git a/libguile/private-options.h b/libguile/private-options.h index 9d2d43cf5..ed0f314e5 100644 --- a/libguile/private-options.h +++ b/libguile/private-options.h @@ -67,7 +67,8 @@ SCM_INTERNAL scm_t_option scm_read_opts[]; #define SCM_R6RS_ESCAPES_P scm_read_opts[4].val #define SCM_SQUARE_BRACKETS_P scm_read_opts[5].val #define SCM_HUNGRY_EOL_ESCAPES_P scm_read_opts[6].val +#define SCM_CURLY_INFIX_P scm_read_opts[7].val -#define SCM_N_READ_OPTIONS 6 +#define SCM_N_READ_OPTIONS 7 #endif /* PRIVATE_OPTIONS */ diff --git a/libguile/read.c b/libguile/read.c index 5738e2ed8..d977cff82 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -63,24 +63,59 @@ SCM_SYMBOL (scm_keyword_prefix, "prefix"); SCM_SYMBOL (scm_keyword_postfix, "postfix"); SCM_SYMBOL (sym_nil, "nil"); -scm_t_option scm_read_opts[] = { - { SCM_OPTION_BOOLEAN, "copy", 0, - "Copy source code expressions." }, - { SCM_OPTION_BOOLEAN, "positions", 1, - "Record positions of source code expressions." }, - { SCM_OPTION_BOOLEAN, "case-insensitive", 0, - "Convert symbols to lower case."}, - { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS, - "Style of keyword recognition: #f, 'prefix or 'postfix."}, - { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0, - "Use R6RS variable-length character and string hex escapes."}, - { SCM_OPTION_BOOLEAN, "square-brackets", 1, - "Treat `[' and `]' as parentheses, for R6RS compatibility."}, - { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0, - "In strings, consume leading whitespace after an escaped end-of-line."}, - { 0, }, +/* SRFI-105 curly infix expression support */ +SCM_SYMBOL (sym_nfx, "$nfx$"); +SCM_SYMBOL (sym_bracket_list, "$bracket-list$"); +SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$"); + +scm_t_option scm_read_opts[] = + { + { SCM_OPTION_BOOLEAN, "copy", 0, + "Copy source code expressions." }, + { SCM_OPTION_BOOLEAN, "positions", 1, + "Record positions of source code expressions." }, + { SCM_OPTION_BOOLEAN, "case-insensitive", 0, + "Convert symbols to lower case."}, + { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS, + "Style of keyword recognition: #f, 'prefix or 'postfix."}, + { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0, + "Use R6RS variable-length character and string hex escapes."}, + { SCM_OPTION_BOOLEAN, "square-brackets", 1, + "Treat `[' and `]' as parentheses, for R6RS compatibility."}, + { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0, + "In strings, consume leading whitespace after an escaped end-of-line."}, + { SCM_OPTION_BOOLEAN, "curly-infix", 0, + "Support SRFI-105 curly infix expressions."}, + { 0, }, + }; + +/* Internal read options structure. This is initialized by 'scm_read' + from the global and per-port read options, and a pointer is passed + down to all helper functions. */ + +enum t_keyword_style + { + KEYWORD_STYLE_HASH_PREFIX, + KEYWORD_STYLE_PREFIX, + KEYWORD_STYLE_POSTFIX + }; + +struct t_read_opts +{ + enum t_keyword_style keyword_style; + unsigned int copy_source_p : 1; + unsigned int record_positions_p : 1; + unsigned int case_insensitive_p : 1; + unsigned int r6rs_escapes_p : 1; + unsigned int square_brackets_p : 1; + unsigned int hungry_eol_escapes_p : 1; + unsigned int curly_infix_p : 1; + unsigned int neoteric_p : 1; }; +typedef struct t_read_opts scm_t_read_opts; + + /* Give meaningful error messages for errors @@ -167,6 +202,9 @@ scm_i_read_hash_procedures_set_x (SCM value) /* The maximum size of Scheme character names. */ #define READER_CHAR_NAME_MAX_SIZE 50 +/* The maximum size of reader directive names. */ +#define READER_DIRECTIVE_NAME_MAX_SIZE 50 + /* `isblank' is only in C99. */ #define CHAR_IS_BLANK_(_chr) \ @@ -185,10 +223,13 @@ scm_i_read_hash_procedures_set_x (SCM value) structure''). */ #define CHAR_IS_R5RS_DELIMITER(c) \ (CHAR_IS_BLANK (c) \ - || (c == ')') || (c == '(') || (c == ';') || (c == '"') \ - || (SCM_SQUARE_BRACKETS_P && ((c == '[') || (c == ']')))) + || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"') -#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER +#define CHAR_IS_DELIMITER(c) \ + (CHAR_IS_R5RS_DELIMITER (c) \ + || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \ + || opts->curly_infix_p)) \ + || (((c) == '}' || (c) == '{') && opts->curly_infix_p)) /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical Structure''. */ @@ -199,8 +240,8 @@ scm_i_read_hash_procedures_set_x (SCM value) /* Read an SCSH block comment. */ static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM); static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM); -static SCM scm_read_commented_expression (scm_t_wchar, SCM); -static SCM scm_read_shebang (scm_t_wchar, SCM); +static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *); +static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *); static SCM scm_get_hash_procedure (int); /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the @@ -208,7 +249,8 @@ static SCM scm_get_hash_procedure (int); fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of bytes actually read. */ static int -read_token (SCM port, char *buf, size_t buf_size, size_t *read) +read_token (SCM port, scm_t_read_opts *opts, + char *buf, size_t buf_size, size_t *read) { *read = 0; @@ -238,8 +280,8 @@ read_token (SCM port, char *buf, size_t buf_size, size_t *read) /* Like `read_token', but return either BUFFER, or a GC-allocated buffer if the token doesn't fit in BUFFER_SIZE bytes. */ static char * -read_complete_token (SCM port, char *buffer, size_t buffer_size, - size_t *read) +read_complete_token (SCM port, scm_t_read_opts *opts, + char *buffer, size_t buffer_size, size_t *read) { int overflow = 0; size_t bytes_read, overflow_size = 0; @@ -247,7 +289,7 @@ read_complete_token (SCM port, char *buffer, size_t buffer_size, do { - overflow = read_token (port, buffer, buffer_size, &bytes_read); + overflow = read_token (port, opts, buffer, buffer_size, &bytes_read); if (bytes_read == 0) break; if (overflow || overflow_size != 0) @@ -284,7 +326,7 @@ read_complete_token (SCM port, char *buffer, size_t buffer_size, /* Skip whitespace from PORT and return the first non-whitespace character read. Raise an error on end-of-file. */ static int -flush_ws (SCM port, const char *eoferr) +flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr) { scm_t_wchar c; while (1) @@ -321,10 +363,10 @@ flush_ws (SCM port, const char *eoferr) eoferr = "read_sharp"; goto goteof; case '!': - scm_read_shebang (c, port); + scm_read_shebang (c, port, opts); break; case ';': - scm_read_commented_expression (c, port); + scm_read_commented_expression (c, port, opts); break; case '|': if (scm_is_false (scm_get_hash_procedure (c))) @@ -355,44 +397,49 @@ flush_ws (SCM port, const char *eoferr) /* Token readers. */ -static SCM scm_read_expression (SCM port); -static SCM scm_read_sharp (int chr, SCM port, long line, int column); +static SCM scm_read_expression (SCM port, scm_t_read_opts *opts); +static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts, + long line, int column); static SCM -maybe_annotate_source (SCM x, SCM port, long line, int column) +maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts, + long line, int column) { - if (SCM_RECORD_POSITIONS_P) + if (opts->record_positions_p) scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port)); return x; } static SCM -scm_read_sexp (scm_t_wchar chr, SCM port) +scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_i_lreadparen" { int c; SCM tmp, tl, ans = SCM_EOL; - const int terminating_char = ((chr == '[') ? ']' : ')'); + const int curly_list_p = (chr == '{') && opts->curly_infix_p; + const int terminating_char = ((chr == '{') ? '}' + : ((chr == '[') ? ']' + : ')')); /* Need to capture line and column numbers here. */ long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - c = flush_ws (port, FUNC_NAME); + c = flush_ws (port, opts, FUNC_NAME); if (terminating_char == c) return SCM_EOL; scm_ungetc_unlocked (c, port); - tmp = scm_read_expression (port); + tmp = scm_read_expression (port, opts); /* Note that it is possible for scm_read_expression to return scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So check that it's a real dot by checking `c'. */ if (c == '.' && scm_is_eq (scm_sym_dot, tmp)) { - ans = scm_read_expression (port); - if (terminating_char != (c = flush_ws (port, FUNC_NAME))) + ans = scm_read_expression (port, opts); + if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME))) scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); return ans; @@ -401,28 +448,29 @@ scm_read_sexp (scm_t_wchar chr, SCM port) /* Build the head of the list structure. */ ans = tl = scm_cons (tmp, SCM_EOL); - while (terminating_char != (c = flush_ws (port, FUNC_NAME))) + while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME))) { SCM new_tail; - if (c == ')' || (SCM_SQUARE_BRACKETS_P && c == ']')) + if (c == ')' || (c == ']' && opts->square_brackets_p) + || ((c == '}' || c == ']') && opts->curly_infix_p)) scm_i_input_error (FUNC_NAME, port, "in pair: mismatched close paren: ~A", scm_list_1 (SCM_MAKE_CHAR (c))); scm_ungetc_unlocked (c, port); - tmp = scm_read_expression (port); + tmp = scm_read_expression (port, opts); /* See above note about scm_sym_dot. */ if (c == '.' && scm_is_eq (scm_sym_dot, tmp)) { - SCM_SETCDR (tl, scm_read_expression (port)); + SCM_SETCDR (tl, scm_read_expression (port, opts)); - c = flush_ws (port, FUNC_NAME); + c = flush_ws (port, opts, FUNC_NAME); if (terminating_char != c) scm_i_input_error (FUNC_NAME, port, "in pair: missing close paren", SCM_EOL); - goto exit; + break; } new_tail = scm_cons (tmp, SCM_EOL); @@ -430,8 +478,60 @@ scm_read_sexp (scm_t_wchar chr, SCM port) tl = new_tail; } - exit: - return maybe_annotate_source (ans, port, line, column); + if (curly_list_p) + { + /* In addition to finding the length, 'scm_ilength' checks for + improper or circular lists, in which case it returns -1. */ + int len = scm_ilength (ans); + + /* The (len == 0) case is handled above */ + if (len == 1) + /* Return directly to avoid re-annotating the element's source + location with the position of the outer brace. Also, it + might not be possible to annotate the element. */ + return scm_car (ans); /* {e} => e */ + else if (len == 2) + ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */ + else if (len >= 3 && (len & 1)) + { + /* It's a proper list whose length is odd and at least 3. If + the elements at odd indices (the infix operator positions) + are all 'equal?', then it's a simple curly-infix list. + Otherwise it's a mixed curly-infix list. */ + SCM op = scm_cadr (ans); + + /* Check to see if the elements at odd indices are 'equal?' */ + for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl)) + { + if (scm_is_null (tl)) + { + /* Convert simple curly-infix list to prefix: + {a <op> b <op> ...} => (<op> a b ...) */ + tl = ans; + while (scm_is_pair (scm_cdr (tl))) + { + tmp = scm_cddr (tl); + SCM_SETCDR (tl, tmp); + tl = tmp; + } + ans = scm_cons (op, ans); + break; + } + else if (scm_is_false (scm_equal_p (op, scm_car (tl)))) + { + /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */ + ans = scm_cons (sym_nfx, ans); + break; + } + } + } + else + /* Mixed curly-infix (possibly improper) list: + {e . tail} => ($nfx$ e . tail) */ + ans = scm_cons (sym_nfx, ans); + } + + return maybe_annotate_source (ans, port, opts, line, column); } #undef FUNC_NAME @@ -487,7 +587,7 @@ skip_intraline_whitespace (SCM port) } static SCM -scm_read_string (int chr, SCM port) +scm_read_string (int chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { /* For strings smaller than C_STR, this function creates only one Scheme @@ -526,7 +626,7 @@ scm_read_string (int chr, SCM port) case '\\': break; case '\n': - if (SCM_HUNGRY_EOL_ESCAPES_P) + if (opts->hungry_eol_escapes_p) skip_intraline_whitespace (port); continue; case '0': @@ -554,19 +654,19 @@ scm_read_string (int chr, SCM port) c = '\010'; break; case 'x': - if (SCM_R6RS_ESCAPES_P) + if (opts->r6rs_escapes_p) SCM_READ_HEX_ESCAPE (10, ';'); else SCM_READ_HEX_ESCAPE (2, '\0'); break; case 'u': - if (!SCM_R6RS_ESCAPES_P) + if (!opts->r6rs_escapes_p) { SCM_READ_HEX_ESCAPE (4, '\0'); break; } case 'U': - if (!SCM_R6RS_ESCAPES_P) + if (!opts->r6rs_escapes_p) { SCM_READ_HEX_ESCAPE (6, '\0'); break; @@ -593,13 +693,13 @@ scm_read_string (int chr, SCM port) str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED); } - return maybe_annotate_source (str, port, line, column); + return maybe_annotate_source (str, port, opts, line, column); } #undef FUNC_NAME static SCM -scm_read_number (scm_t_wchar chr, SCM port) +scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { SCM result, str = SCM_EOL; char local_buffer[READER_BUFFER_SIZE], *buffer; @@ -611,7 +711,7 @@ scm_read_number (scm_t_wchar chr, SCM port) int column = SCM_COL (port) - 1; scm_ungetc_unlocked (chr, port); - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &bytes_read); str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); @@ -620,30 +720,30 @@ scm_read_number (scm_t_wchar chr, SCM port) if (scm_is_false (result)) { /* Return a symbol instead of a number */ - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_string_to_symbol (str); } else if (SCM_NIMP (result)) - result = maybe_annotate_source (result, port, line, column); + result = maybe_annotate_source (result, port, opts, line, column); SCM_COL (port) += scm_i_string_length (str); return result; } static SCM -scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) +scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { SCM result; int ends_with_colon = 0; size_t bytes_read; - int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix); + int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX); char local_buffer[READER_BUFFER_SIZE], *buffer; scm_t_port *pt = SCM_PTAB_ENTRY (port); SCM str; scm_ungetc_unlocked (chr, port); - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &bytes_read); if (bytes_read > 0) ends_with_colon = buffer[bytes_read - 1] == ':'; @@ -653,7 +753,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) str = scm_from_stringn (buffer, bytes_read - 1, pt->encoding, pt->ilseq_handler); - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_symbol_to_keyword (scm_string_to_symbol (str)); } @@ -662,7 +762,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_string_to_symbol (str); } @@ -672,7 +772,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) } static SCM -scm_read_number_and_radix (scm_t_wchar chr, SCM port) +scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { SCM result; @@ -710,7 +810,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) radix = 10; } - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &read); pt = SCM_PTAB_ENTRY (port); @@ -730,7 +830,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) #undef FUNC_NAME static SCM -scm_read_quote (int chr, SCM port) +scm_read_quote (int chr, SCM port, scm_t_read_opts *opts) { SCM p; long line = SCM_LINUM (port); @@ -767,8 +867,8 @@ scm_read_quote (int chr, SCM port) abort (); } - p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - return maybe_annotate_source (p, port, line, column); + p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL); + return maybe_annotate_source (p, port, opts, line, column); } SCM_SYMBOL (sym_syntax, "syntax"); @@ -777,7 +877,7 @@ SCM_SYMBOL (sym_unsyntax, "unsyntax"); SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing"); static SCM -scm_read_syntax (int chr, SCM port) +scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts) { SCM p; long line = SCM_LINUM (port); @@ -814,14 +914,14 @@ scm_read_syntax (int chr, SCM port) abort (); } - p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - return maybe_annotate_source (p, port, line, column); + p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL); + return maybe_annotate_source (p, port, opts, line, column); } static SCM -scm_read_nil (int chr, SCM port) +scm_read_nil (int chr, SCM port, scm_t_read_opts *opts) { - SCM id = scm_read_mixed_case_symbol (chr, port); + SCM id = scm_read_mixed_case_symbol (chr, port, opts); if (!scm_is_eq (id, sym_nil)) scm_i_input_error ("scm_read_nil", port, @@ -867,7 +967,7 @@ scm_read_boolean (int chr, SCM port) } static SCM -scm_read_character (scm_t_wchar chr, SCM port) +scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { char buffer[READER_CHAR_NAME_MAX_SIZE]; @@ -877,7 +977,8 @@ scm_read_character (scm_t_wchar chr, SCM port) int overflow; scm_t_port *pt; - overflow = read_token (port, buffer, READER_CHAR_NAME_MAX_SIZE, &bytes_read); + overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE, + &bytes_read); if (overflow) scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL); @@ -973,7 +1074,7 @@ scm_read_character (scm_t_wchar chr, SCM port) #undef FUNC_NAME static SCM -scm_read_keyword (int chr, SCM port) +scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts) { SCM symbol; @@ -982,7 +1083,7 @@ scm_read_keyword (int chr, SCM port) to adapt to the delimiters currently valid of symbols. XXX: This implementation allows sloppy syntaxes like `#: key'. */ - symbol = scm_read_expression (port); + symbol = scm_read_expression (port, opts); if (!scm_is_symbol (symbol)) scm_i_input_error ("scm_read_keyword", port, "keyword prefix `~a' not followed by a symbol: ~s", @@ -992,34 +1093,186 @@ scm_read_keyword (int chr, SCM port) } static SCM -scm_read_vector (int chr, SCM port, long line, int column) +scm_read_vector (int chr, SCM port, scm_t_read_opts *opts, + long line, int column) { /* Note: We call `scm_read_sexp ()' rather than READER here in order to guarantee that it's going to do what we want. After all, this is an implementation detail of `scm_read_vector ()', not a desirable property. */ - return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)), - port, line, column); + return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)), + port, opts, line, column); +} + +/* Helper used by scm_read_array */ +static int +read_decimal_integer (SCM port, int c, ssize_t *resp) +{ + ssize_t sign = 1; + ssize_t res = 0; + int got_it = 0; + + if (c == '-') + { + sign = -1; + c = scm_getc_unlocked (port); + } + + while ('0' <= c && c <= '9') + { + res = 10*res + c-'0'; + got_it = 1; + c = scm_getc_unlocked (port); + } + + if (got_it) + *resp = sign * res; + return c; } +/* Read an array. This function can also read vectors and uniform + vectors. Also, the conflict between '#f' and '#f32' and '#f64' is + handled here. + + C is the first character read after the '#'. */ static SCM -scm_read_array (int chr, SCM port, long line, int column) +scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column) { - SCM result = scm_i_read_array (port, chr); - if (scm_is_false (result)) - return result; + ssize_t rank; + scm_t_wchar tag_buf[8]; + int tag_len; + + SCM tag, shape = SCM_BOOL_F, elements, array; + + /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but + the array code can not deal with zero-length dimensions yet, and + we want to allow zero-length vectors, of course. */ + if (c == '(') + return scm_read_vector (c, port, opts, line, column); + + /* Disambiguate between '#f' and uniform floating point vectors. */ + if (c == 'f') + { + c = scm_getc_unlocked (port); + if (c != '3' && c != '6') + { + if (c != EOF) + scm_ungetc_unlocked (c, port); + return SCM_BOOL_F; + } + rank = 1; + tag_buf[0] = 'f'; + tag_len = 1; + goto continue_reading_tag; + } + + /* Read rank. */ + rank = 1; + c = read_decimal_integer (port, c, &rank); + if (rank < 0) + scm_i_input_error (NULL, port, "array rank must be non-negative", + SCM_EOL); + + /* Read tag. */ + tag_len = 0; + continue_reading_tag: + while (c != EOF && c != '(' && c != '@' && c != ':' + && tag_len < sizeof tag_buf / sizeof tag_buf[0]) + { + tag_buf[tag_len++] = c; + c = scm_getc_unlocked (port); + } + if (tag_len == 0) + tag = SCM_BOOL_T; else - return maybe_annotate_source (result, port, line, column); + { + tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len)); + if (tag_len == sizeof tag_buf / sizeof tag_buf[0]) + scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a", + scm_list_1 (tag)); + } + + /* Read shape. */ + if (c == '@' || c == ':') + { + shape = SCM_EOL; + + do + { + ssize_t lbnd = 0, len = 0; + SCM s; + + if (c == '@') + { + c = scm_getc_unlocked (port); + c = read_decimal_integer (port, c, &lbnd); + } + + s = scm_from_ssize_t (lbnd); + + if (c == ':') + { + c = scm_getc_unlocked (port); + c = read_decimal_integer (port, c, &len); + if (len < 0) + scm_i_input_error (NULL, port, + "array length must be non-negative", + SCM_EOL); + + s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1)); + } + + shape = scm_cons (s, shape); + } while (c == '@' || c == ':'); + + shape = scm_reverse_x (shape, SCM_EOL); + } + + /* Read nested lists of elements. */ + if (c != '(') + scm_i_input_error (NULL, port, + "missing '(' in vector or array literal", + SCM_EOL); + elements = scm_read_sexp (c, port, opts); + + if (scm_is_false (shape)) + shape = scm_from_ssize_t (rank); + else if (scm_ilength (shape) != rank) + scm_i_input_error + (NULL, port, + "the number of shape specifications must match the array rank", + SCM_EOL); + + /* Handle special print syntax of rank zero arrays; see + scm_i_print_array for a rationale. */ + if (rank == 0) + { + if (!scm_is_pair (elements)) + scm_i_input_error (NULL, port, + "too few elements in array literal, need 1", + SCM_EOL); + if (!scm_is_null (SCM_CDR (elements))) + scm_i_input_error (NULL, port, + "too many elements in array literal, want 1", + SCM_EOL); + elements = SCM_CAR (elements); + } + + /* Construct array, annotate with source location, and return. */ + array = scm_list_to_typed_array (tag, shape, elements); + return maybe_annotate_source (array, port, opts, line, column); } static SCM -scm_read_srfi4_vector (int chr, SCM port, long line, int column) +scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts, + long line, int column) { - return scm_read_array (chr, port, line, column); + return scm_read_array (chr, port, opts, line, column); } static SCM -scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) +scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) { chr = scm_getc_unlocked (port); if (chr != 'u') @@ -1034,8 +1287,8 @@ scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) goto syntax; return maybe_annotate_source - (scm_u8_list_to_bytevector (scm_read_sexp (chr, port)), - port, line, column); + (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)), + port, opts, line, column); syntax: scm_i_input_error ("read_bytevector", port, @@ -1045,7 +1298,8 @@ scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) } static SCM -scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column) +scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) { /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is terribly inefficient but who cares? */ @@ -1063,7 +1317,7 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column) return maybe_annotate_source (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)), - port, line, column); + port, opts, line, column); } static SCM @@ -1090,38 +1344,59 @@ scm_read_scsh_block_comment (scm_t_wchar chr, SCM port) return SCM_UNSPECIFIED; } +static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, + int value); +static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, + int value); +static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, + int value); + static SCM -scm_read_shebang (scm_t_wchar chr, SCM port) +scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { - int c = 0; - if ((c = scm_get_byte_or_eof_unlocked (port)) != 'r') - { - scm_ungetc_unlocked (c, port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof_unlocked (port)) != '6') - { - scm_ungetc_unlocked (c, port); - scm_ungetc_unlocked ('r', port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof_unlocked (port)) != 'r') - { - scm_ungetc_unlocked (c, port); - scm_ungetc_unlocked ('6', port); - scm_ungetc_unlocked ('r', port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof_unlocked (port)) != 's') + char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1]; + int c; + int i = 0; + + while (i <= READER_DIRECTIVE_NAME_MAX_SIZE) { - scm_ungetc_unlocked (c, port); - scm_ungetc_unlocked ('r', port); - scm_ungetc_unlocked ('6', port); - scm_ungetc_unlocked ('r', port); - return scm_read_scsh_block_comment (chr, port); + c = scm_getc_unlocked (port); + if (c == EOF) + scm_i_input_error ("skip_block_comment", port, + "unterminated `#! ... !#' comment", SCM_EOL); + else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-') + name[i++] = c; + else if (CHAR_IS_DELIMITER (c)) + { + scm_ungetc_unlocked (c, port); + name[i] = '\0'; + if (0 == strcmp ("r6rs", name)) + ; /* Silently ignore */ + else if (0 == strcmp ("fold-case", name)) + set_port_case_insensitive_p (port, opts, 1); + else if (0 == strcmp ("no-fold-case", name)) + set_port_case_insensitive_p (port, opts, 0); + else if (0 == strcmp ("curly-infix", name)) + set_port_curly_infix_p (port, opts, 1); + else if (0 == strcmp ("curly-infix-and-bracket-lists", name)) + { + set_port_curly_infix_p (port, opts, 1); + set_port_square_brackets_p (port, opts, 0); + } + else + break; + + return SCM_UNSPECIFIED; + } + else + { + scm_ungetc_unlocked (c, port); + break; + } } - - return SCM_UNSPECIFIED; + while (i > 0) + scm_ungetc_unlocked (name[--i], port); + return scm_read_scsh_block_comment (chr, port); } static SCM @@ -1163,16 +1438,17 @@ scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port) } static SCM -scm_read_commented_expression (scm_t_wchar chr, SCM port) +scm_read_commented_expression (scm_t_wchar chr, SCM port, + scm_t_read_opts *opts) { scm_t_wchar c; - c = flush_ws (port, (char *) NULL); + c = flush_ws (port, opts, (char *) NULL); if (EOF == c) scm_i_input_error ("read_commented_expression", port, "no expression after #; comment", SCM_EOL); scm_ungetc_unlocked (c, port); - scm_read_expression (port); + scm_read_expression (port, opts); return SCM_UNSPECIFIED; } @@ -1274,7 +1550,7 @@ scm_read_extended_symbol (scm_t_wchar chr, SCM port) /* Top-level token readers, i.e., dispatchers. */ static SCM -scm_read_sharp_extension (int chr, SCM port) +scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts) { SCM proc; @@ -1287,7 +1563,8 @@ scm_read_sharp_extension (int chr, SCM port) got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port); - if (scm_is_pair (got) && !scm_i_has_source_properties (got)) + if (opts->record_positions_p && SCM_NIMP (got) + && !scm_i_has_source_properties (got)) scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port)); return got; @@ -1299,43 +1576,44 @@ scm_read_sharp_extension (int chr, SCM port) /* The reader for the sharp `#' character. It basically dispatches reads among the above token readers. */ static SCM -scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) +scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) #define FUNC_NAME "scm_lreadr" { SCM result; chr = scm_getc_unlocked (port); - result = scm_read_sharp_extension (chr, port); + result = scm_read_sharp_extension (chr, port, opts); if (!scm_is_eq (result, SCM_UNSPECIFIED)) return result; switch (chr) { case '\\': - return (scm_read_character (chr, port)); + return (scm_read_character (chr, port, opts)); case '(': - return (scm_read_vector (chr, port, line, column)); + return (scm_read_vector (chr, port, opts, line, column)); case 's': case 'u': case 'f': case 'c': /* This one may return either a boolean or an SRFI-4 vector. */ - return (scm_read_srfi4_vector (chr, port, line, column)); + return (scm_read_srfi4_vector (chr, port, opts, line, column)); case 'v': - return (scm_read_bytevector (chr, port, line, column)); + return (scm_read_bytevector (chr, port, opts, line, column)); case '*': - return (scm_read_guile_bit_vector (chr, port, line, column)); + return (scm_read_guile_bit_vector (chr, port, opts, line, column)); case 't': case 'T': case 'F': return (scm_read_boolean (chr, port)); case ':': - return (scm_read_keyword (chr, port)); + return (scm_read_keyword (chr, port, opts)); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '@': - return (scm_read_array (chr, port, line, column)); + return (scm_read_array (chr, port, opts, line, column)); case 'i': case 'e': @@ -1349,21 +1627,21 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) case 'X': case 'I': case 'E': - return (scm_read_number_and_radix (chr, port)); + return (scm_read_number_and_radix (chr, port, opts)); case '{': return (scm_read_extended_symbol (chr, port)); case '!': - return (scm_read_shebang (chr, port)); + return (scm_read_shebang (chr, port, opts)); case ';': - return (scm_read_commented_expression (chr, port)); + return (scm_read_commented_expression (chr, port, opts)); case '`': case '\'': case ',': - return (scm_read_syntax (chr, port)); + return (scm_read_syntax (chr, port, opts)); case 'n': - return (scm_read_nil (chr, port)); + return (scm_read_nil (chr, port, opts)); default: - result = scm_read_sharp_extension (chr, port); + result = scm_read_sharp_extension (chr, port, opts); if (scm_is_eq (result, SCM_UNSPECIFIED)) { /* To remain compatible with 1.8 and earlier, the following @@ -1387,8 +1665,8 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) #undef FUNC_NAME static SCM -scm_read_expression (SCM port) -#define FUNC_NAME "scm_read_expression" +read_inner_expression (SCM port, scm_t_read_opts *opts) +#define FUNC_NAME "read_inner_expression" { while (1) { @@ -1404,23 +1682,55 @@ scm_read_expression (SCM port) case ';': (void) scm_read_semicolon_comment (chr, port); break; + case '{': + if (opts->curly_infix_p) + { + if (opts->neoteric_p) + return scm_read_sexp (chr, port, opts); + else + { + SCM expr; + + /* Enable neoteric expressions within curly braces */ + opts->neoteric_p = 1; + expr = scm_read_sexp (chr, port, opts); + opts->neoteric_p = 0; + return expr; + } + } + else + return scm_read_mixed_case_symbol (chr, port, opts); case '[': - if (!SCM_SQUARE_BRACKETS_P) - return (scm_read_mixed_case_symbol (chr, port)); - /* otherwise fall through */ + if (opts->square_brackets_p) + return scm_read_sexp (chr, port, opts); + else if (opts->curly_infix_p) + { + /* The syntax of neoteric expressions requires that '[' be + a delimiter when curly-infix is enabled, so it cannot + be part of an unescaped symbol. We might as well do + something useful with it, so we adopt Kawa's convention: + [...] => ($bracket-list$ ...) */ + long line = SCM_LINUM (port); + int column = SCM_COL (port) - 1; + return maybe_annotate_source + (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)), + port, opts, line, column); + } + else + return scm_read_mixed_case_symbol (chr, port, opts); case '(': - return (scm_read_sexp (chr, port)); + return (scm_read_sexp (chr, port, opts)); case '"': - return (scm_read_string (chr, port)); + return (scm_read_string (chr, port, opts)); case '\'': case '`': case ',': - return (scm_read_quote (chr, port)); + return (scm_read_quote (chr, port, opts)); case '#': { long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - SCM result = scm_read_sharp (chr, port, line, column); + SCM result = scm_read_sharp (chr, port, opts, line, column); if (scm_is_eq (result, SCM_UNSPECIFIED)) /* We read a comment or some such. */ break; @@ -1430,33 +1740,108 @@ scm_read_expression (SCM port) case ')': scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL); break; + case '}': + if (opts->curly_infix_p) + scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL); + else + return scm_read_mixed_case_symbol (chr, port, opts); case ']': - if (SCM_SQUARE_BRACKETS_P) + if (opts->square_brackets_p) scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL); /* otherwise fall through */ case EOF: return SCM_EOF_VAL; case ':': - if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix)) - return scm_symbol_to_keyword (scm_read_expression (port)); + if (opts->keyword_style == KEYWORD_STYLE_PREFIX) + return scm_symbol_to_keyword (scm_read_expression (port, opts)); /* Fall through. */ default: { if (((chr >= '0') && (chr <= '9')) || (strchr ("+-.", chr))) - return (scm_read_number (chr, port)); + return (scm_read_number (chr, port, opts)); else - return (scm_read_mixed_case_symbol (chr, port)); + return (scm_read_mixed_case_symbol (chr, port, opts)); } } } } #undef FUNC_NAME +static SCM +scm_read_expression (SCM port, scm_t_read_opts *opts) +#define FUNC_NAME "scm_read_expression" +{ + if (!opts->neoteric_p) + return read_inner_expression (port, opts); + else + { + long line = 0; + int column = 0; + SCM expr; + + if (opts->record_positions_p) + { + /* We need to get the position of the first non-whitespace + character in order to correctly annotate neoteric + expressions. For example, for the expression 'f(x)', the + first call to 'read_inner_expression' reads the 'f' (which + cannot be annotated), and then we later read the '(x)' and + use it to construct the new list (f x). */ + int c = flush_ws (port, opts, (char *) NULL); + if (c == EOF) + return SCM_EOF_VAL; + scm_ungetc_unlocked (c, port); + line = SCM_LINUM (port); + column = SCM_COL (port); + } + + expr = read_inner_expression (port, opts); + + /* 'expr' is the first component of the neoteric expression. Now + we loop, and as long as the next character is '(', '[', or '{', + (without any intervening whitespace), we use it to construct a + new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */ + for (;;) + { + int chr = scm_getc_unlocked (port); + + if (chr == '(') + /* e(...) => (e ...) */ + expr = scm_cons (expr, scm_read_sexp (chr, port, opts)); + else if (chr == '[') + /* e[...] => ($bracket-apply$ e ...) */ + expr = scm_cons (sym_bracket_apply, + scm_cons (expr, + scm_read_sexp (chr, port, opts))); + else if (chr == '{') + { + SCM arg = scm_read_sexp (chr, port, opts); + + if (scm_is_null (arg)) + expr = scm_list_1 (expr); /* e{} => (e) */ + else + expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */ + } + else + { + if (chr != EOF) + scm_ungetc_unlocked (chr, port); + break; + } + maybe_annotate_source (expr, port, opts, line, column); + } + return expr; + } +} +#undef FUNC_NAME + /* Actual reader. */ +static void init_read_options (SCM port, scm_t_read_opts *opts); + SCM_DEFINE (scm_read, "read", 0, 1, 0, (SCM port), "Read an s-expression from the input port @var{port}, or from\n" @@ -1464,18 +1849,21 @@ SCM_DEFINE (scm_read, "read", 0, 1, 0, "Any whitespace before the next token is discarded.") #define FUNC_NAME s_scm_read { + scm_t_read_opts opts; int c; if (SCM_UNBNDP (port)) port = scm_current_input_port (); SCM_VALIDATE_OPINPORT (1, port); - c = flush_ws (port, (char *) NULL); + init_read_options (port, &opts); + + c = flush_ws (port, &opts, (char *) NULL); if (EOF == c) return SCM_EOF_VAL; scm_ungetc_unlocked (c, port); - return (scm_read_expression (port)); + return (scm_read_expression (port, &opts)); } #undef FUNC_NAME @@ -1732,6 +2120,143 @@ SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0, } #undef FUNC_NAME + +/* Per-port read options. + + We store per-port read options in the 'port-read-options' key of the + port's alist. The value stored in the alist is a single integer that + contains a two-bit field for each read option. + + If a bit field contains READ_OPTION_INHERIT (3), that indicates that + the applicable value should be inherited from the corresponding + global read option. Otherwise, the bit field contains the value of + the read option. For boolean read options that have been set + per-port, the possible values are 0 or 1. If the 'keyword_style' + read option has been set per-port, its possible values are those in + 'enum t_keyword_style'. */ + +/* Key to read options in per-port alists. */ +SCM_SYMBOL (sym_port_read_options, "port-read-options"); + +/* Offsets of bit fields for each per-port override */ +#define READ_OPTION_COPY_SOURCE_P 0 +#define READ_OPTION_RECORD_POSITIONS_P 2 +#define READ_OPTION_CASE_INSENSITIVE_P 4 +#define READ_OPTION_KEYWORD_STYLE 6 +#define READ_OPTION_R6RS_ESCAPES_P 8 +#define READ_OPTION_SQUARE_BRACKETS_P 10 +#define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 +#define READ_OPTION_CURLY_INFIX_P 14 + +/* The total width in bits of the per-port overrides */ +#define READ_OPTIONS_NUM_BITS 16 + +#define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) +#define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL + +#define READ_OPTION_MASK 3 +#define READ_OPTION_INHERIT 3 + +static void +set_port_read_option (SCM port, int option, int new_value) +{ + SCM scm_read_options; + unsigned int read_options; + + new_value &= READ_OPTION_MASK; + scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist, + sym_port_read_options); + if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE)) + read_options = scm_to_uint (scm_read_options); + else + read_options = READ_OPTIONS_INHERIT_ALL; + read_options &= ~(READ_OPTION_MASK << option); + read_options |= new_value << option; + scm_read_options = scm_from_uint (read_options); + SCM_PTAB_ENTRY(port)->alist = scm_assq_set_x (SCM_PTAB_ENTRY(port)->alist, + sym_port_read_options, + scm_read_options); +} + +/* Set OPTS and PORT's case-insensitivity according to VALUE. */ +static void +set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value) +{ + value = !!value; + opts->case_insensitive_p = value; + set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value); +} + +/* Set OPTS and PORT's square_brackets_p option according to VALUE. */ +static void +set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value) +{ + value = !!value; + opts->square_brackets_p = value; + set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value); +} + +/* Set OPTS and PORT's curly_infix_p option according to VALUE. */ +static void +set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value) +{ + value = !!value; + opts->curly_infix_p = value; + set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value); +} + +/* Initialize OPTS based on PORT's read options and the global read + options. */ +static void +init_read_options (SCM port, scm_t_read_opts *opts) +{ + SCM val, scm_read_options; + unsigned int read_options, x; + + scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist, + sym_port_read_options); + + if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE)) + read_options = scm_to_uint (scm_read_options); + else + read_options = READ_OPTIONS_INHERIT_ALL; + + x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE); + if (x == READ_OPTION_INHERIT) + { + val = SCM_PACK (SCM_KEYWORD_STYLE); + if (scm_is_eq (val, scm_keyword_prefix)) + x = KEYWORD_STYLE_PREFIX; + else if (scm_is_eq (val, scm_keyword_postfix)) + x = KEYWORD_STYLE_POSTFIX; + else + x = KEYWORD_STYLE_HASH_PREFIX; + } + opts->keyword_style = x; + +#define RESOLVE_BOOLEAN_OPTION(NAME, name) \ + do \ + { \ + x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \ + if (x == READ_OPTION_INHERIT) \ + x = !!SCM_ ## NAME; \ + opts->name = x; \ + } \ + while (0) + + RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p); + RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p); + RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p); + RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p); + RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p); + RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); + RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); + +#undef RESOLVE_BOOLEAN_OPTION + + opts->neoteric_p = 0; +} + void scm_init_read () { diff --git a/libguile/read.h b/libguile/read.h index 4bd08fa44..3c47afdd0 100644 --- a/libguile/read.h +++ b/libguile/read.h @@ -54,7 +54,6 @@ SCM_API SCM scm_sym_dot; SCM_API SCM scm_read_options (SCM setting); SCM_API SCM scm_read (SCM port); -SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird); SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc); SCM_INTERNAL char *scm_i_scan_for_encoding (SCM port); SCM_API SCM scm_file_encoding (SCM port); diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index 28345532e..97c5a1d64 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -2993,11 +2993,22 @@ SCM_DEFINE (scm_string_tokenize, "string-tokenize", 1, 3, 0, #undef FUNC_NAME SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0, - (SCM str, SCM chr), + (SCM str, SCM char_pred), "Split the string @var{str} into a list of the substrings delimited\n" - "by appearances of the character @var{chr}. Note that an empty substring\n" - "between separator characters will result in an empty string in the\n" - "result list.\n" + "by appearances of characters that\n" + "\n" + "@itemize @bullet\n" + "@item\n" + "equal @var{char_pred}, if it is a character,\n" + "\n" + "@item\n" + "satisfy the predicate @var{char_pred}, if it is a procedure,\n" + "\n" + "@item\n" + "are in the set @var{char_pred}, if it is a character set.\n" + "@end itemize\n\n" + "Note that an empty substring between separator characters\n" + "will result in an empty string in the result list.\n" "\n" "@lisp\n" "(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n" @@ -3014,47 +3025,73 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0, "@end lisp") #define FUNC_NAME s_scm_string_split { - long idx, last_idx; - int narrow; SCM res = SCM_EOL; SCM_VALIDATE_STRING (1, str); - SCM_VALIDATE_CHAR (2, chr); - /* This is explicit wide/narrow logic (instead of using - scm_i_string_ref) is a speed optimization. */ - idx = scm_i_string_length (str); - narrow = scm_i_is_narrow_string (str); - if (narrow) + if (SCM_CHARP (char_pred)) { - const char *buf = scm_i_string_chars (str); - while (idx >= 0) + long idx, last_idx; + int narrow; + + /* This is explicit wide/narrow logic (instead of using + scm_i_string_ref) is a speed optimization. */ + idx = scm_i_string_length (str); + narrow = scm_i_is_narrow_string (str); + if (narrow) + { + const char *buf = scm_i_string_chars (str); + while (idx >= 0) + { + last_idx = idx; + while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(char_pred)) + idx--; + if (idx >= 0) + { + res = scm_cons (scm_i_substring (str, idx, last_idx), res); + idx--; + } + } + } + else { - last_idx = idx; - while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(chr)) - idx--; - if (idx >= 0) + const scm_t_wchar *buf = scm_i_string_wide_chars (str); + while (idx >= 0) { - res = scm_cons (scm_i_substring (str, idx, last_idx), res); - idx--; + last_idx = idx; + while (idx > 0 && buf[idx-1] != SCM_CHAR(char_pred)) + idx--; + if (idx >= 0) + { + res = scm_cons (scm_i_substring (str, idx, last_idx), res); + idx--; + } } } } else { - const scm_t_wchar *buf = scm_i_string_wide_chars (str); - while (idx >= 0) + SCM sidx, slast_idx; + + if (!SCM_CHARSETP (char_pred)) + SCM_ASSERT (scm_is_true (scm_procedure_p (char_pred)), + char_pred, SCM_ARG2, FUNC_NAME); + + /* Supporting predicates and character sets involves handling SCM + values so there is less chance to optimize. */ + slast_idx = scm_string_length (str); + for (;;) { - last_idx = idx; - while (idx > 0 && buf[idx-1] != SCM_CHAR(chr)) - idx--; - if (idx >= 0) - { - res = scm_cons (scm_i_substring (str, idx, last_idx), res); - idx--; - } + sidx = scm_string_index_right (str, char_pred, SCM_INUM0, slast_idx); + if (scm_is_false (sidx)) + break; + res = scm_cons (scm_substring (str, scm_oneplus (sidx), slast_idx), res); + slast_idx = sidx; } + + res = scm_cons (scm_substring (str, SCM_INUM0, slast_idx), res); } + scm_remember_upto_here_1 (str); return res; } diff --git a/libguile/srfi-13.h b/libguile/srfi-13.h index f63239a25..325e22272 100644 --- a/libguile/srfi-13.h +++ b/libguile/srfi-13.h @@ -110,7 +110,7 @@ SCM_API SCM scm_xsubstring (SCM s, SCM from, SCM to, SCM start, SCM end); SCM_API SCM scm_string_xcopy_x (SCM target, SCM tstart, SCM s, SCM sfrom, SCM sto, SCM start, SCM end); SCM_API SCM scm_string_replace (SCM s1, SCM s2, SCM start1, SCM end1, SCM start2, SCM end2); SCM_API SCM scm_string_tokenize (SCM s, SCM token_char, SCM start, SCM end); -SCM_API SCM scm_string_split (SCM s, SCM chr); +SCM_API SCM scm_string_split (SCM s, SCM char_pred); SCM_API SCM scm_string_filter (SCM char_pred, SCM s, SCM start, SCM end); SCM_API SCM scm_string_delete (SCM char_pred, SCM s, SCM start, SCM end); diff --git a/libguile/strings.c b/libguile/strings.c index 7c5550fb3..5130cb362 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1997,7 +1997,10 @@ u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len) char * scm_to_utf8_stringn (SCM str, size_t *lenp) +#define FUNC_NAME "scm_to_utf8_stringn" { + SCM_VALIDATE_STRING (1, str); + if (scm_i_is_narrow_string (str)) return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str), scm_i_string_length (str), @@ -2044,6 +2047,7 @@ scm_to_utf8_stringn (SCM str, size_t *lenp) } } } +#undef FUNC_NAME scm_t_wchar * scm_to_utf32_string (SCM str) diff --git a/meta/Makefile.am b/meta/Makefile.am index acf885490..c9d6a3fd7 100644 --- a/meta/Makefile.am +++ b/meta/Makefile.am @@ -28,7 +28,7 @@ EXTRA_DIST= \ guild.in guile-config.in # What we now call `guild' used to be known as `guile-tools'. -install-data-hook: +install-exec-hook: guild="`echo $(ECHO_N) guild \ | $(SED) -e '$(program_transform_name)'`$(EXEEXT)" ; \ guile_tools="`echo $(ECHO_N) guile-tools \ diff --git a/module/ice-9/boot-9.scm b/module/ice-9/boot-9.scm index 14dfb605e..1285c8364 100644 --- a/module/ice-9/boot-9.scm +++ b/module/ice-9/boot-9.scm @@ -3137,8 +3137,11 @@ module '(ice-9 q) '(make-q q-length))}." (lambda (option) (apply (lambda (name value documentation) (display name) - (if (< (string-length (symbol->string name)) 8) - (display #\tab)) + (let ((len (string-length (symbol->string name)))) + (when (< len 16) + (display #\tab) + (when (< len 8) + (display #\tab)))) (display #\tab) (display value) (display #\tab) @@ -3509,7 +3512,9 @@ module '(ice-9 q) '(make-q q-length))}." (define-syntax define-public (syntax-rules () ((_ (name . args) . body) - (define-public name (lambda args . body))) + (begin + (define name (lambda args . body)) + (export name))) ((_ name val) (begin (define name val) @@ -3899,7 +3904,7 @@ module '(ice-9 q) '(make-q q-length))}." ;;; ;;; Currently, the following feature identifiers are supported: ;;; -;;; guile r5rs srfi-0 srfi-4 srfi-6 srfi-13 srfi-14 srfi-55 srfi-61 +;;; guile r5rs srfi-0 srfi-4 srfi-6 srfi-13 srfi-14 srfi-55 srfi-61 srfi-105 ;;; ;;; Remember to update the features list when adding more SRFIs. ;;; @@ -3919,6 +3924,7 @@ module '(ice-9 q) '(make-q q-length))}." srfi-39 ;; parameterize srfi-55 ;; require-extension srfi-61 ;; general cond clause + srfi-105 ;; curly infix expressions )) ;; This table maps module public interfaces to the list of features. diff --git a/module/ice-9/command-line.scm b/module/ice-9/command-line.scm index 62a2c9e4f..d60a6e36a 100644 --- a/module/ice-9/command-line.scm +++ b/module/ice-9/command-line.scm @@ -1,6 +1,6 @@ ;;; Parsing Guile's command-line -;;; Copyright (C) 1994-1998, 2000-2011 Free Software Foundation, Inc. +;;; Copyright (C) 1994-1998, 2000-2011, 2012 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -325,7 +325,7 @@ If FILE begins with `-' the -s switch is mandatory. ((string=? arg "--listen") ; start a repl server (parse args - (cons '(@@ (system repl server) (spawn-server)) out))) + (cons '((@@ (system repl server) spawn-server)) out))) ((string-prefix? "--listen=" arg) ; start a repl server (parse @@ -336,14 +336,12 @@ If FILE begins with `-' the -s switch is mandatory. ((string->number where) ; --listen=PORT => (lambda (port) (if (and (integer? port) (exact? port) (>= port 0)) - `(@@ (system repl server) - (spawn-server - (make-tcp-server-socket #:port ,port))) + `((@@ (system repl server) spawn-server) + ((@@ (system repl server) make-tcp-server-socket) #:port ,port)) (error "invalid port for --listen")))) ((string-prefix? "/" where) ; --listen=/PATH/TO/SOCKET - `(@@ (system repl server) - (spawn-server - (make-unix-domain-server-socket #:path ,where)))) + `((@@ (system repl server) spawn-server) + ((@@ (system repl server) make-unix-domain-server-socket) #:path ,where))) (else (error "unknown argument to --listen")))) out))) diff --git a/module/ice-9/curried-definitions.scm b/module/ice-9/curried-definitions.scm index d55f1fb6a..8c684a18c 100644 --- a/module/ice-9/curried-definitions.scm +++ b/module/ice-9/curried-definitions.scm @@ -16,7 +16,8 @@ (define-module (ice-9 curried-definitions) #:replace ((cdefine . define) - (cdefine* . define*))) + (cdefine* . define*) + define-public)) (define-syntax cdefine (syntax-rules () @@ -39,3 +40,14 @@ (lambda* rest body body* ...))) ((_ . rest) (define* . rest)))) + +(define-syntax define-public + (syntax-rules () + ((_ (name . args) . body) + (begin + (cdefine (name . args) . body) + (export name))) + ((_ name val) + (begin + (define name val) + (export name))))) diff --git a/module/ice-9/format.scm b/module/ice-9/format.scm index d038ace5a..eed8cbb0e 100644 --- a/module/ice-9/format.scm +++ b/module/ice-9/format.scm @@ -427,15 +427,15 @@ (case modifier ((at) (format:out-str - (with-output-to-string - (lambda () - (truncated-print (next-arg) + (call-with-output-string + (lambda (p) + (truncated-print (next-arg) p #:width width))))) ((colon-at) (format:out-str - (with-output-to-string - (lambda () - (truncated-print (next-arg) + (call-with-output-string + (lambda (p) + (truncated-print (next-arg) p #:width (max (- width output-col) @@ -779,7 +779,7 @@ (define (format:obj->str obj slashify) (let ((res (if slashify (object->string obj) - (with-output-to-string (lambda () (display obj)))))) + (call-with-output-string (lambda (p) (display obj p)))))) (if (and format:read-proof (string-prefix? "#<" res)) (object->string res) res))) diff --git a/module/ice-9/regex.scm b/module/ice-9/regex.scm index f7b94b72a..08ae2c2f5 100644 --- a/module/ice-9/regex.scm +++ b/module/ice-9/regex.scm @@ -172,8 +172,9 @@ (let loop ((start 0) (value init) (abuts #f)) ; True if start abuts a previous match. + (define bol (if (zero? start) 0 regexp/notbol)) (let ((m (if (> start (string-length string)) #f - (regexp-exec regexp string start flags)))) + (regexp-exec regexp string start (logior flags bol))))) (cond ((not m) value) ((and (= (match:start m) (match:end m)) abuts) diff --git a/module/language/tree-il/primitives.scm b/module/language/tree-il/primitives.scm index 18126863c..e3f6a9071 100644 --- a/module/language/tree-il/primitives.scm +++ b/module/language/tree-il/primitives.scm @@ -516,6 +516,27 @@ (define-primitive-expander f64vector-set! (vec i x) (bytevector-ieee-double-native-set! vec (* i 8) x)) +;; Appropriate for use with either 'eqv?' or 'equal?'. +(define maybe-simplify-to-eq + (case-lambda + ((src a b) + ;; Simplify cases where either A or B is constant. + (define (maybe-simplify a b) + (and (const? a) + (let ((v (const-exp a))) + (and (or (memq v '(#f #t () #nil)) + (symbol? v) + (and (integer? v) + (exact? v) + (<= v most-positive-fixnum) + (>= v most-negative-fixnum))) + (make-primcall src 'eq? (list a b)))))) + (or (maybe-simplify a b) (maybe-simplify b a))) + (else #f))) + +(hashq-set! *primitive-expand-table* 'eqv? maybe-simplify-to-eq) +(hashq-set! *primitive-expand-table* 'equal? maybe-simplify-to-eq) + (hashq-set! *primitive-expand-table* '@dynamic-wind (case-lambda diff --git a/module/srfi/srfi-19.scm b/module/srfi/srfi-19.scm index d8f764335..c0a27b1a2 100644 --- a/module/srfi/srfi-19.scm +++ b/module/srfi/srfi-19.scm @@ -1113,13 +1113,13 @@ (cons #\1 (lambda (date pad-with port) (display (date->string date "~Y-~m-~d") port))) (cons #\2 (lambda (date pad-with port) - (display (date->string date "~k:~M:~S~z") port))) + (display (date->string date "~H:~M:~S~z") port))) (cons #\3 (lambda (date pad-with port) - (display (date->string date "~k:~M:~S") port))) + (display (date->string date "~H:~M:~S") port))) (cons #\4 (lambda (date pad-with port) - (display (date->string date "~Y-~m-~dT~k:~M:~S~z") port))) + (display (date->string date "~Y-~m-~dT~H:~M:~S~z") port))) (cons #\5 (lambda (date pad-with port) - (display (date->string date "~Y-~m-~dT~k:~M:~S") port))))) + (display (date->string date "~Y-~m-~dT~H:~M:~S") port))))) (define (get-formatter char) diff --git a/module/srfi/srfi-31.scm b/module/srfi/srfi-31.scm index 4238dc269..cf67e8af5 100644 --- a/module/srfi/srfi-31.scm +++ b/module/srfi/srfi-31.scm @@ -1,6 +1,6 @@ ;;; srfi-31.scm --- special form for recursive evaluation -;; Copyright (C) 2004, 2006 Free Software Foundation, Inc. +;; Copyright (C) 2004, 2006, 2012 Free Software Foundation, Inc. ;; ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -19,17 +19,15 @@ ;;; Original author: Rob Browning <rlb@defaultvalue.org> (define-module (srfi srfi-31) - :export-syntax (rec)) + #:export (rec)) -(define-macro (rec arg-form . body) - (cond - ((and (symbol? arg-form) (= 1 (length body))) - ;; (rec S (cons 1 (delay S))) - `(letrec ((,arg-form ,(car body))) - ,arg-form)) - ;; (rec (f x) (+ x 1)) - ((list? arg-form) - `(letrec ((,(car arg-form) (lambda ,(cdr arg-form) ,@body))) - ,(car arg-form))) - (else - (error "syntax error in rec form" `(rec ,arg-form ,@body))))) +(define-syntax rec + (syntax-rules () + "Return the given object, defined in a lexical environment where +NAME is bound to itself." + ((_ (name . formals) body ...) ; procedure + (letrec ((name (lambda formals body ...))) + name)) + ((_ name expr) ; arbitrary object + (letrec ((name expr)) + name)))) diff --git a/module/system/base/compile.scm b/module/system/base/compile.scm index 0bc11a30f..afcb55a72 100644 --- a/module/system/base/compile.scm +++ b/module/system/base/compile.scm @@ -1,6 +1,6 @@ ;;; High-level compiler interface -;; Copyright (C) 2001, 2009, 2010, 2011 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. ;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public @@ -72,7 +72,7 @@ ;; before the check, so that we avoid races (possibly due to parallel ;; compilation). ;; -(define (ensure-writable-dir dir) +(define (ensure-directory dir) (catch 'system-error (lambda () (mkdir dir)) @@ -80,13 +80,12 @@ (let ((errno (and (pair? rest) (car rest)))) (cond ((eqv? errno EEXIST) - (let ((st (stat dir))) - (if (or (not (eq? (stat:type st) 'directory)) - (not (access? dir W_OK))) - (error "directory not writable" dir)))) + ;; Assume it's a writable directory, to avoid TOCTOU errors, + ;; as well as UID/EUID mismatches that occur with access(2). + #t) ((eqv? errno ENOENT) - (ensure-writable-dir (dirname dir)) - (ensure-writable-dir dir)) + (ensure-directory (dirname dir)) + (ensure-directory dir)) (else (throw k subr fmt args rest))))))) @@ -125,7 +124,7 @@ %compile-fallback-path (canonical->suffix (canonicalize-path file)) (compiled-extension)))) - (and (false-if-exception (ensure-writable-dir (dirname f))) + (and (false-if-exception (ensure-directory (dirname f))) f)))) (define* (compile-file file #:key @@ -144,7 +143,7 @@ ;; Choose the input encoding deterministically. (set-port-encoding! in (or enc "UTF-8")) - (ensure-writable-dir (dirname comp)) + (ensure-directory (dirname comp)) (call-with-output-file/atomic comp (lambda (port) ((language-printer (ensure-language to)) diff --git a/module/texinfo.scm b/module/texinfo.scm index 2ffd85393..519db485d 100644 --- a/module/texinfo.scm +++ b/module/texinfo.scm @@ -384,8 +384,14 @@ Examples: ;; Like a DTD for texinfo (define (command-spec command) - (or (assq command texi-command-specs) - (parser-error #f "Unknown command" command))) + (let ((spec (assq command texi-command-specs))) + (cond + ((not spec) + (parser-error #f "Unknown command" command)) + ((eq? (cadr spec) 'ALIAS) + (command-spec (cddr spec))) + (else + spec)))) (define (inline-content? content) (case content @@ -647,11 +653,10 @@ Examples: (arguments->attlist port (read-arguments port stop-char) arg-names)) (let* ((spec (command-spec command)) + (command (car spec)) (type (cadr spec)) (arg-names (cddr spec))) (case type - ((ALIAS) - (complete-start-command arg-names port)) ((INLINE-TEXT) (assert-curr-char '(#\{) "Inline element lacks {" port) (values command '() type)) @@ -954,7 +959,9 @@ Examples: (loop port expect-eof? end-para need-break? seed))) ((START) ; Start of an @-command (let* ((head (token-head token)) - (type (cadr (command-spec head))) + (spec (command-spec head)) + (head (car spec)) + (type (cadr spec)) (inline? (inline-content? type)) (seed ((if (and inline? (not need-break?)) identity end-para) seed)) @@ -1045,8 +1052,9 @@ Examples: (lambda (command args content seed) ; fdown '()) (lambda (command args parent-seed seed) ; fup - (let ((seed (reverse-collect-str-drop-ws seed)) - (spec (command-spec command))) + (let* ((seed (reverse-collect-str-drop-ws seed)) + (spec (command-spec command)) + (command (car spec))) (if (eq? (cadr spec) 'INLINE-TEXT-ARGS) (cons (list command (cons '% (parse-inline-text-args #f spec seed))) parent-seed) @@ -1062,8 +1070,10 @@ Examples: (let ((parser (make-dom-parser))) ;; duplicate arguments->attlist to avoid unnecessary splitting (lambda (command port) - (let ((args (cdar (parser '*ENVIRON-ARGS* port '()))) - (arg-names (cddr (command-spec command)))) + (let* ((args (cdar (parser '*ENVIRON-ARGS* port '()))) + (spec (command-spec command)) + (command (car spec)) + (arg-names (cddr spec))) (cond ((not arg-names) (if (null? args) '() diff --git a/module/web/client.scm b/module/web/client.scm index b0356680d..cf7ea5325 100644 --- a/module/web/client.scm +++ b/module/web/client.scm @@ -1,6 +1,6 @@ ;;; Web client -;; Copyright (C) 2011 Free Software Foundation, Inc. +;; Copyright (C) 2011, 2012 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -42,19 +42,37 @@ http-get)) (define (open-socket-for-uri uri) - (let* ((ai (car (getaddrinfo (uri-host uri) - (cond - ((uri-port uri) => number->string) - (else (symbol->string (uri-scheme uri))))))) - (s (socket (addrinfo:fam ai) (addrinfo:socktype ai) - (addrinfo:protocol ai)))) - (set-port-encoding! s "ISO-8859-1") - (connect s (addrinfo:addr ai)) - ;; Buffer input and output on this port. - (setvbuf s _IOFBF) - ;; Enlarge the receive buffer. - (setsockopt s SOL_SOCKET SO_RCVBUF (* 12 1024)) - s)) + "Return an open input/output port for a connection to URI." + (define addresses + (let ((port (uri-port uri))) + (getaddrinfo (uri-host uri) + (cond (port => number->string) + (else (symbol->string (uri-scheme uri)))) + (if port + AI_NUMERICSERV + 0)))) + + (let loop ((addresses addresses)) + (let* ((ai (car addresses)) + (s (socket (addrinfo:fam ai) (addrinfo:socktype ai) + (addrinfo:protocol ai)))) + (set-port-encoding! s "ISO-8859-1") + + (catch 'system-error + (lambda () + (connect s (addrinfo:addr ai)) + + ;; Buffer input and output on this port. + (setvbuf s _IOFBF) + ;; Enlarge the receive buffer. + (setsockopt s SOL_SOCKET SO_RCVBUF (* 12 1024)) + s) + (lambda args + ;; Connection failed, so try one of the other addresses. + (close s) + (if (null? addresses) + (apply throw args) + (loop (cdr addresses)))))))) (define (decode-string bv encoding) (if (string-ci=? encoding "utf-8") diff --git a/module/web/uri.scm b/module/web/uri.scm index 109118b12..78614a520 100644 --- a/module/web/uri.scm +++ b/module/web/uri.scm @@ -364,7 +364,9 @@ Percent-encoding first writes out the given character to a bytevector within the given @var{encoding}, then encodes each byte as @code{%@var{HH}}, where @var{HH} is the hexadecimal representation of the byte." - (if (string-index str unescaped-chars) + (define (needs-escaped? ch) + (not (char-set-contains? unescaped-chars ch))) + (if (string-index str needs-escaped?) (call-with-output-string* (lambda (port) (string-for-each @@ -377,6 +379,8 @@ the byte." (if (< i len) (let ((byte (bytevector-u8-ref bv i))) (display #\% port) + (when (< byte 16) + (display #\0 port)) (display (number->string byte 16) port) (lp (1+ i)))))))) str))) diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index c20a97752..24132595e 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -136,6 +136,7 @@ SCM_TESTS = tests/00-initial-env.test \ tests/srfi-67.test \ tests/srfi-69.test \ tests/srfi-88.test \ + tests/srfi-105.test \ tests/srfi-4.test \ tests/srfi-9.test \ tests/statprof.test \ diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test index bdc9bdb41..98854f73a 100644 --- a/test-suite/tests/chars.test +++ b/test-suite/tests/chars.test @@ -45,18 +45,18 @@ (pass-if "char=? #\\A #\\A" (char=? #\A #\A)) - (expect-fail "char=? #\\A #\\a" - (char=? #\A #\a)) + (pass-if "char=? #\\A #\\a" + (not (char=? #\A #\a))) - (expect-fail "char=? #\\A #\\B" - (char=? #\A #\B)) + (pass-if "char=? #\\A #\\B" + (not (char=? #\A #\B))) - (expect-fail "char=? #\\B #\\A" - (char=? #\A #\B)) + (pass-if "char=? #\\B #\\A" + (not (char=? #\A #\B))) ;; char<? - (expect-fail "char<? #\\A #\\A" - (char<? #\A #\A)) + (pass-if "char<? #\\A #\\A" + (not (char<? #\A #\A))) (pass-if "char<? #\\A #\\a" (char<? #\A #\a)) @@ -64,8 +64,8 @@ (pass-if "char<? #\\A #\\B" (char<? #\A #\B)) - (expect-fail "char<? #\\B #\\A" - (char<? #\B #\A)) + (pass-if "char<? #\\B #\\A" + (not (char<? #\B #\A))) ;; char<=? (pass-if "char<=? #\\A #\\A" @@ -77,18 +77,18 @@ (pass-if "char<=? #\\A #\\B" (char<=? #\A #\B)) - (expect-fail "char<=? #\\B #\\A" - (char<=? #\B #\A)) + (pass-if "char<=? #\\B #\\A" + (not (char<=? #\B #\A))) ;; char>? - (expect-fail "char>? #\\A #\\A" - (char>? #\A #\A)) + (pass-if "char>? #\\A #\\A" + (not (char>? #\A #\A))) - (expect-fail "char>? #\\A #\\a" - (char>? #\A #\a)) + (pass-if "char>? #\\A #\\a" + (not (char>? #\A #\a))) - (expect-fail "char>? #\\A #\\B" - (char>? #\A #\B)) + (pass-if "char>? #\\A #\\B" + (not (char>? #\A #\B))) (pass-if "char>? #\\B #\\A" (char>? #\B #\A)) @@ -97,11 +97,11 @@ (pass-if "char>=? #\\A #\\A" (char>=? #\A #\A)) - (expect-fail "char>=? #\\A #\\a" - (char>=? #\A #\a)) + (pass-if "char>=? #\\A #\\a" + (not (char>=? #\A #\a))) - (expect-fail "char>=? #\\A #\\B" - (char>=? #\A #\B)) + (pass-if "char>=? #\\A #\\B" + (not (char>=? #\A #\B))) (pass-if "char>=? #\\B #\\A" (char>=? #\B #\A)) @@ -113,24 +113,24 @@ (pass-if "char-ci=? #\\A #\\a" (char-ci=? #\A #\a)) - (expect-fail "char-ci=? #\\A #\\B" - (char-ci=? #\A #\B)) + (pass-if "char-ci=? #\\A #\\B" + (not (char-ci=? #\A #\B))) - (expect-fail "char-ci=? #\\B #\\A" - (char-ci=? #\A #\B)) + (pass-if "char-ci=? #\\B #\\A" + (not (char-ci=? #\A #\B))) ;; char-ci<? - (expect-fail "char-ci<? #\\A #\\A" - (char-ci<? #\A #\A)) + (pass-if "char-ci<? #\\A #\\A" + (not (char-ci<? #\A #\A))) - (expect-fail "char-ci<? #\\A #\\a" - (char-ci<? #\A #\a)) + (pass-if "char-ci<? #\\A #\\a" + (not (char-ci<? #\A #\a))) (pass-if "char-ci<? #\\A #\\B" (char-ci<? #\A #\B)) - (expect-fail "char-ci<? #\\B #\\A" - (char-ci<? #\B #\A)) + (pass-if "char-ci<? #\\B #\\A" + (not (char-ci<? #\B #\A))) ;; char-ci<=? (pass-if "char-ci<=? #\\A #\\A" @@ -142,18 +142,18 @@ (pass-if "char-ci<=? #\\A #\\B" (char-ci<=? #\A #\B)) - (expect-fail "char-ci<=? #\\B #\\A" - (char-ci<=? #\B #\A)) + (pass-if "char-ci<=? #\\B #\\A" + (not (char-ci<=? #\B #\A))) ;; char-ci>? - (expect-fail "char-ci>? #\\A #\\A" - (char-ci>? #\A #\A)) + (pass-if "char-ci>? #\\A #\\A" + (not (char-ci>? #\A #\A))) - (expect-fail "char-ci>? #\\A #\\a" - (char-ci>? #\A #\a)) + (pass-if "char-ci>? #\\A #\\a" + (not (char-ci>? #\A #\a))) - (expect-fail "char-ci>? #\\A #\\B" - (char-ci>? #\A #\B)) + (pass-if "char-ci>? #\\A #\\B" + (not (char-ci>? #\A #\B))) (pass-if "char-ci>? #\\B #\\A" (char-ci>? #\B #\A)) @@ -165,8 +165,8 @@ (pass-if "char-ci>=? #\\A #\\a" (char-ci>=? #\A #\a)) - (expect-fail "char-ci>=? #\\A #\\B" - (char-ci>=? #\A #\B)) + (pass-if "char-ci>=? #\\A #\\B" + (not (char-ci>=? #\A #\B))) (pass-if "char-ci>=? #\\B #\\A" (char-ci>=? #\B #\A))) diff --git a/test-suite/tests/list.test b/test-suite/tests/list.test index dc06f0795..ff31c8605 100644 --- a/test-suite/tests/list.test +++ b/test-suite/tests/list.test @@ -439,15 +439,15 @@ (with-test-prefix "wrong argument" - (expect-fail-exception "improper list and empty list" + (pass-if-exception "improper list and empty list" exception:wrong-type-arg (append! (cons 1 2) '())) - (expect-fail-exception "improper list and list" + (pass-if-exception "improper list and list" exception:wrong-type-arg (append! (cons 1 2) (list 3 4))) - (expect-fail-exception "list, improper list and list" + (pass-if-exception "list, improper list and list" exception:wrong-type-arg (append! (list 1 2) (cons 3 4) (list 5 6))) diff --git a/test-suite/tests/numbers.test b/test-suite/tests/numbers.test index a6697c9a3..ddbd2097e 100644 --- a/test-suite/tests/numbers.test +++ b/test-suite/tests/numbers.test @@ -4845,7 +4845,7 @@ (test+/- n d)))))) (with-test-prefix "divide by zero" - (for `((0 0.0 +0.0)) ;; denominators + (for `((0 0.0 -0.0)) ;; denominators (lambda (d) (for `((15 ,(* 3/2 big) 18.0 33/7 0 0.0 -0.0 +inf.0 -inf.0 +nan.0)) ;; numerators diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 60c853cf4..6e02255ad 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -401,6 +401,19 @@ (lambda () (read-disable 'hungry-eol-escapes)))))) +(with-test-prefix "per-port-read-options" + (pass-if "case-sensitive" + (equal? '(guile GuiLe gUIle) + (with-read-options '(case-insensitive) + (lambda () + (with-input-from-string "GUIle #!no-fold-case GuiLe gUIle" + (lambda () + (list (read) (read) (read)))))))) + (pass-if "case-insensitive" + (equal? '(GUIle guile guile) + (with-input-from-string "GUIle #!fold-case GuiLe gUIle" + (lambda () + (list (read) (read) (read))))))) (with-test-prefix "#;" (for-each diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test index c2b65a64c..eba415314 100644 --- a/test-suite/tests/regexp.test +++ b/test-suite/tests/regexp.test @@ -1,8 +1,9 @@ ;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*- ;;;; Jim Blandy <jimb@red-bean.com> --- September 1999 ;;;; -;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. -;;;; +;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010, +;;;; 2012 Free Software Foundation, Inc. +;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either @@ -131,7 +132,14 @@ (lambda (match result) (cons (match:substring match) result)) - (logior regexp/notbol regexp/noteol))))) + (logior regexp/notbol regexp/noteol)))) + + (pass-if "regexp/notbol is set correctly" + (equal? '("foo") + (fold-matches "^foo" "foofoofoofoo" '() + (lambda (match result) + (cons (match:substring match) + result)))))) ;;; @@ -282,4 +290,12 @@ (with-locale "en_US.utf8" ;; bug #31650 (equal? (match:substring (string-match ".*" "calçot") 0) - "calçot")))) + "calçot"))) + + (pass-if "match structures refer to char offsets, non-ASCII pattern" + (with-locale "en_US.utf8" + ;; bug #31650 + (equal? (match:substring (string-match "λ: The Ultimate (.*)" + "λ: The Ultimate GOTO") + 1) + "GOTO")))) diff --git a/test-suite/tests/srfi-105.test b/test-suite/tests/srfi-105.test new file mode 100644 index 000000000..99a084bb3 --- /dev/null +++ b/test-suite/tests/srfi-105.test @@ -0,0 +1,240 @@ +;;;; srfi-105.test --- Test suite for Guile's SRFI-105 reader. -*- scheme -*- +;;;; +;;;; Copyright (C) 2012 Free Software Foundation, Inc. +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +(define-module (test-srfi-105) + #:use-module (test-suite lib) + #:use-module (srfi srfi-1)) + +(define (read-string s) + (with-fluids ((%default-port-encoding #f)) + (with-input-from-string s read))) + +(define (with-read-options opts thunk) + (let ((saved-options (read-options))) + (dynamic-wind + (lambda () + (read-options opts)) + thunk + (lambda () + (read-options saved-options))))) + +;; Verify that curly braces are allowed in identifiers and that neoteric +;; expressions are not recognized by default. +(with-test-prefix "no-curly-infix" + (pass-if (equal? '({f(x) + g[y] + h{z} + [a]}) + `(,(string->symbol "{f") + (x) + g [y] + + ,(string->symbol "h{z}") + + [a] + ,(string->symbol "}"))))) + +#!curly-infix + +(with-test-prefix "curly-infix" + (pass-if (equal? '{n <= 5} '(<= n 5))) + (pass-if (equal? '{x + 1} '(+ x 1))) + (pass-if (equal? '{a + b + c} '(+ a b c))) + (pass-if (equal? '{x ,op y ,op z} '(,op x y z))) + (pass-if (equal? '{x eqv? `a} '(eqv? x `a))) + (pass-if (equal? '{'a eq? b} '(eq? 'a b))) + (pass-if (equal? '{n-1 + n-2} '(+ n-1 n-2))) + (pass-if (equal? '{a * {b + c}} '(* a (+ b c)))) + (pass-if (equal? '{a + {b - c}} '(+ a (- b c)))) + (pass-if (equal? '{{a + b} - c} '(- (+ a b) c))) + (pass-if (equal? '{{a > 0} and {b >= 1}} '(and (> a 0) (>= b 1)))) + (pass-if (equal? '{} '())) + (pass-if (equal? '{5} '5)) + (pass-if (equal? '{- x} '(- x))) + (pass-if (equal? '{length(x) >= 6} '(>= (length x) 6))) + (pass-if (equal? '{f(x) + g(y) + h(z)} '(+ (f x) (g y) (h z)))) + (pass-if (equal? '{(f a b) + (g h)} '(+ (f a b) (g h)))) + (pass-if (equal? '{f(a b) + g(h)} '(+ (f a b) (g h)))) + (pass-if (equal? ''{a + f(b) + x} ''(+ a (f b) x))) + (pass-if (equal? '{(- a) / b} '(/ (- a) b))) + (pass-if (equal? '{-(a) / b} '(/ (- a) b))) + (pass-if (equal? '{cos(q)} '(cos q))) + (pass-if (equal? '{e{}} '(e))) + (pass-if (equal? '{pi{}} '(pi))) + (pass-if (equal? '{'f(x)} '(quote (f x)))) + + (pass-if (equal? '{ (f (g h(x))) } '(f (g (h x))))) + (pass-if (equal? '{#(1 2 f(a) 4)} '#(1 2 (f a) 4))) + (pass-if (equal? '{ (f #;g(x) h(x)) } '(f (h x)))) + (pass-if (equal? '{ (f #; g(x)[y] h(x)) } '(f (h x)))) + (pass-if (equal? '{ (f #; g[x]{y} h(x)) } '(f (h x)))) + + (pass-if (equal? '{ (f #(g h(x))) } '(f #(g (h x))))) + (pass-if (equal? '{ (f '(g h(x))) } '(f '(g (h x))))) + (pass-if (equal? '{ (f `(g h(x))) } '(f `(g (h x))))) + (pass-if (equal? '{ (f #'(g h(x))) } '(f #'(g (h x))))) + (pass-if (equal? '{ (f #2((g) (h(x)))) } '(f #2((g) ((h x)))))) + + (pass-if (equal? '{(map - ns)} '(map - ns))) + (pass-if (equal? '{map(- ns)} '(map - ns))) + (pass-if (equal? '{n * factorial{n - 1}} '(* n (factorial (- n 1))))) + (pass-if (equal? '{2 * sin{- x}} '(* 2 (sin (- x))))) + + (pass-if (equal? '{3 + 4 +} '($nfx$ 3 + 4 +))) + (pass-if (equal? '{3 + 4 + 5 +} '($nfx$ 3 + 4 + 5 +))) + (pass-if (equal? '{a . z} '($nfx$ a . z))) + (pass-if (equal? '{a + b - c} '($nfx$ a + b - c))) + + (pass-if (equal? '{read(. options)} '(read . options))) + + (pass-if (equal? '{a(x)(y)} '((a x) y))) + (pass-if (equal? '{x[a]} '($bracket-apply$ x a))) + (pass-if (equal? '{y[a b]} '($bracket-apply$ y a b))) + + (pass-if (equal? '{f(g(x))} '(f (g x)))) + (pass-if (equal? '{f(g(x) h(x))} '(f (g x) (h x)))) + + + (pass-if (equal? '{} '())) + (pass-if (equal? '{e} 'e)) + (pass-if (equal? '{e1 e2} '(e1 e2))) + + (pass-if (equal? '{a . t} '($nfx$ a . t))) + (pass-if (equal? '{a b . t} '($nfx$ a b . t))) + (pass-if (equal? '{a b c . t} '($nfx$ a b c . t))) + (pass-if (equal? '{a b c d . t} '($nfx$ a b c d . t))) + (pass-if (equal? '{a + b +} '($nfx$ a + b +))) + (pass-if (equal? '{a + b + c +} '($nfx$ a + b + c +))) + (pass-if (equal? '{q + r * s} '($nfx$ q + r * s))) + + ;; The following two tests will become relevant when Guile's reader + ;; supports datum labels, specified in SRFI-38 (External + ;; Representation for Data With Shared Structure). + + ;;(pass-if (equal? '{#1=f(#1#)} '#1=(f #1#))) + ;;(pass-if (equal? '#1={a + . #1#} '($nfx$ . #1=(a + . #1#)))) + + (pass-if (equal? '{e()} '(e))) + (pass-if (equal? '{e{}} '(e))) + (pass-if (equal? '{e(1)} '(e 1))) + (pass-if (equal? '{e{1}} '(e 1))) + (pass-if (equal? '{e(1 2)} '(e 1 2))) + (pass-if (equal? '{e{1 2}} '(e (1 2)))) + (pass-if (equal? '{f{n - 1}} '(f (- n 1)))) + (pass-if (equal? '{f{n - 1}(x)} '((f (- n 1)) x))) + (pass-if (equal? '{f{n - 1}{y - 1}} '((f (- n 1)) (- y 1)))) + (pass-if (equal? '{f{- x}[y]} '($bracket-apply$ (f (- x)) y))) + (pass-if (equal? '{g{- x}} '(g (- x)))) + (pass-if (equal? '{( . e)} 'e)) + + (pass-if (equal? '{e[]} '($bracket-apply$ e))) + (pass-if (equal? '{e[1 2]} '($bracket-apply$ e 1 2))) + (pass-if (equal? '{e[1 . 2]} '($bracket-apply$ e 1 . 2))) + + ;; Verify that source position information is not recorded if not + ;; asked for. + (with-test-prefix "no positions" + (pass-if "simple curly-infix list" + (let ((sexp (with-read-options '(curly-infix) + (lambda () + (read-string " {1 + 2 + 3}"))))) + (and (not (source-property sexp 'line)) + (not (source-property sexp 'column))))) + (pass-if "mixed curly-infix list" + (let ((sexp (with-read-options '(curly-infix) + (lambda () + (read-string " {1 + 2 * 3}"))))) + (and (not (source-property sexp 'line)) + (not (source-property sexp 'column))))) + (pass-if "singleton curly-infix list" + (let ((sexp (with-read-options '(curly-infix) + (lambda () + (read-string " { 1.0 }"))))) + (and (not (source-property sexp 'line)) + (not (source-property sexp 'column))))) + (pass-if "neoteric expression" + (let ((sexp (with-read-options '(curly-infix) + (lambda () + (read-string " { f(x) }"))))) + (and (not (source-property sexp 'line)) + (not (source-property sexp 'column)))))) + + ;; Verify that source position information is properly recorded. + (with-test-prefix "positions" + (pass-if "simple curly-infix list" + (let ((sexp (with-read-options '(curly-infix positions) + (lambda () + (read-string " {1 + 2 + 3}"))))) + (and (equal? (source-property sexp 'line) 0) + (equal? (source-property sexp 'column) 1)))) + (pass-if "mixed curly-infix list" + (let ((sexp (with-read-options '(curly-infix positions) + (lambda () + (read-string " {1 + 2 * 3}"))))) + (and (equal? (source-property sexp 'line) 0) + (equal? (source-property sexp 'column) 1)))) + (pass-if "singleton curly-infix list" + (let ((sexp (with-read-options '(curly-infix positions) + (lambda () + (read-string " { 1.0 }"))))) + (and (equal? (source-property sexp 'line) 0) + (equal? (source-property sexp 'column) 3)))) + (pass-if "neoteric expression" + (let ((sexp (with-read-options '(curly-infix positions) + (lambda () + (read-string " { f(x) }"))))) + (and (equal? (source-property sexp 'line) 0) + (equal? (source-property sexp 'column) 3))))) + + ;; Verify that neoteric expressions are recognized only within curly braces. + (pass-if (equal? '(a(x)(y)) '(a (x) (y)))) + (pass-if (equal? '(x[a]) '(x [a]))) + (pass-if (equal? '(y[a b]) '(y [a b]))) + (pass-if (equal? '(a f{n - 1}) '(a f (- n 1)))) + (pass-if (equal? '(a f{n - 1}(x)) '(a f (- n 1) (x)))) + (pass-if (equal? '(a f{n - 1}[x]) '(a f (- n 1) [x]))) + (pass-if (equal? '(a f{n - 1}{y - 1}) '(a f (- n 1) (- y 1)))) + + ;; Verify that bracket lists are not recognized by default. + (pass-if (equal? '{[]} '())) + (pass-if (equal? '{[a]} '(a))) + (pass-if (equal? '{[a b]} '(a b))) + (pass-if (equal? '{[a . b]} '(a . b))) + (pass-if (equal? '[] '())) + (pass-if (equal? '[a] '(a))) + (pass-if (equal? '[a b] '(a b))) + (pass-if (equal? '[a . b] '(a . b)))) + + +#!curly-infix-and-bracket-lists + +(with-test-prefix "curly-infix-and-bracket-lists" + ;; Verify that these neoteric expressions still work properly + ;; when the 'square-brackets' read option is unset (which is done by + ;; the '#!curly-infix-and-bracket-lists' reader directive above). + (pass-if (equal? '{e[]} '($bracket-apply$ e))) + (pass-if (equal? '{e[1 2]} '($bracket-apply$ e 1 2))) + (pass-if (equal? '{e[1 . 2]} '($bracket-apply$ e 1 . 2))) + + ;; The following expressions are not actually part of SRFI-105, but + ;; they are handled when the 'curly-infix' read option is set and the + ;; 'square-brackets' read option is unset. This is a non-standard + ;; extension of SRFI-105, and follows the convention of GNU Kawa. + (pass-if (equal? '{[]} '($bracket-list$))) + (pass-if (equal? '{[a]} '($bracket-list$ a))) + (pass-if (equal? '{[a b]} '($bracket-list$ a b))) + (pass-if (equal? '{[a . b]} '($bracket-list$ a . b))) + + (pass-if (equal? '[] '($bracket-list$))) + (pass-if (equal? '[a] '($bracket-list$ a))) + (pass-if (equal? '[a b] '($bracket-list$ a b))) + (pass-if (equal? '[a . b] '($bracket-list$ a . b)))) diff --git a/test-suite/tests/srfi-31.test b/test-suite/tests/srfi-31.test index 8537d49b6..62645d918 100644 --- a/test-suite/tests/srfi-31.test +++ b/test-suite/tests/srfi-31.test @@ -1,6 +1,6 @@ ;;;; srfi-31.test --- Test suite for Guile's SRFI-31 functions. -*- scheme -*- ;;;; -;;;; Copyright (C) 2004, 2006, 2010 Free Software Foundation, Inc. +;;;; Copyright (C) 2004, 2006, 2010, 2012 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -22,9 +22,10 @@ (with-test-prefix "rec special form" - (pass-if-exception "bogus variable" '(misc-error . ".*") + (pass-if-exception "bogus variable" + exception:syntax-pattern-unmatched (eval '(rec #:foo) (current-module))) - + (pass-if "rec expressions" (let ((ones-list (rec ones (cons 1 (delay ones))))) (and (= 1 (car ones-list)) diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test index d892b7077..679e17326 100644 --- a/test-suite/tests/strings.test +++ b/test-suite/tests/strings.test @@ -557,7 +557,67 @@ (pass-if "char 255" (equal? '("a" "b") (string-split (string #\a (integer->char 255) #\b) - (integer->char 255))))) + (integer->char 255)))) + + (pass-if "empty string - char" + (equal? '("") + (string-split "" #\:))) + + (pass-if "non-empty - char - no delimiters" + (equal? '("foobarfrob") + (string-split "foobarfrob" #\:))) + + (pass-if "non-empty - char - delimiters" + (equal? '("foo" "bar" "frob") + (string-split "foo:bar:frob" #\:))) + + (pass-if "non-empty - char - leading delimiters" + (equal? '("" "" "foo" "bar" "frob") + (string-split "::foo:bar:frob" #\:))) + + (pass-if "non-empty - char - trailing delimiters" + (equal? '("foo" "bar" "frob" "" "") + (string-split "foo:bar:frob::" #\:))) + + (pass-if "empty string - charset" + (equal? '("") + (string-split "" (char-set #\:)))) + + (pass-if "non-empty - charset - no delimiters" + (equal? '("foobarfrob") + (string-split "foobarfrob" (char-set #\:)))) + + (pass-if "non-empty - charset - delimiters" + (equal? '("foo" "bar" "frob") + (string-split "foo:bar:frob" (char-set #\:)))) + + (pass-if "non-empty - charset - leading delimiters" + (equal? '("" "" "foo" "bar" "frob") + (string-split "::foo:bar:frob" (char-set #\:)))) + + (pass-if "non-empty - charset - trailing delimiters" + (equal? '("foo" "bar" "frob" "" "") + (string-split "foo:bar:frob::" (char-set #\:)))) + + (pass-if "empty string - pred" + (equal? '("") + (string-split "" (negate char-alphabetic?)))) + + (pass-if "non-empty - pred - no delimiters" + (equal? '("foobarfrob") + (string-split "foobarfrob" (negate char-alphabetic?)))) + + (pass-if "non-empty - pred - delimiters" + (equal? '("foo" "bar" "frob") + (string-split "foo:bar:frob" (negate char-alphabetic?)))) + + (pass-if "non-empty - pred - leading delimiters" + (equal? '("" "" "foo" "bar" "frob") + (string-split "::foo:bar:frob" (negate char-alphabetic?)))) + + (pass-if "non-empty - pred - trailing delimiters" + (equal? '("foo" "bar" "frob" "" "") + (string-split "foo:bar:frob::" (negate char-alphabetic?))))) (with-test-prefix "substring-move!" diff --git a/test-suite/tests/structs.test b/test-suite/tests/structs.test index 431a014b8..0e3b2417e 100644 --- a/test-suite/tests/structs.test +++ b/test-suite/tests/structs.test @@ -126,7 +126,49 @@ (not (or (equal? (make-ball red "Bob") (make-ball green "Bob")) (equal? (make-ball red "Bob") (make-ball red "Bill")))))) + +(with-test-prefix "hash" + + (pass-if "simple structs" + (let* ((v (make-vtable "pr")) + (s1 (make-struct v 0 "hello")) + (s2 (make-struct v 0 "hello"))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "different structs" + (let* ((v (make-vtable "pr")) + (s1 (make-struct v 0 "hello")) + (s2 (make-struct v 0 "world"))) + (or (not (= (hash s1 7777) (hash s2 7777))) + (throw 'unresolved)))) + + (pass-if "different struct types" + (let* ((v1 (make-vtable "pr")) + (v2 (make-vtable "pr")) + (s1 (make-struct v1 0 "hello")) + (s2 (make-struct v2 0 "hello"))) + (or (not (= (hash s1 7777) (hash s2 7777))) + (throw 'unresolved)))) + (pass-if "more complex structs" + (let ((s1 (make-ball red (string-copy "Bob"))) + (s2 (make-ball red (string-copy "Bob")))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "struct with weird fields" + (let* ((v (make-vtable "prurph")) + (s1 (make-struct v 0 "hello" 123 "invisible-secret1")) + (s2 (make-struct v 0 "hello" 123 "invisible-secret2"))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "cyclic structs" + (let* ((v (make-vtable "pw")) + (a (make-struct v 0 #f)) + (b (make-struct v 0 a))) + (struct-set! a 0 b) + (and (hash a 7777) (hash b 7777) #t)))) + + ;; ;; make-struct ;; diff --git a/test-suite/tests/texinfo.test b/test-suite/tests/texinfo.test index 98c44b91b..8a4b593fd 100644 --- a/test-suite/tests/texinfo.test +++ b/test-suite/tests/texinfo.test @@ -208,9 +208,8 @@ (test-body "@code{arg}" '((para (code "arg")))) - ;; FIXME: Why no enclosing para here? Probably a bug. (test-body "@url{arg}" - '((uref (% (url "arg"))))) + '((para (uref (% (url "arg")))))) (test-body "@code{ }" '((para (code)))) (test-body "@code{ @code{} }" diff --git a/test-suite/tests/tree-il.test b/test-suite/tests/tree-il.test index 5d12f0c48..4767d624a 100644 --- a/test-suite/tests/tree-il.test +++ b/test-suite/tests/tree-il.test @@ -58,6 +58,20 @@ (assert-tree-il->glil with-partial-evaluation in pat test ...)))) +(define-syntax-rule (pass-if-primitives-resolved in expected) + (pass-if (format #f "primitives-resolved in ~s" 'in) + (let* ((module (let ((m (make-module))) + (beautify-user-module! m) + m)) + (orig (parse-tree-il 'in)) + (resolved (expand-primitives! (resolve-primitives! orig module)))) + (or (equal? (unparse-tree-il resolved) 'expected) + (begin + (format (current-error-port) + "primitive test failed: got ~s, expected ~s" + resolved 'expected) + #f))))) + (define-syntax pass-if-tree-il->scheme (syntax-rules () ((_ in pat) @@ -70,6 +84,69 @@ (_ #f)))))) +(with-test-prefix "primitives" + + (with-test-prefix "eqv?" + + (pass-if-primitives-resolved + (primcall eqv? (toplevel x) (const #f)) + (primcall eq? (const #f) (toplevel x))) + + (pass-if-primitives-resolved + (primcall eqv? (toplevel x) (const ())) + (primcall eq? (const ()) (toplevel x))) + + (pass-if-primitives-resolved + (primcall eqv? (const #t) (lexical x y)) + (primcall eq? (const #t) (lexical x y))) + + (pass-if-primitives-resolved + (primcall eqv? (const this-is-a-symbol) (toplevel x)) + (primcall eq? (const this-is-a-symbol) (toplevel x))) + + (pass-if-primitives-resolved + (primcall eqv? (const 42) (toplevel x)) + (primcall eq? (const 42) (toplevel x))) + + (pass-if-primitives-resolved + (primcall eqv? (const 42.0) (toplevel x)) + (primcall eqv? (const 42.0) (toplevel x))) + + (pass-if-primitives-resolved + (primcall eqv? (const #nil) (toplevel x)) + (primcall eq? (const #nil) (toplevel x)))) + + (with-test-prefix "equal?" + + (pass-if-primitives-resolved + (primcall equal? (toplevel x) (const #f)) + (primcall eq? (const #f) (toplevel x))) + + (pass-if-primitives-resolved + (primcall equal? (toplevel x) (const ())) + (primcall eq? (const ()) (toplevel x))) + + (pass-if-primitives-resolved + (primcall equal? (const #t) (lexical x y)) + (primcall eq? (const #t) (lexical x y))) + + (pass-if-primitives-resolved + (primcall equal? (const this-is-a-symbol) (toplevel x)) + (primcall eq? (const this-is-a-symbol) (toplevel x))) + + (pass-if-primitives-resolved + (primcall equal? (const 42) (toplevel x)) + (primcall eq? (const 42) (toplevel x))) + + (pass-if-primitives-resolved + (primcall equal? (const 42.0) (toplevel x)) + (primcall equal? (const 42.0) (toplevel x))) + + (pass-if-primitives-resolved + (primcall equal? (const #nil) (toplevel x)) + (primcall eq? (const #nil) (toplevel x))))) + + (with-test-prefix "tree-il->scheme" (pass-if-tree-il->scheme (case-lambda ((a) a) ((b c) (list b c))) @@ -1704,3 +1781,8 @@ #:to 'assembly))))) (and (= (length w) 1) (number? (string-contains (car w) "unsupported format option")))))))) + +;; Local Variables: +;; eval: (put 'pass-if-primitives-resolved 'scheme-indent-function 1) +;; eval: (put 'pass-if-tree-il->scheme 'scheme-indent-function 1) +;; End: diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test index 4621a19f9..3f6e7e3ab 100644 --- a/test-suite/tests/web-uri.test +++ b/test-suite/tests/web-uri.test @@ -258,4 +258,6 @@ (equal? "foo bar" (uri-decode "foo+bar")))) (with-test-prefix "encode" - (pass-if (equal? "foo%20bar" (uri-encode "foo bar")))) + (pass-if (equal? "foo%20bar" (uri-encode "foo bar"))) + (pass-if (equal? "foo%0a%00bar" (uri-encode "foo\n\x00bar"))) + (pass-if (equal? "%3c%3e%5c%5e" (uri-encode "<>\\^")))) |