summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2010-09-14 16:10:52 +0200
committerLudovic Courtès <ludo@gnu.org>2010-09-14 16:11:19 +0200
commit07f49ac786e0f1c007eb336e2fb7a572e8405316 (patch)
treeab04b1efe4ee3062959b659456b3abc53b9d1b6f
parent4ff2b9f4b6fab00e0e982ce6d1b2594c19704d6e (diff)
Factorize and optimize `write' for strings and characters.
According to `write.bm', this makes `write' 2.6 times faster for strings. * libguile/print.c (iprin1): Use `write_character' when `SCM_WRITINGP (pstate)' and `SCM_CHARP (exp)' or `scm_is_string (exp)'. (scm_i_charprint): Remove. (display_character, write_character): New functions. (scm_write_char): Use `display_character' instead of `scm_i_charprint'. * libguile/print.h (scm_i_charprint): Remove declaration. * benchmark-suite/benchmarks/write.bm: New file. * benchmark-suite/Makefile.am (SCM_BENCHMARKS): Add `benchmarks/write.bm'.
-rw-r--r--benchmark-suite/Makefile.am3
-rw-r--r--benchmark-suite/benchmarks/write.bm52
-rw-r--r--libguile/print.c401
-rw-r--r--libguile/print.h3
4 files changed, 254 insertions, 205 deletions
diff --git a/benchmark-suite/Makefile.am b/benchmark-suite/Makefile.am
index b58219a24..9f49f2aad 100644
--- a/benchmark-suite/Makefile.am
+++ b/benchmark-suite/Makefile.am
@@ -11,7 +11,8 @@ SCM_BENCHMARKS = benchmarks/0-reference.bm \
benchmarks/subr.bm \
benchmarks/uniform-vector-read.bm \
benchmarks/vectors.bm \
- benchmarks/vlists.bm
+ benchmarks/vlists.bm \
+ benchmarks/write.bm
EXTRA_DIST = guile-benchmark lib.scm $(SCM_BENCHMARKS) \
ChangeLog-2008
diff --git a/benchmark-suite/benchmarks/write.bm b/benchmark-suite/benchmarks/write.bm
new file mode 100644
index 000000000..e96f2efeb
--- /dev/null
+++ b/benchmark-suite/benchmarks/write.bm
@@ -0,0 +1,52 @@
+;;; write.bm --- Exercise the printer. -*- Scheme -*-
+;;;
+;;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 3, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this software; see the file COPYING.LESSER. If
+;;; not, write to the Free Software Foundation, Inc., 51 Franklin
+;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (benchmarks read)
+ #:use-module (benchmark-suite lib))
+
+(define %len 50000)
+
+(define %string-with-escapes
+ (list->string (map integer->char (iota %len))))
+
+(define %string-without-escapes
+ (make-string %len #\a))
+
+;; Use Unicode-capable ports.
+(fluid-set! %default-port-encoding "UTF-8")
+
+(define %null
+ (%make-void-port OPEN_WRITE))
+
+
+(with-benchmark-prefix "write"
+
+ (benchmark "string with escapes" 50
+ (write %string-with-escapes %null))
+
+ (benchmark "string without escapes" 50
+ (write %string-without-escapes %null)))
+
+(with-benchmark-prefix "display"
+
+ (benchmark "string with escapes" 1000
+ (display %string-with-escapes %null))
+
+ (benchmark "string without escapes" 1000
+ (display %string-without-escapes %null)))
diff --git a/libguile/print.c b/libguile/print.c
index 212b70d2b..5acb06b3e 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -54,6 +54,14 @@
+/* Character printers. */
+
+static int display_character (scm_t_wchar, SCM,
+ scm_t_string_failed_conversion_handler);
+static void write_character (scm_t_wchar, SCM, int);
+
+
+
/* {Names of immediate symbols}
*
* This table must agree with the declarations in scm.h: {Immediate Symbols}.
@@ -461,79 +469,17 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc3_imm24:
if (SCM_CHARP (exp))
{
- scm_t_wchar i = SCM_CHAR (exp);
- const char *name;
-
if (SCM_WRITINGP (pstate))
+ write_character (SCM_CHAR (exp), port, 0);
+ else
{
- scm_puts ("#\\", port);
- name = scm_i_charname (exp);
- if (name != NULL)
- scm_puts (name, port);
- else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
- | UC_CATEGORY_MASK_M
- | UC_CATEGORY_MASK_N
- | UC_CATEGORY_MASK_P
- | UC_CATEGORY_MASK_S))
- /* Print the character if is graphic character. */
- {
- scm_t_wchar *wbuf;
- SCM wstr;
- char *buf;
- size_t len;
- const char *enc;
-
- enc = scm_i_get_port_encoding (port);
- if (uc_combining_class (i) == UC_CCC_NR)
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = i;
- }
- else
- {
- /* Character is a combining character: print it connected
- to a dotted circle instead of connecting it to the
- backslash in '#\' */
- wstr = scm_i_make_wide_string (2, &wbuf);
- wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
- wbuf[1] = i;
- }
- if (enc == NULL)
- {
- if (i <= 0xFF)
- /* Character is graphic and Latin-1. Print it */
- scm_lfwrite_str (wstr, port);
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- else
- {
- buf = u32_conv_to_encoding (enc,
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- }
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- }
- else
- /* Character is a non-graphical character. */
- PRINT_CHAR_ESCAPE (i, port);
+ if (!display_character (SCM_CHAR (exp), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ "UTF-32", scm_i_get_port_encoding (port),
+ scm_string (scm_list_1 (exp)));
}
- else
- scm_i_charprint (i, port);
}
else if (SCM_IFLAGP (exp)
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
@@ -597,132 +543,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
- size_t i, len;
- static char const hex[] = "0123456789abcdef";
- char buf[9];
-
+ size_t len, i;
scm_putc ('"', port);
len = scm_i_string_length (exp);
for (i = 0; i < len; ++i)
- {
- scm_t_wchar ch = scm_i_string_ref (exp, i);
- int printed = 0;
-
- if (ch == ' ' || ch == '\n')
- {
- scm_putc (ch, port);
- printed = 1;
- }
- else if (ch == '"' || ch == '\\')
- {
- scm_putc ('\\', port);
- scm_i_charprint (ch, port);
- printed = 1;
- }
- else
- if (uc_is_general_category_withtable
- (ch,
- UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
- UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
- UC_CATEGORY_MASK_S))
- {
- /* Print the character since it is a graphic
- character. */
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
- char *buf;
- size_t len;
-
- if (scm_i_get_port_encoding (port))
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = ch;
- buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1 ,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic and representable in
- this encoding. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- printed = 1;
- }
- }
- else
- if (ch <= 0xFF)
- {
- scm_putc (ch, port);
- printed = 1;
- }
- }
-
- if (!printed)
- {
- /* Character is graphic but unrepresentable in
- this port's encoding or is not graphic. */
- if (!SCM_R6RS_ESCAPES_P)
- {
- if (ch <= 0xFF)
- {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- }
- else if (ch > 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- }
- }
- else
- {
- scm_t_wchar ch2 = ch;
-
- /* Print an R6RS variable-length hex escape: "\xNNNN;"
- */
- int i = 8;
- buf[i] = ';';
- i --;
- if (ch == 0)
- buf[i--] = '0';
- else
- while (ch2 > 0)
- {
- buf[i] = hex[ch2 & 0xF];
- ch2 >>= 4;
- i --;
- }
- buf[i] = 'x';
- i --;
- buf[i] = '\\';
- scm_lfwrite (buf + i, 9 - i, port);
- }
- }
- }
+ write_character (scm_i_string_ref (exp, i), port, 1);
+
scm_putc ('"', port);
scm_remember_upto_here_1 (exp);
}
@@ -917,16 +744,179 @@ scm_prin1 (SCM exp, SCM port, int writingp)
}
}
-/* Print a character.
- */
-void
-scm_i_charprint (scm_t_wchar ch, SCM port)
+/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
+ if CH was successfully displayed, zero otherwise (e.g., if it was not
+ representable in PORT's encoding.) */
+static int
+display_character (scm_t_wchar ch, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
{
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
+ int printed;
+ const char *encoding;
+
+ encoding = scm_i_get_port_encoding (port);
+ if (encoding == NULL)
+ {
+ if (ch <= 0xff)
+ {
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ else
+ printed = 0;
+ }
+ else
+ {
+ size_t len;
+ char locale_encoded[sizeof (ch)], *result;
+
+ len = sizeof (locale_encoded);
+ result = u32_conv_to_encoding (encoding, strategy,
+ (scm_t_uint32 *) &ch, 1,
+ NULL, locale_encoded, &len);
+ if (result != NULL)
+ {
+ /* CH is graphic; print it. */
+
+ if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ {
+ /* Apply the same escaping syntax as in `write_character'. */
+ if (SCM_R6RS_ESCAPES_P)
+ scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+ else
+ scm_i_unistring_escapes_to_guile_escapes (result, &len);
+ }
- wbuf[0] = ch;
- scm_lfwrite_str (wstr, port);
+ scm_lfwrite (result, len, port);
+ printed = 1;
+
+ if (SCM_UNLIKELY (result != locale_encoded))
+ free (result);
+ }
+ else
+ printed = 0;
+ }
+
+ return printed;
+}
+
+/* Write CH to PORT, escaping it if it's non-graphic or not
+ representable in PORT's encoding. If STRING_ESCAPES_P is true and CH
+ needs to be escaped, it is escaped using the in-string escape syntax;
+ otherwise the character escape syntax is used. */
+static void
+write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
+{
+ int printed = 0;
+
+ if (string_escapes_p)
+ {
+ /* Check if CH deserves special treatment. */
+ if (ch == '"' || ch == '\\')
+ {
+ scm_putc ('\\', port);
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ else if (ch == ' ' || ch == '\n')
+ {
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ }
+ else
+ scm_puts ("#\\", port);
+
+ if (!printed
+ && uc_is_general_category_withtable (ch,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_N |
+ UC_CATEGORY_MASK_P |
+ UC_CATEGORY_MASK_S))
+ /* CH is graphic; attempt to display it. */
+ printed = display_character (ch, port, iconveh_error);
+
+ if (!printed)
+ {
+ /* CH isn't graphic or cannot be represented in PORT's
+ encoding. */
+
+ if (string_escapes_p)
+ {
+ /* Represent CH using the in-string escape syntax. */
+
+ static const char hex[] = "0123456789abcdef";
+ char buf[9];
+
+ if (!SCM_R6RS_ESCAPES_P)
+ {
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
+ }
+ else
+ {
+ /* Print an R6RS variable-length hex escape: "\xNNNN;". */
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
+ }
+ }
+ else
+ {
+ /* Represent CH using the character escape syntax. */
+ const char *name;
+
+ name = scm_i_charname (SCM_MAKE_CHAR (ch));
+ if (name != NULL)
+ scm_puts (name, port);
+ else
+ PRINT_CHAR_ESCAPE (ch, port);
+ }
+ }
}
/* Print an integer.
@@ -1248,8 +1238,15 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0,
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPORT_VALUE (2, port);
-
- scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
+
+ port = SCM_COERCE_OUTPORT (port);
+ if (!display_character (SCM_CHAR (chr), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ "UTF-32", scm_i_get_port_encoding (port),
+ scm_string (scm_list_1 (chr)));
+
#if 0
#ifdef HAVE_PIPE
# ifdef EPIPE
diff --git a/libguile/print.h b/libguile/print.h
index ae2aaef54..64d1f4bd8 100644
--- a/libguile/print.h
+++ b/libguile/print.h
@@ -3,7 +3,7 @@
#ifndef SCM_PRINT_H
#define SCM_PRINT_H
-/* Copyright (C) 1995,1996,1998,2000,2001, 2003, 2004, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2003, 2004, 2006, 2008, 2010 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -78,7 +78,6 @@ SCM_API SCM scm_print_options (SCM setting);
SCM_API SCM scm_make_print_state (void);
SCM_API void scm_free_print_state (SCM print_state);
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
-SCM_INTERNAL void scm_i_charprint (scm_t_wchar c, SCM port);
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);