summary refs log tree commit diff
diff options
context:
space:
mode:
author Ludovic Courtès <ludo@gnu.org> 2010-09-15 18:38:57 +0200
committer Ludovic Courtès <ludo@gnu.org> 2010-09-15 18:38:57 +0200
commit fd5eec2b6e113f6d13028215a738417607432a2d (patch)
tree f87058b9bb6f5d2b4b516763e26c663b7e659f9a
parent e9c3018cec8ed236a375c59aed55e66e47671022 (diff)
Optimize `peek-char'.
This makes `peek-char' up to 40x faster, notably on a UTF-8 port containing multi-byte codepoints. The `xml->sxml' procedure is 4x faster on a 2.7 MiB XML file.

* libguile/ports.c (get_codepoint): New procedure, moved here from `scm_getc', with the additional BUF and LEN parameters.
  (scm_getc): Use it.
  (scm_peek_char): Use it instead of the `scm_getc'/`scm_ungetc' sequence.
* test-suite/tests/ports.test ("string ports")["peek-char [latin-1]", "peek-char [utf-8]"]: New tests.
* benchmark-suite/Makefile.am (SCM_BENCHMARKS): Add `benchmarks/ports.bm'.
* benchmark-suite/benchmarks/ports.bm: New file.
-rw-r--r-- benchmark-suite/Makefile.am 1
-rw-r--r-- benchmark-suite/benchmarks/ports.bm 67
-rw-r--r-- libguile/ports.c 58
-rw-r--r-- test-suite/tests/ports.test 20
4 files changed, 132 insertions, 14 deletions
diff --git a/benchmark-suite/Makefile.am b/benchmark-suite/Makefile.am
index 9f49f2aad..e2aad9148 100644
--- a/benchmark-suite/Makefile.am
+++ b/benchmark-suite/Makefile.am
@@ -4,6 +4,7 @@ SCM_BENCHMARKS = benchmarks/0-reference.bm \
benchmarks/continuations.bm \
benchmarks/if.bm \
benchmarks/logand.bm \
+ benchmarks/ports.bm \
benchmarks/read.bm \
benchmarks/srfi-1.bm \
benchmarks/srfi-13.bm \
diff --git a/benchmark-suite/benchmarks/ports.bm b/benchmark-suite/benchmarks/ports.bm
new file mode 100644
index 000000000..917a7ddbe
--- /dev/null
+++ b/benchmark-suite/benchmarks/ports.bm
@@ -0,0 +1,67 @@
+;;; ports.bm --- Port I/O. -*- mode: scheme; coding: utf-8; -*-
+;;;
+;;; Copyright (C) 2010 Free Software Foundation, Inc.
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 3, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this software; see the file COPYING.LESSER. If
+;;; not, write to the Free Software Foundation, Inc., 51 Franklin
+;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (benchmarks ports)
+ #:use-module (benchmark-suite lib))
+
+(define %latin1-port
+ (with-fluids ((%default-port-encoding #f))
+ (open-input-string "hello, world")))
+
+(define %utf8/ascii-port
+ (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-input-string "hello, world")))
+
+(define %utf8/wide-port
+ (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-input-string "안녕하세요")))
+
+
+(with-benchmark-prefix "peek-char"
+
+ (benchmark "latin-1 port" 700000
+ (peek-char %latin1-port))
+
+ (benchmark "utf-8 port, ascii character" 700000
+ (peek-char %utf8/ascii-port))
+
+ (benchmark "utf-8 port, Korean character" 700000
+ (peek-char %utf8/wide-port)))
+
+(with-benchmark-prefix "read-char"
+
+ (benchmark "latin-1 port" 10000000
+ (read-char %latin1-port))
+
+ (benchmark "utf-8 port, ascii character" 10000000
+ (read-char %utf8/ascii-port))
+
+ (benchmark "utf-8 port, Korean character" 10000000
+ (read-char %utf8/wide-port)))
+
+(with-benchmark-prefix "char-ready?"
+
+ (benchmark "latin-1 port" 10000000
+ (char-ready? %latin1-port))
+
+ (benchmark "utf-8 port, ascii character" 10000000
+ (char-ready? %utf8/ascii-port))
+
+ (benchmark "utf-8 port, Korean character" 10000000
+ (char-ready? %utf8/wide-port)))
diff --git a/libguile/ports.c b/libguile/ports.c
index 7c3791d22..6cf0de2cc 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1023,13 +1023,15 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
#define SCM_MBCHAR_BUF_SIZE (4)
-/* Get one codepoint from a file, using the port's encoding. */
-scm_t_wchar
-scm_getc (SCM port)
+/* Read a codepoint from PORT and return it. Fill BUF with the byte
+ representation of the codepoint in PORT's encoding, and set *LEN to
+ the length in bytes of that representation. Raise an error on
+ failure. */
+static scm_t_wchar
+get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
int c;
- unsigned int bufcount = 0;
- char buf[SCM_MBCHAR_BUF_SIZE];
+ size_t bufcount = 0;
scm_t_uint32 result_buf;
scm_t_wchar codepoint = 0;
scm_t_uint32 *u32;
@@ -1133,6 +1135,8 @@ scm_getc (SCM port)
break;
}
+ *len = bufcount;
+
return codepoint;
failure:
@@ -1155,6 +1159,15 @@ scm_getc (SCM port)
return 0;
}
+/* Read a codepoint from PORT and return it. */
+scm_t_wchar
+scm_getc (SCM port)
+{
+ size_t len;
+ char buf[SCM_MBCHAR_BUF_SIZE];
+
+ return get_codepoint (port, buf, &len);
+}
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
@@ -1635,18 +1648,37 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
"to @code{read-char} would have hung.")
#define FUNC_NAME s_scm_peek_char
{
- scm_t_wchar c, column;
+ SCM result;
+ scm_t_wchar c;
+ char bytes[SCM_MBCHAR_BUF_SIZE];
+ long column, line;
+ size_t len;
+
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
else
SCM_VALIDATE_OPINPORT (1, port);
- column = SCM_COL(port);
- c = scm_getc (port);
- if (EOF == c)
- return SCM_EOF_VAL;
- scm_ungetc (c, port);
- SCM_COL(port) = column;
- return SCM_MAKE_CHAR (c);
+
+ column = SCM_COL (port);
+ line = SCM_LINUM (port);
+
+ c = get_codepoint (port, bytes, &len);
+ if (c == EOF)
+ result = SCM_EOF_VAL;
+ else
+ {
+ long i;
+
+ result = SCM_MAKE_CHAR (c);
+
+ for (i = len - 1; i >= 0; i--)
+ scm_unget_byte (bytes[i], port);
+
+ SCM_COL (port) = column;
+ SCM_LINUM (port) = line;
+ }
+
+ return result;
}
#undef FUNC_NAME
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index bb5c17336..4edd53127 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -422,7 +422,25 @@
(and (eq? faulty-str str)
(string=? from "UTF-32")
(string=? to "ISO-8859-1")
- (string? (strerror errno))))))))
+ (string? (strerror errno)))))))
+
+ (pass-if "peek-char [latin-1]"
+ (let ((p (with-fluids ((%default-port-encoding #f))
+ (open-input-string "hello, world"))))
+ (and (char=? (peek-char p) #\h)
+ (char=? (peek-char p) #\h)
+ (char=? (peek-char p) #\h)
+ (= (port-line p) 0)
+ (= (port-column p) 0))))
+
+ (pass-if "peek-char [utf-8]"
+ (let ((p (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-input-string "안녕하세요"))))
+ (and (char=? (peek-char p) #\안)
+ (char=? (peek-char p) #\안)
+ (char=? (peek-char p) #\안)
+ (= (port-line p) 0)
+ (= (port-column p) 0)))))
(with-test-prefix "call-with-output-string"