summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Gran <spk121@yahoo.com>2009-08-21 08:57:35 -0700
committerMichael Gran <spk121@yahoo.com>2009-08-21 08:57:35 -0700
commite23106d53eb03d7cb4962282396269176ea7482e (patch)
treed7736a8f7f16af0c73365f688f75561aac81c5eb
parent90305ce9e429f0381ff79427e71287fdafd4d201 (diff)
Add initial support for wide symbols
* libguile/hash.c (scm_i_string_hash): new function (scm_hasher): don't unpack string: use scm_i_string_hash * libguile/hash.h: new declaration for scm_i_string_hash * libguile/print.c (quote_keywordish_symbol): use symbol accessors (scm_i_print_symbol_name): new function (scm_print_symbol_name): call scm_i_print_symbol_name (iprin1): use scm_i_print_symbol_name to print symbols * libguile/print.h: new declaration for scm_i_print_symbol_name * libguile/symbols.c (lookup_interned_symbol): now takes scheme string instead of c string; callers changed (lookup_interned_symbol): add wide symbol support (scm_i_c_mem2symbol): removed (scm_i_mem2symbol): removed and replaced with scm_i_str2symbol (scm_i_str2symbol): new function (scm_i_mem2uninterned_symbol): removed and replaced with scm_i_str2uninterned_symbol (scm_i_str2uninterned_symbol): new function (scm_make_symbol, scm_string_to_symbol, scm_from_locale_symbol) (scm_from_locale_symboln): use scm_i_str2symbol * test-suite/tests/symbols.test: new tests
-rw-r--r--libguile/hash.c17
-rw-r--r--libguile/hash.h1
-rw-r--r--libguile/print.c59
-rw-r--r--libguile/print.h1
-rw-r--r--libguile/symbols.c108
-rw-r--r--test-suite/tests/symbols.test39
6 files changed, 113 insertions, 112 deletions
diff --git a/libguile/hash.c b/libguile/hash.c
index d2fe17706..e6e38ba50 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -50,6 +50,20 @@ scm_string_hash (const unsigned char *str, size_t len)
return h;
}
+unsigned long
+scm_i_string_hash (SCM str)
+{
+ size_t len = scm_i_string_length (str);
+ size_t i = 0;
+
+ unsigned long h = 0;
+ while (len-- > 0)
+ h = (unsigned long) scm_i_string_ref (str, i++) + h * 37;
+
+ scm_remember_upto_here_1 (str);
+ return h;
+}
+
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
/* Dirk:FIXME:: scm_hasher could be made static. */
@@ -115,8 +129,7 @@ scm_hasher(SCM obj, unsigned long n, size_t d)
case scm_tc7_string:
{
unsigned long hash =
- scm_string_hash ((const unsigned char *) scm_i_string_chars (obj),
- scm_i_string_length (obj)) % n;
+ scm_i_string_hash (obj) % n;
scm_remember_upto_here_1 (obj);
return hash;
}
diff --git a/libguile/hash.h b/libguile/hash.h
index 789595b42..2ebc05352 100644
--- a/libguile/hash.h
+++ b/libguile/hash.h
@@ -28,6 +28,7 @@
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
+SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);
SCM_API SCM scm_hashq (SCM obj, SCM n);
diff --git a/libguile/print.c b/libguile/print.c
index 74f7d8db6..07bff47fd 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -295,13 +295,12 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref)
/* Print the name of a symbol. */
static int
-quote_keywordish_symbol (const char *str, size_t len)
+quote_keywordish_symbol (SCM symbol)
{
SCM option;
- /* LEN is guaranteed to be > 0.
- */
- if (str[0] != ':' && str[len-1] != ':')
+ if (scm_i_symbol_ref (symbol, 0) != ':'
+ && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':')
return 0;
option = SCM_PRINT_KEYWORD_STYLE;
@@ -313,7 +312,7 @@ quote_keywordish_symbol (const char *str, size_t len)
}
void
-scm_print_symbol_name (const char *str, size_t len, SCM port)
+scm_i_print_symbol_name (SCM str, SCM port)
{
/* This points to the first character that has not yet been written to the
* port. */
@@ -334,18 +333,20 @@ scm_print_symbol_name (const char *str, size_t len, SCM port)
* simpler and faster. */
int maybe_weird = 0;
size_t mw_pos = 0;
+ size_t len = scm_i_symbol_length (str);
+ scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
- if (len == 0 || str[0] == '\'' || str[0] == '`' || str[0] == ','
- || quote_keywordish_symbol (str, len)
- || (str[0] == '.' && len == 1)
- || scm_is_true (scm_c_locale_stringn_to_number (str, len, 10)))
+ if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
+ || quote_keywordish_symbol (str)
+ || (str0 == '.' && len == 1)
+ || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
{
scm_lfwrite ("#{", 2, port);
weird = 1;
}
for (end = pos; end < len; ++end)
- switch (str[end])
+ switch (scm_i_symbol_ref (str, end))
{
#ifdef BRACKETS_AS_PARENS
case '[':
@@ -370,11 +371,11 @@ scm_print_symbol_name (const char *str, size_t len, SCM port)
weird = 1;
}
if (pos < end)
- scm_lfwrite (str + pos, end - pos, port);
+ scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
{
char buf[2];
buf[0] = '\\';
- buf[1] = str[end];
+ buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
scm_lfwrite (buf, 2, port);
}
pos = end + 1;
@@ -392,11 +393,18 @@ scm_print_symbol_name (const char *str, size_t len, SCM port)
break;
}
if (pos < end)
- scm_lfwrite (str + pos, end - pos, port);
+ scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
if (weird)
scm_lfwrite ("}#", 2, port);
}
+void
+scm_print_symbol_name (const char *str, size_t len, SCM port)
+{
+ SCM symbol = scm_from_locale_symboln (str, len);
+ return scm_i_print_symbol_name (symbol, port);
+}
+
/* Print generally. Handles both write and display according to PSTATE.
*/
SCM_GPROC(s_write, "write", 1, 1, 0, scm_write, g_write);
@@ -665,16 +673,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_symbol:
if (scm_i_symbol_is_interned (exp))
{
- scm_print_symbol_name (scm_i_symbol_chars (exp),
- scm_i_symbol_length (exp), port);
+ scm_i_print_symbol_name (exp, port);
scm_remember_upto_here_1 (exp);
}
else
{
scm_puts ("#<uninterned-symbol ", port);
- scm_print_symbol_name (scm_i_symbol_chars (exp),
- scm_i_symbol_length (exp),
- port);
+ scm_i_print_symbol_name (exp, port);
scm_putc (' ', port);
scm_uintprint (SCM_UNPACK (exp), 16, port);
scm_putc ('>', port);
@@ -726,14 +731,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
EXIT_NESTED_DATA (pstate);
break;
case scm_tcs_subrs:
- scm_puts (SCM_SUBR_GENERIC (exp)
- ? "#<primitive-generic "
- : "#<primitive-procedure ",
- port);
- scm_puts (scm_i_symbol_chars (SCM_SUBR_NAME (exp)), port);
- scm_putc ('>', port);
- break;
-
+ {
+ SCM name = scm_symbol_to_string (SCM_SUBR_NAME (exp));
+ scm_puts (SCM_SUBR_GENERIC (exp)
+ ? "#<primitive-generic "
+ : "#<primitive-procedure ",
+ port);
+ scm_lfwrite_str (name, port);
+ scm_putc ('>', port);
+ break;
+ }
case scm_tc7_pws:
scm_puts ("#<procedure-with-setter", port);
{
diff --git a/libguile/print.h b/libguile/print.h
index 00648efc1..3e2333ddd 100644
--- a/libguile/print.h
+++ b/libguile/print.h
@@ -82,6 +82,7 @@ SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);
SCM_API void scm_iprlist (char *hdr, SCM exp, int tlr, SCM port, scm_print_state *pstate);
+SCM_INTERNAL void scm_i_print_symbol_name (SCM sym, SCM port);
SCM_API void scm_print_symbol_name (const char *str, size_t len, SCM port);
SCM_API void scm_prin1 (SCM exp, SCM port, int writingp);
SCM_API void scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate);
diff --git a/libguile/symbols.c b/libguile/symbols.c
index c0ba2a8b4..a9320163a 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -89,11 +89,11 @@ scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
}
static SCM
-lookup_interned_symbol (const char *name, size_t len,
- unsigned long raw_hash)
+lookup_interned_symbol (SCM name, unsigned long raw_hash)
{
/* Try to find the symbol in the symbols table */
SCM l;
+ size_t len = scm_i_string_length (name);
unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
for (l = SCM_HASHTABLE_BUCKET (symbols, hash);
@@ -104,15 +104,32 @@ lookup_interned_symbol (const char *name, size_t len,
if (scm_i_symbol_hash (sym) == raw_hash
&& scm_i_symbol_length (sym) == len)
{
- const char *chrs = scm_i_symbol_chars (sym);
- size_t i = len;
-
- while (i != 0)
- {
- --i;
- if (name[i] != chrs[i])
- goto next_symbol;
- }
+ size_t i = len;
+
+ /* Slightly faster path for comparing narrow to narrow. */
+ if (scm_i_is_narrow_string (name) && scm_i_is_narrow_symbol (sym))
+ {
+ const char *chrs = scm_i_symbol_chars (sym);
+ const char *str = scm_i_string_chars (name);
+
+ while (i != 0)
+ {
+ --i;
+ if (str[i] != chrs[i])
+ goto next_symbol;
+ }
+ }
+ else
+ {
+ /* Somewhat slower path for comparing narrow to wide or
+ wide to wide. */
+ while (i != 0)
+ {
+ --i;
+ if (scm_i_string_ref (name, i) != scm_i_symbol_ref (sym, i))
+ goto next_symbol;
+ }
+ }
return sym;
}
@@ -142,32 +159,12 @@ intern_symbol (SCM symbol)
}
static SCM
-scm_i_c_mem2symbol (const char *name, size_t len)
-{
- SCM symbol;
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
-
- symbol = lookup_interned_symbol (name, len, raw_hash);
- if (scm_is_false (symbol))
- {
- /* The symbol was not found, create it. */
- symbol = scm_i_c_make_symbol (name, len, 0, raw_hash,
- scm_cons (SCM_BOOL_F, SCM_EOL));
- intern_symbol (symbol);
- }
-
- return symbol;
-}
-
-static SCM
-scm_i_mem2symbol (SCM str)
+scm_i_str2symbol (SCM str)
{
SCM symbol;
- const char *name = scm_i_string_chars (str);
- size_t len = scm_i_string_length (str);
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
+ size_t raw_hash = scm_i_string_hash (str);
- symbol = lookup_interned_symbol (name, len, raw_hash);
+ symbol = lookup_interned_symbol (str, raw_hash);
if (scm_is_false (symbol))
{
/* The symbol was not found, create it. */
@@ -181,11 +178,9 @@ scm_i_mem2symbol (SCM str)
static SCM
-scm_i_mem2uninterned_symbol (SCM str)
+scm_i_str2uninterned_symbol (SCM str)
{
- const char *name = scm_i_string_chars (str);
- size_t len = scm_i_string_length (str);
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
+ size_t raw_hash = scm_i_string_hash (str);
return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
@@ -220,7 +215,7 @@ SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
#define FUNC_NAME s_scm_make_symbol
{
SCM_VALIDATE_STRING (1, name);
- return scm_i_mem2uninterned_symbol (name);
+ return scm_i_str2uninterned_symbol (name);
}
#undef FUNC_NAME
@@ -282,7 +277,7 @@ SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
#define FUNC_NAME s_scm_string_to_symbol
{
SCM_VALIDATE_STRING (1, string);
- return scm_i_mem2symbol (string);
+ return scm_i_str2symbol (string);
}
#undef FUNC_NAME
@@ -389,44 +384,23 @@ SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
SCM
scm_from_locale_symbol (const char *sym)
{
- return scm_i_c_mem2symbol (sym, strlen (sym));
+ return scm_from_locale_symboln (sym, -1);
}
SCM
scm_from_locale_symboln (const char *sym, size_t len)
{
- return scm_i_c_mem2symbol (sym, len);
+ SCM str = scm_from_locale_stringn (sym, len);
+ return scm_i_str2symbol (str);
}
SCM
scm_take_locale_symboln (char *sym, size_t len)
{
- SCM res;
- unsigned long raw_hash;
-
- if (len == (size_t)-1)
- len = strlen (sym);
- else
- {
- /* Ensure STR is null terminated. A realloc for 1 extra byte should
- often be satisfied from the alignment padding after the block, with
- no actual data movement. */
- sym = scm_realloc (sym, len+1);
- sym[len] = '\0';
- }
-
- raw_hash = scm_string_hash ((unsigned char *)sym, len);
- res = lookup_interned_symbol (sym, len, raw_hash);
- if (scm_is_false (res))
- {
- res = scm_i_c_take_symbol (sym, len, 0, raw_hash,
- scm_cons (SCM_BOOL_F, SCM_EOL));
- intern_symbol (res);
- }
- else
- free (sym);
+ SCM str;
- return res;
+ str = scm_take_locale_stringn (sym, len);
+ return scm_i_str2symbol (str);
}
SCM
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 3b1abe1e9..b6dbb9d59 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -61,15 +61,13 @@
(let ((s 'x0123456789012345678901234567890123456789))
(not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
- ;; symbol->string isn't ready for UCS-4 yet
-
- ;;(pass-if "short UCS-4-encoded symbols are not inlined"
- ;; (let ((s (string->symbol "\u0100")))
- ;; (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+ (pass-if "short UCS-4-encoded symbols are not inlined"
+ (let ((s (string->symbol "\u0100")))
+ (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
- ;;(pass-if "long UCS-4-encoded symbols are not inlined"
- ;; (let ((s (string->symbol "\u010012345678901234567890123456789")))
- ;; (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+ (pass-if "long UCS-4-encoded symbols are not inlined"
+ (let ((s (string->symbol "\u010012345678901234567890123456789")))
+ (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
(with-test-prefix "hashes"
@@ -99,16 +97,13 @@
(let ((s (string->symbol "\xC0\xC1\xC2")))
(not (assq-ref (%symbol-dump s) 'stringbuf-wide))))
- ;; symbol->string isn't ready for UCS-4 yet
-
- ;;(pass-if "BMP symbols are UCS-4 encoded"
- ;; (let ((s (string->symbol "\u0100\u0101\x0102")))
- ;; (assq-ref (%symbol-dump s) 'stringbuf-wide)))
+ (pass-if "BMP symbols are UCS-4 encoded"
+ (let ((s (string->symbol "\u0100\u0101\x0102")))
+ (assq-ref (%symbol-dump s) 'stringbuf-wide)))
- ;;(pass-if "SMP symbols are UCS-4 encoded"
- ;; (let ((s (string->symbol "\U010300\u010301\x010302")))
- ;; (assq-ref (%symbol-dump s) 'stringbuf-wide)))
- ))
+ (pass-if "SMP symbols are UCS-4 encoded"
+ (let ((s (string->symbol "\U010300\u010301\x010302")))
+ (assq-ref (%symbol-dump s) 'stringbuf-wide)))))
;;;
;;; symbol?
@@ -125,6 +120,16 @@
(pass-if "symbol"
(symbol? 'foo)))
+;;;
+;;; wide symbols
+;;;
+
+(with-test-prefix "BMP symbols"
+
+ (pass-if "BMP symbol's string"
+ (and (= 4 (string-length "abc\u0100"))
+ (string=? "abc\u0100"
+ (symbol->string (string->symbol "abc\u0100"))))))
;;;
;;; symbol->string