summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andy Wingo <wingo@pobox.com> 2011-01-05 18:43:28 -0800
committer Andy Wingo <wingo@pobox.com> 2011-01-07 09:18:36 -0800
commit 622415380cb4b24d1d12641781f705f99b9e720e (patch)
tree a9e461af606baa7af82209690733a2be1bec7038
parent f756cd30764c76d270cc34969d7156ebcfcfb214 (diff)
add hash functions for locale, latin1, and utf8 strings
* libguile/hash.c (scm_i_locale_string_hash) (scm_i_latin1_string_hash, scm_i_utf8_string_hash): New functions.
-rw-r--r-- libguile/hash.c 79
-rw-r--r-- libguile/hash.h 9
2 files changed, 87 insertions, 1 deletions
diff --git a/libguile/hash.c b/libguile/hash.c
index e1591d298..0dcd1c29e 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -22,6 +22,12 @@
# include <config.h>
#endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
+#include <unistr.h>
+
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/ports.h"
@@ -64,6 +70,79 @@ scm_i_string_hash (SCM str)
return h;
}
+/* Hash the first LEN bytes of STR, a multibyte string in the current
+   locale's encoding, folding in one decoded character at a time as
+   h = c + h * 37.  If LEN is (size_t) -1, STR must be NUL-terminated
+   and its length is taken with strlen.
+
+   If the bytes cannot be decoded (invalid or truncated sequence), or if
+   <wchar.h> is unavailable, the hash is instead computed by
+   scm_i_string_hash on a string built with scm_from_locale_stringn.  */
+unsigned long
+scm_i_locale_string_hash (const char *str, size_t len)
+{
+#ifdef HAVE_WCHAR_H
+  mbstate_t state;
+  wchar_t c;
+  size_t byte_idx = 0, nbytes;
+  unsigned long h = 0;
+
+  if (len == (size_t) -1)
+    len = strlen (str);
+
+  /* mbrtowc must be handed a conversion state that starts out in the
+     initial state; reading an uninitialized mbstate_t is undefined.  */
+  memset (&state, 0, sizeof (state));
+
+  /* Bound the loop by the byte index.  Calling mbrtowc with zero bytes
+     remaining is reported as an incomplete sequence ((size_t) -2), which
+     would send every fully-consumed string down the punt path.  */
+  while (byte_idx < len)
+    {
+      nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state);
+
+      if (nbytes >= (size_t) -2)
+        /* Invalid or truncated input string; punt.  */
+        return scm_i_string_hash (scm_from_locale_stringn (str, len));
+
+      if (nbytes == 0)
+        /* A NUL inside the first LEN bytes: mbrtowc stored L'\0' in C
+           and reports 0, but the NUL occupies one byte.  Hash it like
+           any other character so all LEN bytes contribute, as in
+           scm_i_latin1_string_hash.  */
+        nbytes = 1;
+
+      h = (unsigned long) c + h * 37;
+      byte_idx += nbytes;
+    }
+
+  return h;
+#else
+  return scm_i_string_hash (scm_from_locale_stringn (str, len));
+#endif
+}
+
+/* Hash the first LEN bytes of STR, treating each byte as one unsigned
+   character value and folding it in as hash = byte + hash * 37.  If LEN
+   is (size_t) -1, STR must be NUL-terminated and its length is taken
+   with strlen.  An empty string hashes to 0.  */
+unsigned long
+scm_i_latin1_string_hash (const char *str, size_t len)
+{
+  const scm_t_uint8 *cur, *end;
+  unsigned long hash = 0;
+
+  if (len == (size_t) -1)
+    len = strlen (str);
+
+  /* Walk the bytes as unsigned values so bytes >= 0x80 are not
+     sign-extended where plain char is signed.  */
+  cur = (const scm_t_uint8 *) str;
+  end = cur + len;
+
+  while (cur != end)
+    {
+      hash = hash * 37 + (unsigned long) *cur;
+      cur++;
+    }
+
+  return hash;
+}
+
+/* Hash the first LEN bytes of STR, a UTF-8 encoded string, folding in
+   one decoded code point at a time as h = c + h * 37.  If LEN is
+   (size_t) -1, STR must be NUL-terminated and its length is taken with
+   strlen.
+
+   If the bytes are not well-formed UTF-8, the hash is instead computed
+   by scm_i_string_hash on a string built with scm_from_utf8_stringn.  */
+unsigned long
+scm_i_utf8_string_hash (const char *str, size_t len)
+{
+  const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
+  size_t byte_idx = 0;
+  unsigned long h = 0;
+
+  if (len == (size_t) -1)
+    len = strlen (str);
+
+  /* Validate up front.  u8_mbtouc does not signal malformed input with
+     a negative result -- it stores U+FFFD and returns a positive
+     length -- so checking its return value cannot detect bad UTF-8.  */
+  if (u8_check (ustr, len) != NULL)
+    /* Bad UTF-8; punt.  */
+    return scm_i_string_hash (scm_from_utf8_stringn (str, len));
+
+  while (byte_idx < len)
+    {
+      ucs4_t c;
+      int nbytes;
+
+      nbytes = u8_mbtouc (&c, ustr + byte_idx, len - byte_idx);
+      if (nbytes <= 0)
+        /* Not expected after validation; guards against looping
+           forever without advancing.  */
+        break;
+
+      h = (unsigned long) c + h * 37;
+      byte_idx += nbytes;
+    }
+
+  return h;
+}
+
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
/* Dirk:FIXME:: scm_hasher could be made static. */
diff --git a/libguile/hash.h b/libguile/hash.h
index 2ebc05352..307748617 100644
--- a/libguile/hash.h
+++ b/libguile/hash.h
@@ -3,7 +3,7 @@
#ifndef SCM_HASH_H
#define SCM_HASH_H
-/* Copyright (C) 1995,1996,2000, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@@ -28,6 +28,13 @@
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
+SCM_INTERNAL unsigned long scm_i_locale_string_hash (const char *str,
+ size_t len);
+SCM_INTERNAL unsigned long scm_i_latin1_string_hash (const char *str,
+ size_t len);
+SCM_INTERNAL unsigned long scm_i_utf8_string_hash (const char *str,
+ size_t len);
+
SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);