diff options
author | Mark H Weaver <mhw@netris.org> | 2019-05-06 21:11:26 -0400 |
---|---|---|
committer | Mark H Weaver <mhw@netris.org> | 2019-05-07 04:41:30 -0400 |
commit | 7c2b48a6bd4b7ccd043b2e19471b498dc66a073d (patch) | |
tree | c853c850dc88cce21bde07283b40bf64bd54699c /libguile | |
parent | 91b5b1631f87067a63cb0b50df5dbfce977c18c7 (diff) |
Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.
* libguile/i18n.c (SCM_MAX_ALLOCA): New macro.
(SCM_STRING_TO_U32_BUF): Accept an additional variable to remember
whether we used malloc to allocate the buffer. Use malloc if the
allocation size is greater than SCM_MAX_ALLOCA.
(SCM_CLEANUP_U32_BUF): New macro.
(compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt.
* libguile/strings.c (SCM_MAX_ALLOCA): New macro.
(normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the
allocation size is greater than SCM_MAX_ALLOCA.
* test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests.
Diffstat (limited to 'libguile')
-rw-r--r-- | libguile/i18n.c | 72 | ||||
-rw-r--r-- | libguile/strings.c | 43 |
2 files changed, 79 insertions, 36 deletions
```diff
diff --git a/libguile/i18n.c b/libguile/i18n.c
index 6c87fdaae..5e6783700 100644
--- a/libguile/i18n.c
+++ b/libguile/i18n.c
@@ -40,6 +40,10 @@
 #include <unicase.h>
 #include <unistr.h>
 
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
 #if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
 /* The GNU thread-aware locale API is documented in ``Thread-Aware
    Locale Model, a Proposal'', by Ulrich Drepper:
@@ -743,23 +747,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
    A similar API can be found in MzScheme starting from version 200:
    http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
 
-#define SCM_STRING_TO_U32_BUF(s1, c_s1) \
-  do \
-    { \
-      if (scm_i_is_narrow_string (s1)) \
-        { \
-          size_t i, len; \
-          const char *buf = scm_i_string_chars (s1); \
-          \
-          len = scm_i_string_length (s1); \
-          c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
-          \
-          for (i = 0; i < len; i ++) \
-            c_s1[i] = (unsigned char ) buf[i]; \
-          c_s1[len] = 0; \
-        } \
-      else \
-        c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
+#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \
+  do \
+    { \
+      if (scm_i_is_narrow_string (str)) \
+        { \
+          size_t i, len, bytes; \
+          const char *buf = scm_i_string_chars (str); \
+          \
+          len = scm_i_string_length (str); \
+          bytes = (len + 1) * sizeof (scm_t_wchar); \
+          c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \
+          c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \
+          \
+          for (i = 0; i < len; i ++) \
+            c_str[i] = (unsigned char ) buf[i]; \
+          c_str[len] = 0; \
+        } \
+      else \
+        { \
+          c_str_malloc_p = 0; \
+          c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \
+        } \
+    } while (0)
+
+#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \
+  do \
+    { \
+      if (c_str_malloc_p) \
+        free (c_str); \
     } while (0)
 
 
@@ -773,10 +789,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
   int result;
   scm_t_locale c_locale;
   scm_t_wchar *c_s1, *c_s2;
+  int c_s1_malloc_p, c_s2_malloc_p;
   SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
 
-  SCM_STRING_TO_U32_BUF (s1, c_s1);
-  SCM_STRING_TO_U32_BUF (s2, c_s2);
+  SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+  SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION (c_locale,
@@ -786,6 +803,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
     result = u32_strcoll ((const scm_t_uint32 *) c_s1,
                           (const scm_t_uint32 *) c_s2);
 
+  SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+  SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
   scm_remember_upto_here_2 (s1, s2);
   scm_remember_upto_here (locale);
   return result;
@@ -828,10 +848,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
   int result, ret = 0;
   scm_t_locale c_locale;
   scm_t_wchar *c_s1, *c_s2;
+  int c_s1_malloc_p, c_s2_malloc_p;
   SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
 
-  SCM_STRING_TO_U32_BUF (s1, c_s1);
-  SCM_STRING_TO_U32_BUF (s2, c_s2);
+  SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+  SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION
@@ -846,6 +867,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
                              (const scm_t_uint32 *) c_s2,
                              &result);
 
+  SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+  SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
   if (SCM_UNLIKELY (ret != 0))
     {
       errno = ret;
@@ -1212,13 +1236,13 @@ str_to_case (SCM str, scm_t_locale c_locale,
   scm_t_wchar *c_str, *c_buf;
   scm_t_uint32 *c_convstr;
   size_t len, convlen;
-  int ret;
+  int ret, c_str_malloc_p;
   SCM convstr;
 
   len = scm_i_string_length (str);
   if (len == 0)
     return scm_nullstr;
-  SCM_STRING_TO_U32_BUF (str, c_str);
+  SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
 
   if (c_locale)
     RUN_IN_LOCALE_SECTION (c_locale, ret =
@@ -1230,6 +1254,8 @@ str_to_case (SCM str, scm_t_locale c_locale,
       u32_locale_tocase ((scm_t_uint32 *) c_str, len,
                          &c_convstr, &convlen, func);
 
+  SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
+
   scm_remember_upto_here (str);
 
   if (SCM_UNLIKELY (ret != 0))
diff --git a/libguile/strings.c b/libguile/strings.c
index 5a150278d..e5c7f87d6 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -45,6 +45,10 @@
 #include "libguile/validate.h"
 #include "libguile/private-options.h"
 
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
 
 
 /* {Strings}
@@ -1808,6 +1812,7 @@ static void
 unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
 {
   char *before, *after;
+  int malloc_p;
   size_t i, j;
   /* The worst case is if the input string contains all 4-digit hex escapes.
      "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
@@ -1815,7 +1820,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
   size_t nzeros, ndigits;
 
   before = buf;
-  after = alloca (max_out_len);
+  malloc_p = (max_out_len > SCM_MAX_ALLOCA);
+  after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
   i = 0;
   j = 0;
   while (i < *lenp)
@@ -1873,6 +1879,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
     }
   *lenp = j;
   memcpy (before, after, j);
+  if (malloc_p)
+    free (after);
 }
 
 char *
@@ -2313,28 +2321,37 @@ normalize_str (SCM string, uninorm_t form)
 {
   SCM ret;
   scm_t_uint32 *w_str;
+  scm_t_uint32 *w_norm_str;
   scm_t_wchar *cbuf;
-  size_t rlen, len = scm_i_string_length (string);
+  int malloc_p;
+  size_t norm_len, len = scm_i_string_length (string);
 
   if (scm_i_is_narrow_string (string))
     {
-      size_t i;
+      size_t i, bytes;
       const char *buf = scm_i_string_chars (string);
-
-      w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
-
+
+      bytes = (len + 1) * sizeof (scm_t_wchar);
+      malloc_p = (bytes > SCM_MAX_ALLOCA);
+      w_str = malloc_p ? malloc (bytes) : alloca (bytes);
+
       for (i = 0; i < len; i ++)
         w_str[i] = (unsigned char) buf[i];
       w_str[len] = 0;
     }
-  else
-    w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+  else
+    {
+      malloc_p = 0;
+      w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+    }
 
-  w_str = u32_normalize (form, w_str, len, NULL, &rlen);
-
-  ret = scm_i_make_wide_string (rlen, &cbuf, 0);
-  u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
-  free (w_str);
+  w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
+
+  ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
+  u32_cpy ((scm_t_uint32 *) cbuf, w_norm_str, norm_len);
+  free (w_norm_str);
+  if (malloc_p)
+    free (w_str);
 
   scm_i_try_narrow_string (ret);
 
```