diff options
author | Michael Gran <spk121@yahoo.com> | 2010-01-10 15:08:19 -0800 |
---|---|---|
committer | Michael Gran <spk121@yahoo.com> | 2010-01-10 15:16:55 -0800 |
commit | 15b6a6b284f00fa790ef003a9df8c8ae5a4d7d6a (patch) | |
tree | ae8ddbfbd9b909a36a3821b963167ca95752a928 | |
parent | f39ede00675f0d7dcdb864e91653c4dacb98e694 (diff) |
Add R6RS character names
R6RS adds new names for some of the control characters.
* libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums)
(SCM_N_R6RS_CHARNAMES): new character name constants
(scm_alt_charnames, scm_alt_charnums): modified to remove duplicates
(scm_i_charname, scm_i_charname_to_char): use new constants
* test-suite/tests/chars.test (R5RS character names, R6RS character names):
new tests
* doc/ref/api-data.texi (Characters): updated
-rwxr-xr-x | doc/ref/api-data.texi | 45 | ||||
-rw-r--r-- | libguile/chars.c | 36 | ||||
-rw-r--r-- | test-suite/tests/chars.test | 18 |
3 files changed, 75 insertions, 24 deletions
diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index bcde697c4..3fc34c28f 100755 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -1688,11 +1688,28 @@ the backslash of @code{#\}. Many of the non-printing characters, such as whitespace characters and control characters, also have names. -The most commonly used non-printing characters are space and -newline. Their character names are @code{#\space} and -@code{#\newline}. There are also names for all of the ``C0 control -characters'' (those with code points below 32). The following table -describes the names for each character. +The most commonly used non-printing characters have long character +names, described in the table below. + +@multitable {@code{#\backspace}} {Preferred} +@item Character Name @tab Codepoint +@item @code{#\nul} @tab U+0000 +@item @code{#\alarm} @tab U+0007 +@item @code{#\backspace} @tab U+0008 +@item @code{#\tab} @tab U+0009 +@item @code{#\linefeed} @tab U+000A +@item @code{#\newline} @tab U+000A +@item @code{#\vtab} @tab U+000B +@item @code{#\page} @tab U+000C +@item @code{#\return} @tab U+000D +@item @code{#\esc} @tab U+001B +@item @code{#\space} @tab U+0020 +@item @code{#\delete} @tab U+007F +@end multitable + +There are also short names for all of the ``C0 control characters'' +(those with code points below 32). The following table lists the short +name for each character. @multitable @columnfractions .25 .25 .25 .25 @item 0 = @code{#\nul} @@ -1730,24 +1747,16 @@ describes the names for each character. @item 32 = @code{#\sp} @end multitable -The ``delete'' character (code point U+007F) may be referred to with the -name @code{#\del}. +The short name for the ``delete'' character (code point U+007F) is +@code{#\del}. -One might note that the space character has two names -- -@code{#\space} and @code{#\sp} -- as does the newline character. -Several other non-printing characters have more than one name, for the -sake of compatibility with previous versions. 
+There are also a few alternative names left over for compatibility with +previous versions of Guile. @multitable {@code{#\backspace}} {Preferred} @item Alternate @tab Standard -@item @code{#\sp} @tab @code{#\space} @item @code{#\nl} @tab @code{#\newline} -@item @code{#\lf} @tab @code{#\newline} -@item @code{#\tab} @tab @code{#\ht} -@item @code{#\backspace} @tab @code{#\bs} -@item @code{#\return} @tab @code{#\cr} -@item @code{#\page} @tab @code{#\ff} -@item @code{#\np} @tab @code{#\ff} +@item @code{#\np} @tab @code{#\page} @item @code{#\null} @tab @code{#\nul} @end multitable diff --git a/libguile/chars.c b/libguile/chars.c index d2749f41f..fcc43f36a 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -536,11 +536,24 @@ static const char *const scm_r5rs_charnames[] = { }; static const scm_t_uint32 const scm_r5rs_charnums[] = { - 0x20, 0x0A + 0x20, 0x0a }; #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *)) +static const char *const scm_r6rs_charnames[] = { + "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page", + "return", "esc", "delete" + /* 'space' and 'newline' are already included from the R5RS list. */ +}; + +static const scm_t_uint32 const scm_r6rs_charnums[] = { + 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, + 0x0d, 0x1b, 0x7f +}; + +#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *)) + /* The abbreviated names for control characters. 
*/ static const char *const scm_C0_control_charnames[] = { /* C0 controls */ @@ -562,11 +575,11 @@ static const scm_t_uint32 const scm_C0_control_charnums[] = { #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *)) static const char *const scm_alt_charnames[] = { - "null", "backspace", "tab", "nl", "newline", "np", "page", "return", + "null", "nl", "np" }; static const scm_t_uint32 const scm_alt_charnums[] = { - 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d + 0x00, 0x0a, 0x0c }; #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *)) @@ -583,6 +596,10 @@ scm_i_charname (SCM chr) if (scm_r5rs_charnums[c] == i) return scm_r5rs_charnames[c]; + for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) + if (scm_r6rs_charnums[c] == i) + return scm_r6rs_charnames[c]; + for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) if (scm_C0_control_charnums[c] == i) return scm_C0_control_charnames[c]; @@ -602,14 +619,21 @@ scm_i_charname_to_char (const char *charname, size_t charname_len) { size_t c; - /* The R5RS charnames. These are supposed to be case - insensitive. */ + /* The R5RS charnames. These are supposed to be case insensitive. */ for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) if ((strlen (scm_r5rs_charnames[c]) == charname_len) && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len))) return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); - /* Then come the controls. These are not case sensitive. */ + /* The R6RS charnames. R6RS says that these should be case-sensitive. They + are left as case-insensitive to avoid confusion. */ + for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) + if ((strlen (scm_r6rs_charnames[c]) == charname_len) + && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len))) + return SCM_MAKE_CHAR (scm_r6rs_charnums[c]); + + /* Then come the controls. By Guile convention, these are not case + sensitive. 
*/ for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) if ((strlen (scm_C0_control_charnames[c]) == charname_len) && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len))) diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test index cd1572feb..509f07066 100644 --- a/test-suite/tests/chars.test +++ b/test-suite/tests/chars.test @@ -258,6 +258,24 @@ (with-test-prefix "charnames" + (pass-if "R5RS character names" + (and (eqv? #\space (integer->char #x20)) + (eqv? #\newline (integer->char #x0A)))) + + (pass-if "R6RS character names" + (and (eqv? #\nul (integer->char #x00)) + (eqv? #\alarm (integer->char #x07)) + (eqv? #\backspace (integer->char #x08)) + (eqv? #\tab (integer->char #x09)) + (eqv? #\linefeed (integer->char #x0A)) + (eqv? #\newline (integer->char #x0A)) + (eqv? #\vtab (integer->char #x0B)) + (eqv? #\page (integer->char #x0C)) + (eqv? #\return (integer->char #x0D)) + (eqv? #\esc (integer->char #x1B)) + (eqv? #\space (integer->char #x20)) + (eqv? #\delete (integer->char #x7F)))) + (pass-if "R5RS character names are case insensitive" (and (eqv? #\space #\ ) (eqv? #\SPACE #\ ) |