summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Gran <spk121@yahoo.com>2010-01-10 15:08:19 -0800
committerMichael Gran <spk121@yahoo.com>2010-01-10 15:16:55 -0800
commit15b6a6b284f00fa790ef003a9df8c8ae5a4d7d6a (patch)
treeae8ddbfbd9b909a36a3821b963167ca95752a928
parentf39ede00675f0d7dcdb864e91653c4dacb98e694 (diff)
Add R6RS character names
R6RS adds new names for some of the control characters.

* libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums)
  (SCM_N_R6RS_CHARNAMES): new character name constants
  (scm_alt_charnames, scm_alt_charnums): modified to remove duplicates
  (scm_i_charname, scm_i_charname_to_char): use new constants
* test-suite/tests/chars.test (R5RS character names, R6RS character names):
  new tests
* doc/ref/api-data.texi (Characters): updated
-rwxr-xr-xdoc/ref/api-data.texi45
-rw-r--r--libguile/chars.c36
-rw-r--r--test-suite/tests/chars.test18
3 files changed, 75 insertions, 24 deletions
diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index bcde697c4..3fc34c28f 100755
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -1688,11 +1688,28 @@ the backslash of @code{#\}.
Many of the non-printing characters, such as whitespace characters and
control characters, also have names.
-The most commonly used non-printing characters are space and
-newline. Their character names are @code{#\space} and
-@code{#\newline}. There are also names for all of the ``C0 control
-characters'' (those with code points below 32). The following table
-describes the names for each character.
+The most commonly used non-printing characters have long character
+names, described in the table below.
+
+@multitable {@code{#\backspace}} {Preferred}
+@item Character Name @tab Codepoint
+@item @code{#\nul} @tab U+0000
+@item @code{#\alarm} @tab U+0007
+@item @code{#\backspace} @tab U+0008
+@item @code{#\tab} @tab U+0009
+@item @code{#\linefeed} @tab U+000A
+@item @code{#\newline} @tab U+000A
+@item @code{#\vtab} @tab U+000B
+@item @code{#\page} @tab U+000C
+@item @code{#\return} @tab U+000D
+@item @code{#\esc} @tab U+001B
+@item @code{#\space} @tab U+0020
+@item @code{#\delete} @tab U+007F
+@end multitable
+
+There are also short names for all of the ``C0 control characters''
+(those with code points below 32). The following table lists the short
+name for each character.
@multitable @columnfractions .25 .25 .25 .25
@item 0 = @code{#\nul}
@@ -1730,24 +1747,16 @@ describes the names for each character.
@item 32 = @code{#\sp}
@end multitable
-The ``delete'' character (code point U+007F) may be referred to with the
-name @code{#\del}.
+The short name for the ``delete'' character (code point U+007F) is
+@code{#\del}.
-One might note that the space character has two names --
-@code{#\space} and @code{#\sp} -- as does the newline character.
-Several other non-printing characters have more than one name, for the
-sake of compatibility with previous versions.
+There are also a few alternative names left over for compatibility with
+previous versions of Guile.
@multitable {@code{#\backspace}} {Preferred}
@item Alternate @tab Standard
-@item @code{#\sp} @tab @code{#\space}
@item @code{#\nl} @tab @code{#\newline}
-@item @code{#\lf} @tab @code{#\newline}
-@item @code{#\tab} @tab @code{#\ht}
-@item @code{#\backspace} @tab @code{#\bs}
-@item @code{#\return} @tab @code{#\cr}
-@item @code{#\page} @tab @code{#\ff}
-@item @code{#\np} @tab @code{#\ff}
+@item @code{#\np} @tab @code{#\page}
@item @code{#\null} @tab @code{#\nul}
@end multitable
diff --git a/libguile/chars.c b/libguile/chars.c
index d2749f41f..fcc43f36a 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -536,11 +536,24 @@ static const char *const scm_r5rs_charnames[] = {
};
static const scm_t_uint32 const scm_r5rs_charnums[] = {
- 0x20, 0x0A
+ 0x20, 0x0a
};
#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
+static const char *const scm_r6rs_charnames[] = {
+ "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
+ "return", "esc", "delete"
+ /* 'space' and 'newline' are already included from the R5RS list. */
+};
+
+static const scm_t_uint32 const scm_r6rs_charnums[] = {
+ 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
+ 0x0d, 0x1b, 0x7f
+};
+
+#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
+
/* The abbreviated names for control characters. */
static const char *const scm_C0_control_charnames[] = {
/* C0 controls */
@@ -562,11 +575,11 @@ static const scm_t_uint32 const scm_C0_control_charnums[] = {
#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
static const char *const scm_alt_charnames[] = {
- "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
+ "null", "nl", "np"
};
static const scm_t_uint32 const scm_alt_charnums[] = {
- 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
+ 0x00, 0x0a, 0x0c
};
#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
@@ -583,6 +596,10 @@ scm_i_charname (SCM chr)
if (scm_r5rs_charnums[c] == i)
return scm_r5rs_charnames[c];
+ for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+ if (scm_r6rs_charnums[c] == i)
+ return scm_r6rs_charnames[c];
+
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
if (scm_C0_control_charnums[c] == i)
return scm_C0_control_charnames[c];
@@ -602,14 +619,21 @@ scm_i_charname_to_char (const char *charname, size_t charname_len)
{
size_t c;
- /* The R5RS charnames. These are supposed to be case
- insensitive. */
+ /* The R5RS charnames. These are supposed to be case insensitive. */
for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
if ((strlen (scm_r5rs_charnames[c]) == charname_len)
&& (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
- /* Then come the controls. These are not case sensitive. */
+ /* The R6RS charnames. R6RS says that these should be case-sensitive. They
+ are left as case-insensitive to avoid confusion. */
+ for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+ if ((strlen (scm_r6rs_charnames[c]) == charname_len)
+ && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
+ return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
+
+ /* Then come the controls. By Guile convention, these are not case
+ sensitive. */
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
if ((strlen (scm_C0_control_charnames[c]) == charname_len)
&& (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index cd1572feb..509f07066 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -258,6 +258,24 @@
(with-test-prefix "charnames"
+ (pass-if "R5RS character names"
+ (and (eqv? #\space (integer->char #x20))
+ (eqv? #\newline (integer->char #x0A))))
+
+ (pass-if "R6RS character names"
+ (and (eqv? #\nul (integer->char #x00))
+ (eqv? #\alarm (integer->char #x07))
+ (eqv? #\backspace (integer->char #x08))
+ (eqv? #\tab (integer->char #x09))
+ (eqv? #\linefeed (integer->char #x0A))
+ (eqv? #\newline (integer->char #x0A))
+ (eqv? #\vtab (integer->char #x0B))
+ (eqv? #\page (integer->char #x0C))
+ (eqv? #\return (integer->char #x0D))
+ (eqv? #\esc (integer->char #x1B))
+ (eqv? #\space (integer->char #x20))
+ (eqv? #\delete (integer->char #x7F))))
+
(pass-if "R5RS character names are case insensitive"
(and (eqv? #\space #\ )
(eqv? #\SPACE #\ )