summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa <handa@m17n.org>2011-07-07 07:43:48 +0900
committerKenichi Handa <handa@m17n.org>2011-07-07 07:43:48 +0900
commitc805dec0b5fa81b5c9f2b724e2ec12a17d723aca (patch)
treec29a8490c976fdf4dbf64ef1b13a57f7d1110cc1 /src
parent5c62d133468c354b47a1643092add8292e084765 (diff)
Add C interface for Unicode character property table.
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog42
-rw-r--r--src/character.h39
-rw-r--r--src/chartab.c579
-rw-r--r--src/composite.c5
-rw-r--r--src/dispextern.h6
-rw-r--r--src/font.c5
6 files changed, 598 insertions, 78 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 78fca60ca2..1a56298ee2 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,45 @@
+2011-07-06 Kenichi Handa <handa@m17n.org>
+
+ * character.h (unicode_category_t): New enum type.
+
+ * chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types.
+ (Qchar_code_property_table): New variable.
+ (UNIPROP_TABLE_P, UNIPROP_GET_DECODER)
+ (UNIPROP_COMPRESSED_FORM_P): New macros.
+ (char_table_ascii): Uncompress the compressed values.
+ (sub_char_table_ref): New arg is_uniprop. Callers changed.
+ Uncompress the compressed values.
+ (sub_char_table_ref_and_range): Likewise.
+ (char_table_ref_and_range): Uncompress the compressed values.
+ (sub_char_table_set): New arg is_uniprop. Callers changed.
+ Uncompress the compressed values.
+ (sub_char_table_set_range): Args changed. Callers changed.
+ (char_table_set_range): Adjuted for the above change.
+ (map_sub_char_table): Delete args default_val and parent. Add arg
+ top. Give decoded values to a Lisp function.
+ (map_char_table): Adjusted for the above change. Give decoded
+ values to a Lisp function. Gcpro more variables.
+ (uniprop_table_uncompress)
+ (uniprop_decode_value_run_length): New functions.
+ (uniprop_decoder, uniprop_decoder_count): New variables.
+ (uniprop_get_decoder, uniprop_encode_value_character)
+ (uniprop_encode_value_run_length, uniprop_encode_value_numeric):
+ New functions.
+ (uniprop_encoder, uniprop_encoder_count): New variables.
+ (uniprop_get_encoder, uniprop_table)
+ (Funicode_property_table_internal, Fget_unicode_property_internal)
+ (Fput_unicode_property_internal): New functions.
+ (syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr
+ Sunicode_property_table_internal, Sget_unicode_property_internal,
+ and Sput_unicode_property_internal. Defvar_lisp
+ char-code-property-alist.
+
+ * composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of
+ Vunicode_category_table.
+
+ * font.c (font_range): Adjusted for the change of
+ Vunicode_category_table.
+
2011-06-22 Paul Eggert <eggert@cs.ucla.edu>
Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking.
diff --git a/src/character.h b/src/character.h
index 9a45e7f003..d8e77c5095 100644
--- a/src/character.h
+++ b/src/character.h
@@ -597,6 +597,45 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
: (c) <= 0xDFFF ? 2 \
: 0)
+/* Data type for Unicode general category.
+
+ The order of members must be in sync with the 8th element of the
+ member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
+ Unicode character property `general-category'. */
+
+typedef enum {
+ UNICODE_CATEGORY_UNKNOWN = 0,
+ UNICODE_CATEGORY_Lu,
+ UNICODE_CATEGORY_Ll,
+ UNICODE_CATEGORY_Lt,
+ UNICODE_CATEGORY_Lm,
+ UNICODE_CATEGORY_Lo,
+ UNICODE_CATEGORY_Mn,
+ UNICODE_CATEGORY_Mc,
+ UNICODE_CATEGORY_Me,
+ UNICODE_CATEGORY_Nd,
+ UNICODE_CATEGORY_Nl,
+ UNICODE_CATEGORY_No,
+ UNICODE_CATEGORY_Pc,
+ UNICODE_CATEGORY_Pd,
+ UNICODE_CATEGORY_Ps,
+ UNICODE_CATEGORY_Pe,
+ UNICODE_CATEGORY_Pi,
+ UNICODE_CATEGORY_Pf,
+ UNICODE_CATEGORY_Po,
+ UNICODE_CATEGORY_Sm,
+ UNICODE_CATEGORY_Sc,
+ UNICODE_CATEGORY_Sk,
+ UNICODE_CATEGORY_So,
+ UNICODE_CATEGORY_Zs,
+ UNICODE_CATEGORY_Zl,
+ UNICODE_CATEGORY_Zp,
+ UNICODE_CATEGORY_Cc,
+ UNICODE_CATEGORY_Cf,
+ UNICODE_CATEGORY_Cs,
+ UNICODE_CATEGORY_Co,
+ UNICODE_CATEGORY_Cn
+} unicode_category_t;
extern int char_resolve_modifier_mask (int);
extern int char_string (unsigned, unsigned char *);
diff --git a/src/chartab.c b/src/chartab.c
index ed5b238646..4a9a76bdd6 100644
--- a/src/chartab.c
+++ b/src/chartab.c
@@ -53,7 +53,38 @@ static const int chartab_bits[4] =
#define CHARTAB_IDX(c, depth, min_char) \
(((c) - (min_char)) >> chartab_bits[(depth)])
+
+/* Preamble for uniprop (Unicode character property) tables. See the
+ comment of "Unicode character property tables". */
+
+/* Purpose of uniprop tables. */
+static Lisp_Object Qchar_code_property_table;
+
+/* Types of decoder and encoder functions for uniprop values. */
+typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
+typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
+
+static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
+static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
+
+/* 1 iff TABLE is a uniprop table. */
+#define UNIPROP_TABLE_P(TABLE) \
+ (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
+
+/* Return a decoder for values in the uniprop table TABLE. */
+#define UNIPROP_GET_DECODER(TABLE) \
+ (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
+/* Nonzero iff OBJ is a string representing uniprop values of 128
+ succeeding characters (the bottom level of a char-table) by a
+ compressed format. We are sure that no property value has a string
+ starting with '\001' nor '\002'. */
+#define UNIPROP_COMPRESSED_FORM_P(OBJ) \
+ (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
+ && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
+
+
DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
doc: /* Return a newly created char-table, with purpose PURPOSE.
Each element is initialized to INIT, which defaults to nil.
@@ -107,7 +138,7 @@ make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
static Lisp_Object
char_table_ascii (Lisp_Object table)
{
- Lisp_Object sub;
+ Lisp_Object sub, val;
sub = XCHAR_TABLE (table)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
@@ -115,7 +146,10 @@ char_table_ascii (Lisp_Object table)
sub = XSUB_CHAR_TABLE (sub)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
return sub;
- return XSUB_CHAR_TABLE (sub)->contents[0];
+ val = XSUB_CHAR_TABLE (sub)->contents[0];
+ if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (sub, 0);
+ return val;
}
static Lisp_Object
@@ -169,16 +203,19 @@ copy_char_table (Lisp_Object table)
}
static Lisp_Object
-sub_char_table_ref (Lisp_Object table, int c)
+sub_char_table_ref (Lisp_Object table, int c, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
int min_char = XINT (tbl->min_char);
Lisp_Object val;
+ int idx = CHARTAB_IDX (c, depth, min_char);
- val = tbl->contents[CHARTAB_IDX (c, depth, min_char)];
+ val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref (val, c);
+ val = sub_char_table_ref (val, c, is_uniprop);
return val;
}
@@ -198,7 +235,7 @@ char_table_ref (Lisp_Object table, int c)
{
val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref (val, c);
+ val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
}
if (NILP (val))
{
@@ -210,7 +247,8 @@ char_table_ref (Lisp_Object table, int c)
}
static Lisp_Object
-sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt)
+sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
+ Lisp_Object defalt, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
@@ -219,8 +257,10 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
Lisp_Object val;
val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref_and_range (val, c, from, to, defalt);
+ val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
else if (NILP (val))
val = defalt;
@@ -232,8 +272,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
c = min_char + idx * chartab_chars[depth] - 1;
idx--;
this_val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
- this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
+ this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
+ is_uniprop);
else if (NILP (this_val))
this_val = defalt;
@@ -251,8 +294,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
chartab_idx++;
this_val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
- this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
+ this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
+ is_uniprop);
else if (NILP (this_val))
this_val = defalt;
if (! EQ (this_val, val))
@@ -277,17 +323,20 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
Lisp_Object val;
+ int is_uniprop = UNIPROP_TABLE_P (table);
val = tbl->contents[chartab_idx];
if (*from < 0)
*from = 0;
if (*to < 0)
*to = MAX_CHAR;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt);
+ val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
+ is_uniprop);
else if (NILP (val))
val = tbl->defalt;
-
idx = chartab_idx;
while (*from < idx * chartab_chars[0])
{
@@ -296,9 +345,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
c = idx * chartab_chars[0] - 1;
idx--;
this_val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
- tbl->defalt);
+ tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
@@ -315,9 +366,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
chartab_idx++;
c = chartab_idx * chartab_chars[0];
this_val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
- tbl->defalt);
+ tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
if (! EQ (this_val, val))
@@ -332,7 +385,7 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
static void
-sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
+sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT ((tbl)->depth);
@@ -347,11 +400,17 @@ sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
sub = tbl->contents[i];
if (! SUB_CHAR_TABLE_P (sub))
{
- sub = make_sub_char_table (depth + 1,
- min_char + i * chartab_chars[depth], sub);
- tbl->contents[i] = sub;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
+ sub = uniprop_table_uncompress (table, i);
+ else
+ {
+ sub = make_sub_char_table (depth + 1,
+ min_char + i * chartab_chars[depth],
+ sub);
+ tbl->contents[i] = sub;
+ }
}
- sub_char_table_set (sub, c, val);
+ sub_char_table_set (sub, c, val, is_uniprop);
}
}
@@ -376,7 +435,7 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
sub = make_sub_char_table (1, i * chartab_chars[0], sub);
tbl->contents[i] = sub;
}
- sub_char_table_set (sub, c, val);
+ sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
if (ASCII_CHAR_P (c))
tbl->ascii = char_table_ascii (table);
}
@@ -384,30 +443,40 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
}
static void
-sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val)
+sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
+ int is_uniprop)
{
- int max_char = min_char + chartab_chars[depth] - 1;
-
- if (depth == 3 || (from <= min_char && to >= max_char))
- *table = val;
- else
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT ((tbl)->depth);
+ int min_char = XINT ((tbl)->min_char);
+ int chars_in_block = chartab_chars[depth];
+ int i, c, lim = chartab_size[depth];
+
+ if (from < min_char)
+ from = min_char;
+ i = CHARTAB_IDX (from, depth, min_char);
+ c = min_char + chars_in_block * i;
+ for (; i <= lim; i++, c += chars_in_block)
{
- int i;
- unsigned j;
-
- depth++;
- if (! SUB_CHAR_TABLE_P (*table))
- *table = make_sub_char_table (depth, min_char, *table);
- if (from < min_char)
- from = min_char;
- if (to > max_char)
- to = max_char;
- i = CHARTAB_IDX (from, depth, min_char);
- j = CHARTAB_IDX (to, depth, min_char);
- min_char += chartab_chars[depth] * i;
- for (j++; i < j; i++, min_char += chartab_chars[depth])
- sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i,
- depth, min_char, from, to, val);
+ if (c > to)
+ break;
+ if (from <= c && c + chars_in_block - 1 <= to)
+ tbl->contents[i] = val;
+ else
+ {
+ Lisp_Object sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
+ sub = uniprop_table_uncompress (table, i);
+ else
+ {
+ sub = make_sub_char_table (depth + 1, c, sub);
+ tbl->contents[i] = sub;
+ }
+ }
+ sub_char_table_set_range (sub, from, to, val, is_uniprop);
+ }
}
}
@@ -417,16 +486,33 @@ char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
{
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
Lisp_Object *contents = tbl->contents;
- int i;
if (from == to)
char_table_set (table, from, val);
else
{
- unsigned lim = to / chartab_chars[0] + 1;
- for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++)
- sub_char_table_set_range (contents + i, 0, i * chartab_chars[0],
- from, to, val);
+ int is_uniprop = UNIPROP_TABLE_P (table);
+ int lim = CHARTAB_IDX (to, 0, 0);
+ int i, c;
+
+ for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
+ i++, c += chartab_chars[0])
+ {
+ if (c > to)
+ break;
+ if (from <= c && c + chartab_chars[0] - 1 <= to)
+ tbl->contents[i] = val;
+ else
+ {
+ Lisp_Object sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (1, i * chartab_chars[0], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set_range (sub, from, to, val, is_uniprop);
+ }
+ }
if (ASCII_CHAR_P (from))
tbl->ascii = char_table_ascii (table);
}
@@ -504,6 +590,8 @@ DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
(Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
{
CHECK_CHAR_TABLE (char_table);
+ if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table))
+ error ("Can't change extra-slot of char-code-property-table");
CHECK_NUMBER (n);
if (XINT (n) < 0
|| XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
@@ -532,8 +620,9 @@ a cons of character codes (for characters in the range), or a character code. *
CHECK_CHARACTER_CAR (range);
CHECK_CHARACTER_CDR (range);
- val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)),
- &from, &to);
+ from = XFASTINT (XCAR (range));
+ to = XFASTINT (XCDR (range));
+ val = char_table_ref_and_range (char_table, from, &from, &to);
/* Not yet implemented. */
}
else
@@ -655,8 +744,7 @@ equivalent and can be merged. It defaults to `equal'. */)
/* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
calling it for each character or group of characters that share a
value. RANGE is a cons (FROM . TO) specifying the range of target
- characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the
- default value of the char-table, PARENT is the parent of the
+ characters, VAL is a value of FROM in TABLE, TOP is the top
char-table.
ARG is passed to C_FUNCTION when that is called.
@@ -669,7 +757,7 @@ equivalent and can be merged. It defaults to `equal'. */)
static Lisp_Object
map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
- Lisp_Object range, Lisp_Object default_val, Lisp_Object parent)
+ Lisp_Object range, Lisp_Object top)
{
/* Pointer to the elements of TABLE. */
Lisp_Object *contents;
@@ -681,6 +769,8 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
int chars_in_block;
int from = XINT (XCAR (range)), to = XINT (XCDR (range));
int i, c;
+ int is_uniprop = UNIPROP_TABLE_P (top);
+ uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
if (SUB_CHAR_TABLE_P (table))
{
@@ -710,28 +800,33 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
for (c = min_char + chars_in_block * i; c <= max_char;
i++, c += chars_in_block)
{
- Lisp_Object this = contents[i];
+ Lisp_Object this = (SUB_CHAR_TABLE_P (table)
+ ? XSUB_CHAR_TABLE (table)->contents[i]
+ : XCHAR_TABLE (table)->contents[i]);
int nextc = c + chars_in_block;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
+ this = uniprop_table_uncompress (table, i);
if (SUB_CHAR_TABLE_P (this))
{
if (to >= nextc)
XSETCDR (range, make_number (nextc - 1));
val = map_sub_char_table (c_function, function, this, arg,
- val, range, default_val, parent);
+ val, range, top);
}
else
{
if (NILP (this))
- this = default_val;
+ this = XCHAR_TABLE (top)->defalt;
if (!EQ (val, this))
{
int different_value = 1;
if (NILP (val))
{
- if (! NILP (parent))
+ if (! NILP (XCHAR_TABLE (top)->parent))
{
+ Lisp_Object parent = XCHAR_TABLE (top)->parent;
Lisp_Object temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT
@@ -742,8 +837,7 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
XSETCDR (range, make_number (c - 1));
val = map_sub_char_table (c_function, function,
parent, arg, val, range,
- XCHAR_TABLE (parent)->defalt,
- XCHAR_TABLE (parent)->parent);
+ parent);
if (EQ (val, this))
different_value = 0;
}
@@ -756,14 +850,22 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
- call2 (function, XCAR (range), val);
+ {
+ if (decoder)
+ val = decoder (top, val);
+ call2 (function, XCAR (range), val);
+ }
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
- call2 (function, range, val);
+ {
+ if (decoder)
+ val = decoder (top, val);
+ call2 (function, range, val);
+ }
}
}
val = this;
@@ -783,35 +885,39 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
ARG is passed to C_FUNCTION when that is called. */
void
-map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg)
+map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
+ Lisp_Object function, Lisp_Object table, Lisp_Object arg)
{
- Lisp_Object range, val;
- struct gcpro gcpro1, gcpro2, gcpro3;
+ Lisp_Object range, val, parent;
+ struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
+ uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
range = Fcons (make_number (0), make_number (MAX_CHAR));
- GCPRO3 (table, arg, range);
+ parent = XCHAR_TABLE (table)->parent;
+
+ GCPRO4 (table, arg, range, parent);
val = XCHAR_TABLE (table)->ascii;
if (SUB_CHAR_TABLE_P (val))
val = XSUB_CHAR_TABLE (val)->contents[0];
val = map_sub_char_table (c_function, function, table, arg, val, range,
- XCHAR_TABLE (table)->defalt,
- XCHAR_TABLE (table)->parent);
+ table);
+
/* If VAL is nil and TABLE has a parent, we must consult the parent
recursively. */
while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
{
- Lisp_Object parent = XCHAR_TABLE (table)->parent;
- Lisp_Object temp = XCHAR_TABLE (parent)->parent;
+ Lisp_Object temp;
int from = XINT (XCAR (range));
+ parent = XCHAR_TABLE (table)->parent;
+ temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT without checking the
parent of PARENT. */
XCHAR_TABLE (parent)->parent = Qnil;
val = CHAR_TABLE_REF (parent, from);
XCHAR_TABLE (parent)->parent = temp;
val = map_sub_char_table (c_function, function, parent, arg, val, range,
- XCHAR_TABLE (parent)->defalt,
- XCHAR_TABLE (parent)->parent);
+ parent);
table = parent;
}
@@ -822,14 +928,22 @@ map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
- call2 (function, XCAR (range), val);
+ {
+ if (decoder)
+ val = decoder (table, val);
+ call2 (function, XCAR (range), val);
+ }
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
- call2 (function, range, val);
+ {
+ if (decoder)
+ val = decoder (table, val);
+ call2 (function, range, val);
+ }
}
}
@@ -984,9 +1098,315 @@ map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
}
+/* Unicode character property tables.
+
+ This section provides a convenient and efficient way to get a
+ Unicode character property from C code (from Lisp, you must use
+ get-char-code-property).
+
+ The typical usage is to get a char-table for a specific property at
+ a proper initialization time as this:
+
+ Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
+
+ and get a property value for character CH as this:
+
+ Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table);
+
+ In this case, what you actually get is an index number to the
+ vector of property values (symbols nil, L, R, etc).
+
+ A table for Unicode character property has these characteristics:
+
+ o The purpose is `char-code-property-table', which implies that the
+ table has 5 extra slots.
+
+ o The second extra slot is a Lisp function, an index (integer) to
+ the array uniprop_decoder[], or nil. If it is a Lisp function, we
+ can't use such a table from C (at the moment). If it is nil, it
+ means that we don't have to decode values.
+
+ o The third extra slot is a Lisp function, an index (integer) to
+ the array uniprop_enncoder[], or nil. If it is a Lisp function, we
+ can't use such a table from C (at the moment). If it is nil, it
+ means that we don't have to encode values. */
+
+
+/* Uncompress the IDXth element of sub-char-table TABLE. */
+
+static Lisp_Object
+uniprop_table_uncompress (Lisp_Object table, int idx)
+{
+ Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
+ int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char)
+ + chartab_chars[2] * idx);
+ Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
+ struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub);
+ const unsigned char *p, *pend;
+ int i;
+
+ XSUB_CHAR_TABLE (table)->contents[idx] = sub;
+ p = SDATA (val), pend = p + SBYTES (val);
+ if (*p == 1)
+ {
+ /* SIMPLE TABLE */
+ p++;
+ idx = STRING_CHAR_ADVANCE (p);
+ while (p < pend && idx < chartab_chars[2])
+ {
+ int v = STRING_CHAR_ADVANCE (p);
+ subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil;
+ }
+ }
+ else if (*p == 2)
+ {
+ /* RUN-LENGTH TABLE */
+ p++;
+ for (idx = 0; p < pend; )
+ {
+ int v = STRING_CHAR_ADVANCE (p);
+ int count = 1;
+ int len;
+
+ if (p < pend)
+ {
+ count = STRING_CHAR_AND_LENGTH (p, len);
+ if (count < 128)
+ count = 1;
+ else
+ {
+ count -= 128;
+ p += len;
+ }
+ }
+ while (count-- > 0)
+ subtbl->contents[idx++] = make_number (v);
+ }
+ }
+/* It seems that we don't need this function because C code won't need
+ to get a property that is compressed in this form. */
+#if 0
+ else if (*p == 0)
+ {
+ /* WORD-LIST TABLE */
+ }
+#endif
+ return sub;
+}
+
+
+/* Decode VALUE as an elemnet of char-table TABLE. */
+
+static Lisp_Object
+uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
+{
+ if (VECTORP (XCHAR_TABLE (table)->extras[4]))
+ {
+ Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
+
+ if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
+ value = AREF (valvec, XINT (value));
+ }
+ return value;
+}
+
+static uniprop_decoder_t uniprop_decoder [] =
+ { uniprop_decode_value_run_length };
+
+static int uniprop_decoder_count
+ = (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]);
+
+
+/* Return the decoder of char-table TABLE or nil if none. */
+
+static uniprop_decoder_t
+uniprop_get_decoder (Lisp_Object table)
+{
+ int i;
+
+ if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
+ return NULL;
+ i = XINT (XCHAR_TABLE (table)->extras[1]);
+ if (i < 0 || i >= uniprop_decoder_count)
+ return NULL;
+ return uniprop_decoder[i];
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which contains
+ characters as elements. */
+
+static Lisp_Object
+uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
+{
+ if (! NILP (value) && ! CHARACTERP (value))
+ wrong_type_argument (Qintegerp, value);
+ return value;
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
+ compression. */
+
+static Lisp_Object
+uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
+{
+ Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
+ int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
+
+ for (i = 0; i < size; i++)
+ if (EQ (value, value_table[i]))
+ break;
+ if (i == size)
+ wrong_type_argument (build_string ("Unicode property value"), value);
+ return make_number (i);
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
+ compression and contains numbers as elements . */
+
+static Lisp_Object
+uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
+{
+ Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
+ int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
+
+ CHECK_NUMBER (value);
+ for (i = 0; i < size; i++)
+ if (EQ (value, value_table[i]))
+ break;
+ value = make_number (i);
+ if (i == size)
+ {
+ Lisp_Object args[2];
+
+ args[0] = XCHAR_TABLE (table)->extras[4];
+ args[1] = Fmake_vector (make_number (1), value);
+ XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args);
+ }
+ return make_number (i);
+}
+
+static uniprop_encoder_t uniprop_encoder[] =
+ { uniprop_encode_value_character,
+ uniprop_encode_value_run_length,
+ uniprop_encode_value_numeric };
+
+static int uniprop_encoder_count
+ = (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]);
+
+
+/* Return the encoder of char-table TABLE or nil if none. */
+
+static uniprop_decoder_t
+uniprop_get_encoder (Lisp_Object table)
+{
+ int i;
+
+ if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
+ return NULL;
+ i = XINT (XCHAR_TABLE (table)->extras[2]);
+ if (i < 0 || i >= uniprop_encoder_count)
+ return NULL;
+ return uniprop_encoder[i];
+}
+
+/* Return a char-table for Unicode character property PROP. This
+ function may load a Lisp file and thus may cause
+ garbage-collection. */
+
+Lisp_Object
+uniprop_table (Lisp_Object prop)
+{
+ Lisp_Object val, table, result;
+
+ val = Fassq (prop, Vchar_code_property_alist);
+ if (! CONSP (val))
+ return Qnil;
+ table = XCDR (val);
+ if (STRINGP (table))
+ {
+ struct gcpro gcpro1;
+ GCPRO1 (val);
+ result = Fload (concat2 (build_string ("international/"), table),
+ Qt, Qt, Qt, Qt);
+ UNGCPRO;
+ if (NILP (result))
+ return Qnil;
+ table = XCDR (val);
+ }
+ if (! CHAR_TABLE_P (table)
+ || ! UNIPROP_TABLE_P (table))
+ return Qnil;
+ val = XCHAR_TABLE (table)->extras[1];
+ if (INTEGERP (val)
+ ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
+ : ! NILP (val))
+ return Qnil;
+ /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
+ XCHAR_TABLE (table)->ascii = char_table_ascii (table);
+ return table;
+}
+
+DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
+ Sunicode_property_table_internal, 1, 1, 0,
+ doc: /* Return a char-table for Unicode character property PROP.
+Use `get-unicode-property-internal' and
+`put-unicode-property-internal' instead of `aref' and `aset' to get
+and put an element value. */)
+ (Lisp_Object prop)
+{
+ Lisp_Object table = uniprop_table (prop);
+
+ if (CHAR_TABLE_P (table))
+ return table;
+ return Fcdr (Fassq (prop, Vchar_code_property_alist));
+}
+
+DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
+ Sget_unicode_property_internal, 2, 2, 0,
+ doc: /* Return an element of CHAR-TABLE for character CH.
+CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
+ (Lisp_Object char_table, Lisp_Object ch)
+{
+ Lisp_Object val;
+ uniprop_decoder_t decoder;
+
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_CHARACTER (ch);
+ if (! UNIPROP_TABLE_P (char_table))
+ error ("Invalid Unicode property table");
+ val = CHAR_TABLE_REF (char_table, XINT (ch));
+ decoder = uniprop_get_decoder (char_table);
+ return (decoder ? decoder (char_table, val) : val);
+}
+
+DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
+ Sput_unicode_property_internal, 3, 3, 0,
+ doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
+CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
+ (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
+{
+ uniprop_encoder_t encoder;
+
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_CHARACTER (ch);
+ if (! UNIPROP_TABLE_P (char_table))
+ error ("Invalid Unicode property table");
+ encoder = uniprop_get_encoder (char_table);
+ if (encoder)
+ value = encoder (char_table, value);
+ CHAR_TABLE_SET (char_table, XINT (ch), value);
+ return Qnil;
+}
+
+
void
syms_of_chartab (void)
{
+ DEFSYM (Qchar_code_property_table, "char-code-property-table");
+
defsubr (&Smake_char_table);
defsubr (&Schar_table_parent);
defsubr (&Schar_table_subtype);
@@ -998,4 +1418,19 @@ syms_of_chartab (void)
defsubr (&Sset_char_table_default);
defsubr (&Soptimize_char_table);
defsubr (&Smap_char_table);
+ defsubr (&Sunicode_property_table_internal);
+ defsubr (&Sget_unicode_property_internal);
+ defsubr (&Sput_unicode_property_internal);
+
+ /* Each element has the form (PROP . TABLE).
+ PROP is a symbol representing a character property.
+ TABLE is a char-table containing the property value for each character.
+ TABLE may be a name of file to load to build a char-table.
+ This variable should be modified only through
+ `define-char-code-property'. */
+
+ DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
+ doc: /* Alist of character property name vs char-table containing property values.
+Internal use only. */);
+ Vchar_code_property_alist = Qnil;
}
diff --git a/src/composite.c b/src/composite.c
index 796c5a58de..7123b505e6 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -976,9 +976,8 @@ static int _work_char;
((C) > ' ' \
&& ((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
- (SYMBOLP (_work_val) \
- && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
- && _work_char != 'Z'))))
+ (INTEGERP (_work_val) \
+ && (XINT (_work_val) <= UNICODE_CATEGORY_So)))))
/* Update cmp_it->stop_pos to the next position after CHARPOS (and
BYTEPOS) where character composition may happen. If BYTEPOS is
diff --git a/src/dispextern.h b/src/dispextern.h
index 57fa09d3bf..c0a67690a5 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1773,7 +1773,11 @@ extern int face_change_count;
/* Data type for describing the bidirectional character types. The
first 7 must be at the beginning, because they are the only values
valid in the `bidi_type' member of `struct glyph'; we only reserve
- 3 bits for it, so we cannot use there values larger than 7. */
+ 3 bits for it, so we cannot use there values larger than 7.
+
+ The order of members must be in sync with the 8th element of the
+ member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
+ Unicode character property `bidi-class'. */
typedef enum {
UNKNOWN_BT = 0,
STRONG_L, /* strong left-to-right */
diff --git a/src/font.c b/src/font.c
index 14390335f3..5aff20b134 100644
--- a/src/font.c
+++ b/src/font.c
@@ -3739,8 +3739,9 @@ font_range (EMACS_INT pos, EMACS_INT *limit, struct window *w, struct face *face
else
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte);
category = CHAR_TABLE_REF (Vunicode_category_table, c);
- if (EQ (category, QCf)
- || CHAR_VARIATION_SELECTOR_P (c))
+ if (INTEGERP (category)
+ && (XINT (category) == UNICODE_CATEGORY_Cf
+ || CHAR_VARIATION_SELECTOR_P (c)))
continue;
if (NILP (font_object))
{