summary refs log tree commit diff
diff options
context:
space:
mode:
author Eli Zaretskii <eliz@gnu.org> 2011-06-08 21:01:56 +0300
committer Eli Zaretskii <eliz@gnu.org> 2011-06-08 21:01:56 +0300
commit 87e67904f15fda542426c9159c95a19142aecbad (patch)
tree e5ccb1ee1fdb9e135f2f8a7f3e09a382cdd2beef
parent 9d68c2a9abaddbed86a1b2f166625c93e8f88326 (diff)
Started work on string reordering. Just compiled, not yet tested.
src/bidi.c (bidi_paragraph_info): Delete unused struct. (bidi_cache_idx, bidi_cache_last_idx): Declare EMACS_INT. (bidi_cache_start): New variable. (bidi_cache_reset): Reset bidi_cache_idx to bidi_cache_start, not to zero. (bidi_cache_fetch_state, bidi_cache_search) (bidi_cache_find_level_change, bidi_cache_iterator_state) (bidi_cache_find, bidi_peek_at_next_level) (bidi_level_of_next_char, bidi_find_other_level_edge) (bidi_move_to_visually_next): Compare cache index with bidi_cache_start rather than with zero. (bidi_fetch_char): Accept new argument STRING; all callers changed. Support iteration over a string. (bidi_paragraph_init, bidi_resolve_explicit_1) (bidi_resolve_explicit, bidi_resolve_weak) (bidi_level_of_next_char, bidi_move_to_visually_next): Support iteration over a string. (bidi_set_sor_type, bidi_resolve_explicit_1) (bidi_resolve_explicit, bidi_type_of_next_char): ignore_bn_limit can now be zero (for strings); special values 0 and -1 were changed to -1 and -2, respectively. (bidi_char_at_pos): New function. (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak): Call it instead of FETCH_MULTIBYTE_CHAR. (bidi_move_to_visually_next): Abort if charpos or bytepos were not initialized to valid values. (bidi_init_it): Don't initialize charpos and bytepos with invalid values. src/xdisp.c (compute_display_string_pos) (compute_display_string_end): Accept additional argument STRING. (init_iterator, reseat_1): Initialize bidi_it->string.s to NULL. (reseat_to_string): Initialize bidi_it->string.s and bidi_it->string.schars. src/dispextern.h (struct bidi_string_data): New structure. (struct bidi_it): New member `string'. Make flag members be 1-bit fields, and put them last in the struct. (compute_display_string_pos, compute_display_string_end): Update prototypes.
-rw-r--r-- src/ChangeLog 43
-rw-r--r-- src/bidi.c 328
-rw-r--r-- src/dispextern.h 24
-rw-r--r-- src/xdisp.c 39
4 files changed, 312 insertions, 122 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 63353b31d4..c303839b14 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,46 @@
+2011-06-08 Eli Zaretskii <eliz@gnu.org>
+
+ * bidi.c (bidi_paragraph_info): Delete unused struct.
+ (bidi_cache_idx, bidi_cache_last_idx): Declare EMACS_INT.
+ (bidi_cache_start): New variable.
+ (bidi_cache_reset): Reset bidi_cache_idx to bidi_cache_start, not
+ to zero.
+ (bidi_cache_fetch_state, bidi_cache_search)
+ (bidi_cache_find_level_change, bidi_cache_iterator_state)
+ (bidi_cache_find, bidi_peek_at_next_level)
+ (bidi_level_of_next_char, bidi_find_other_level_edge)
+ (bidi_move_to_visually_next): Compare cache index with
+ bidi_cache_start rather than with zero.
+ (bidi_fetch_char): Accept new argument STRING; all callers
+ changed. Support iteration over a string.
+ (bidi_paragraph_init, bidi_resolve_explicit_1)
+ (bidi_resolve_explicit, bidi_resolve_weak)
+ (bidi_level_of_next_char, bidi_move_to_visually_next): Support
+ iteration over a string.
+ (bidi_set_sor_type, bidi_resolve_explicit_1)
+ (bidi_resolve_explicit, bidi_type_of_next_char): ignore_bn_limit
+ can now be zero (for strings); special values 0 and -1 were
+ changed to -1 and -2, respectively.
+ (bidi_char_at_pos): New function.
+ (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak):
+ Call it instead of FETCH_MULTIBYTE_CHAR.
+ (bidi_move_to_visually_next): Abort if charpos or bytepos were not
+ initialized to valid values.
+ (bidi_init_it): Don't initialize charpos and bytepos with invalid
+ values.
+
+ * xdisp.c (compute_display_string_pos)
+ (compute_display_string_end): Accept additional argument STRING.
+ (init_iterator, reseat_1): Initialize bidi_it->string.s to NULL.
+ (reseat_to_string): Initialize bidi_it->string.s and
+ bidi_it->string.schars.
+
+ * dispextern.h (struct bidi_string_data): New structure.
+ (struct bidi_it): New member `string'. Make flag members be 1-bit
+ fields, and put them last in the struct.
+ (compute_display_string_pos, compute_display_string_end): Update
+ prototypes.
+
2011-06-04 Eli Zaretskii <eliz@gnu.org>
* bidi.c (bidi_level_of_next_char): clen should be EMACS_INT, not int.
diff --git a/src/bidi.c b/src/bidi.c
index ccf21827fc..ac950a9000 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -66,16 +66,6 @@ static Lisp_Object bidi_type_table, bidi_mirror_table;
#define RLM_CHAR 0x200F
#define BIDI_EOB -1
-/* Local data structures. (Look in dispextern.h for the rest.) */
-
-/* What we need to know about the current paragraph. */
-struct bidi_paragraph_info {
- EMACS_INT start_bytepos; /* byte position where it begins */
- EMACS_INT end_bytepos; /* byte position where it ends */
- int embedding_level; /* its basic embedding level */
- bidi_dir_t base_dir; /* its base direction */
-};
-
/* Data type for describing the bidirectional character categories. */
typedef enum {
UNKNOWN_BC,
@@ -265,16 +255,28 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
static struct bidi_it *bidi_cache;
static size_t bidi_cache_size = 0;
static size_t elsz = sizeof (struct bidi_it);
-static int bidi_cache_idx; /* next unused cache slot */
-static int bidi_cache_last_idx; /* slot of last cache hit */
-
+static EMACS_INT bidi_cache_idx; /* next unused cache slot */
+static EMACS_INT bidi_cache_last_idx; /* slot of last cache hit */
+static EMACS_INT bidi_cache_start = 0; /* start of cache for this
+ "stack" level */
+
+/* Reset the cache state to the empty state. We only reset the part
+ of the cache relevant to iteration of the current object. Previous
+ objects, which are pushed on the display iterator's stack, are left
+ intact. This is called when the cached information is no longer
+ useful for the current iteration, e.g. when we were reseated to a
+ new position on the same object. */
static INLINE void
bidi_cache_reset (void)
{
- bidi_cache_idx = 0;
+ bidi_cache_idx = bidi_cache_start;
bidi_cache_last_idx = -1;
}
+/* Shrink the cache to its minimal size. Called when we init the bidi
+ iterator for reordering a buffer or a string that does not come
+ from display properties, because that means all the previously
+ cached info is of no further use. */
static INLINE void
bidi_cache_shrink (void)
{
@@ -292,7 +294,7 @@ bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
- if (idx < 0 || idx >= bidi_cache_idx)
+ if (idx < bidi_cache_start || idx >= bidi_cache_idx)
abort ();
bidi_copy_it (bidi_it, &bidi_cache[idx]);
@@ -333,7 +335,7 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
if (dir < 0)
{
/* Linear search for now; FIXME! */
- for (i = i_start; i >= 0; i--)
+ for (i = i_start; i >= bidi_cache_start; i--)
if (bidi_cache[i].charpos <= charpos
&& charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
&& (level == -1 || bidi_cache[i].resolved_level <= level))
@@ -355,8 +357,9 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
/* Find a cached state where the resolved level changes to a value
that is lower than LEVEL, and return its cache slot index. DIR is
the direction to search, starting with the last used cache slot.
- BEFORE, if non-zero, means return the index of the slot that is
- ``before'' the level change in the search direction. That is,
+ If DIR is zero, we search backwards from the last occupied cache
+ slot. BEFORE, if non-zero, means return the index of the slot that
+ is ``before'' the level change in the search direction. That is,
given the cached levels like this:
1122333442211
@@ -381,7 +384,7 @@ bidi_cache_find_level_change (int level, int dir, int before)
if (dir < 0)
{
- while (i >= incr)
+ while (i >= bidi_cache_start + incr)
{
if (bidi_cache[i - incr].resolved_level >= 0
&& bidi_cache[i - incr].resolved_level < level)
@@ -427,13 +430,13 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
/* Character positions should correspond to cache positions 1:1.
If we are outside the range of cached positions, the cache is
useless and must be reset. */
- if (idx > 0 &&
+ if (idx > bidi_cache_start &&
(bidi_it->charpos > (bidi_cache[idx - 1].charpos
+ bidi_cache[idx - 1].nchars)
|| bidi_it->charpos < bidi_cache[0].charpos))
{
bidi_cache_reset ();
- idx = 0;
+ idx = bidi_cache_start;
}
if (bidi_it->nchars <= 0)
abort ();
@@ -470,7 +473,7 @@ bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
{
int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
- if (i >= 0)
+ if (i >= bidi_cache_start)
{
bidi_dir_t current_scan_dir = bidi_it->scan_dir;
@@ -488,7 +491,7 @@ bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
static INLINE int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
- if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
+ if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
abort ();
return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
}
@@ -550,7 +553,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
- bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
+ bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
}
/* Perform initializations for reordering a new line of bidi text. */
@@ -571,6 +574,40 @@ bidi_line_init (struct bidi_it *bidi_it)
bidi_cache_reset ();
}
+/* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG
+ and END are zero-based character positions in S, BEGBYTE is the byte
+ position corresponding to BEG. */
+static inline EMACS_INT
+bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
+ const EMACS_INT begbyte, const EMACS_INT end)
+{
+ EMACS_INT pos = beg;
+ const unsigned char *p = s + begbyte, *start = p;
+
+ if (!CHAR_HEAD_P (*p))
+ abort ();
+
+ while (pos < end)
+ {
+ p += BYTES_BY_CHAR_HEAD (*p);
+ pos++;
+ }
+
+ return p - start;
+}
+
+/* Fetch and return the character at byte position BYTEPOS. If S is
+ non-NULL, fetch the character from string S; otherwise fetch the
+ character from the current buffer. */
+static inline int
+bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s)
+{
+ if (s)
+ return STRING_CHAR (s + bytepos);
+ else
+ return FETCH_MULTIBYTE_CHAR (bytepos);
+}
+
/* Fetch and return the character at BYTEPOS/CHARPOS. If that
character is covered by a display string, treat the entire run of
covered characters as a single character u+FFFC, and return their
@@ -578,26 +615,28 @@ bidi_line_init (struct bidi_it *bidi_it)
character position of the next display string, or -1 if not yet
computed. When the next character is at or beyond that position,
the function updates DISP_POS with the position of the next display
- string. */
-static INLINE int
+ string. STRING->s is the string to iterate, or NULL if iterating over
+ a buffer. */
+static inline int
bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
+ struct bidi_string_data *string,
int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
{
int ch;
+ EMACS_INT endpos = string->s ? string->schars : ZV;
- /* FIXME: Support strings in addition to buffers. */
/* If we got past the last known position of display string, compute
- the position of the next one. That position could be at BYTEPOS. */
- if (charpos < ZV && charpos > *disp_pos)
- *disp_pos = compute_display_string_pos (charpos, frame_window_p);
+ the position of the next one. That position could be at CHARPOS. */
+ if (charpos < endpos && charpos > *disp_pos)
+ *disp_pos = compute_display_string_pos (charpos, string, frame_window_p);
/* Fetch the character at BYTEPOS. */
- if (bytepos >= ZV_BYTE)
+ if (charpos >= endpos)
{
ch = BIDI_EOB;
*ch_len = 1;
*nchars = 1;
- *disp_pos = ZV;
+ *disp_pos = endpos;
}
else if (charpos >= *disp_pos)
{
@@ -608,24 +647,38 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
if (charpos > *disp_pos)
abort ();
/* Return the Unicode Object Replacement Character to represent
- the entire run of characters covered by the display
- string. */
+ the entire run of characters covered by the display string. */
ch = 0xFFFC;
- disp_end_pos = compute_display_string_end (*disp_pos);
+ disp_end_pos = compute_display_string_end (*disp_pos, string);
*nchars = disp_end_pos - *disp_pos;
- *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
+ if (string->s)
+ *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
+ disp_end_pos);
+ else
+ *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
}
else
{
- ch = FETCH_MULTIBYTE_CHAR (bytepos);
+ if (string->s)
+ {
+ EMACS_INT len;
+
+ ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
+ *ch_len = len;
+ }
+ else
+ {
+ ch = FETCH_MULTIBYTE_CHAR (bytepos);
+ *ch_len = CHAR_BYTES (ch);
+ }
*nchars = 1;
- *ch_len = CHAR_BYTES (ch);
}
/* If we just entered a run of characters covered by a display
string, compute the position of the next display string. */
- if (charpos + *nchars <= ZV && charpos + *nchars > *disp_pos)
- *disp_pos = compute_display_string_pos (charpos + *nchars, frame_window_p);
+ if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos)
+ *disp_pos = compute_display_string_pos (charpos + *nchars, string,
+ frame_window_p);
return ch;
}
@@ -670,13 +723,19 @@ void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
EMACS_INT bytepos = bidi_it->bytepos;
+ int string_p = bidi_it->string.s != NULL;
EMACS_INT pstartbyte;
+ /* Note that begbyte is a byte position, while end is a character
+ position. Yes, this is ugly, but we are trying to avoid costly
+ calls to BYTE_TO_CHAR and its ilk. */
+ EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
+ EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
/* Special case for an empty buffer. */
- if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
+ if (bytepos == begbyte && bidi_it->charpos == end)
dir = L2R;
/* We should never be called at EOB or before BEGV. */
- else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
+ else if (bidi_it->charpos >= end || bytepos < begbyte)
abort ();
if (dir == L2R)
@@ -712,7 +771,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
we are potentially in a new paragraph that doesn't yet
exist. */
pos = bidi_it->charpos;
- if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
+ if (bytepos > begbyte
+ && bidi_char_at_pos (bytepos, bidi_it->string.s) == '\n')
{
bytepos++;
pos++;
@@ -720,17 +780,25 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
/* We are either at the beginning of a paragraph or in the
middle of it. Find where this paragraph starts. */
- pstartbyte = bidi_find_paragraph_start (pos, bytepos);
+ if (string_p)
+ {
+ /* We don't support changes of paragraph direction inside a
+ string. It is treated as a single paragraph. */
+ pstartbyte = 0;
+ }
+ else
+ pstartbyte = bidi_find_paragraph_start (pos, bytepos);
bidi_it->separator_limit = -1;
bidi_it->new_paragraph = 0;
/* The following loop is run more than once only if NO_DEFAULT_P
- is non-zero. */
+ is non-zero, and only if we are iterating on a buffer. */
do {
bytepos = pstartbyte;
- pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, bidi_it->frame_window_p,
- &ch_len, &nchars);
+ if (!string_p)
+ pos = BYTE_TO_CHAR (bytepos);
+ ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
+ bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
for (pos += nchars, bytepos += ch_len;
@@ -744,17 +812,19 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
|| type == LRE || type == LRO));
type = bidi_get_type (ch, NEUTRAL_DIR))
{
- if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
+ if (!string_p
+ && type == NEUTRAL_B
+ && bidi_at_paragraph_end (pos, bytepos) >= -1)
break;
- if (bytepos >= ZV_BYTE)
+ if (pos >= end)
{
/* Pretend there's a paragraph separator at end of
- buffer. */
+ buffer/string. */
type = NEUTRAL_B;
break;
}
/* Fetch next character and advance to get past it. */
- ch = bidi_fetch_char (bytepos, pos, &disp_pos,
+ ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
@@ -763,7 +833,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
bidi_it->paragraph_dir = R2L;
else if (type == STRONG_L)
bidi_it->paragraph_dir = L2R;
- if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
+ if (!string_p
+ && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
{
/* If this paragraph is at BEGV, default to L2R. */
if (pstartbyte == BEGV_BYTE)
@@ -786,7 +857,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
pstartbyte = prevpbyte;
}
}
- } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
+ } while (!string_p
+ && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
}
else
abort ();
@@ -822,8 +894,10 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
{
if (! bidi_initialized)
bidi_initialize ();
- bidi_it->charpos = charpos;
- bidi_it->bytepos = bytepos;
+ if (charpos >= 0)
+ bidi_it->charpos = charpos;
+ if (bytepos >= 0)
+ bidi_it->bytepos = bytepos;
bidi_it->frame_window_p = frame_window_p;
bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
bidi_it->first_elt = 1;
@@ -848,7 +922,10 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
bidi_it->disp_pos = -1; /* invalid/unknown */
- bidi_cache_shrink ();
+ /* We can only shrink the cache if we are at the bottom level of its
+ "stack". */
+ if (bidi_cache_start == 0)
+ bidi_cache_shrink ();
}
/* Push the current embedding level and override status; reset the
@@ -934,19 +1011,31 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
int current_level;
int new_level;
bidi_dir_t override;
+ int string_p = bidi_it->string.s != NULL;
/* If reseat()'ed, don't advance, so as to start iteration from the
position where we were reseated. bidi_it->bytepos can be less
than BEGV_BYTE after reseat to BEGV. */
- if (bidi_it->bytepos < BEGV_BYTE
+ if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
|| bidi_it->first_elt)
{
bidi_it->first_elt = 0;
- if (bidi_it->charpos < BEGV)
- bidi_it->charpos = BEGV;
- bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+ if (string_p)
+ {
+ if (bidi_it->charpos < 0)
+ bidi_it->charpos = 0;
+ bidi_it->bytepos = bidi_count_bytes (bidi_it->string.s, 0, 0,
+ bidi_it->charpos);
+ }
+ else
+ {
+ if (bidi_it->charpos < BEGV)
+ bidi_it->charpos = BEGV;
+ bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+ }
}
- else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
+ /* Don't move at end of buffer/string. */
+ else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
{
/* Advance to the next character, skipping characters covered by
display strings (nchars > 1). */
@@ -962,12 +1051,12 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
override = bidi_it->level_stack[bidi_it->stack_idx].override;
new_level = current_level;
- if (bidi_it->bytepos >= ZV_BYTE)
+ if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
{
curchar = BIDI_EOB;
bidi_it->ch_len = 1;
bidi_it->nchars = 1;
- bidi_it->disp_pos = ZV;
+ bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
}
else
{
@@ -975,7 +1064,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
display string, treat the entire run of covered characters as
a single character u+FFFC. */
curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
- &bidi_it->disp_pos, bidi_it->frame_window_p,
+ &bidi_it->disp_pos, &bidi_it->string,
+ bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
bidi_it->ch = curchar;
@@ -1000,7 +1090,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
bidi_it->type_after_w1 = type;
bidi_check_type (bidi_it->type_after_w1);
type = WEAK_BN; /* X9/Retaining */
- if (bidi_it->ignore_bn_limit <= 0)
+ if (bidi_it->ignore_bn_limit <= -1)
{
if (current_level <= BIDI_MAXLEVEL - 4)
{
@@ -1033,7 +1123,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
bidi_it->type_after_w1 = type;
bidi_check_type (bidi_it->type_after_w1);
type = WEAK_BN; /* X9/Retaining */
- if (bidi_it->ignore_bn_limit <= 0)
+ if (bidi_it->ignore_bn_limit <= -1)
{
if (current_level <= BIDI_MAXLEVEL - 5)
{
@@ -1068,7 +1158,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
bidi_it->type_after_w1 = type;
bidi_check_type (bidi_it->type_after_w1);
type = WEAK_BN; /* X9/Retaining */
- if (bidi_it->ignore_bn_limit <= 0)
+ if (bidi_it->ignore_bn_limit <= -1)
{
if (!bidi_it->invalid_rl_levels)
{
@@ -1111,13 +1201,15 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
int new_level = bidi_resolve_explicit_1 (bidi_it);
+ EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
if (prev_level < new_level
&& bidi_it->type == WEAK_BN
- && bidi_it->ignore_bn_limit == 0 /* only if not already known */
- && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
- && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
- + bidi_it->ch_len)))
+ && bidi_it->ignore_bn_limit == -1 /* only if not already known */
+ && bidi_it->charpos < eob /* not already at EOB */
+ && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
+ + bidi_it->ch_len,
+ bidi_it->string.s)))
{
/* Avoid pushing and popping embedding levels if the level run
is empty, as this breaks level runs where it shouldn't.
@@ -1129,8 +1221,9 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
bidi_copy_it (&saved_it, bidi_it);
- while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
- + bidi_it->ch_len)))
+ while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
+ + bidi_it->ch_len,
+ bidi_it->string.s)))
{
/* This advances to the next character, skipping any
characters covered by display strings. */
@@ -1142,10 +1235,10 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
if (level == prev_level) /* empty embedding */
saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
else /* this embedding is non-empty */
- saved_it.ignore_bn_limit = -1;
+ saved_it.ignore_bn_limit = -2;
bidi_copy_it (bidi_it, &saved_it);
- if (bidi_it->ignore_bn_limit > 0)
+ if (bidi_it->ignore_bn_limit > -1)
{
/* We pushed a level, but we shouldn't have. Undo that. */
if (!bidi_it->invalid_rl_levels)
@@ -1188,6 +1281,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
int next_char;
bidi_type_t type_of_next;
struct bidi_it saved_it;
+ EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
type = bidi_it->type;
override = bidi_it->level_stack[bidi_it->stack_idx].override;
@@ -1255,9 +1349,10 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
|| bidi_it->prev.type_after_w1 == WEAK_AN)))
{
next_char =
- bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
- ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
- + bidi_it->ch_len);
+ bidi_it->charpos + bidi_it->nchars >= eob
+ ? BIDI_EOB
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
+ bidi_it->string.s);
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_BN
@@ -1310,9 +1405,10 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
if (bidi_it->nchars <= 0)
abort ();
next_char =
- bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
- ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
- + bidi_it->ch_len);
+ bidi_it->charpos + bidi_it->nchars >= eob
+ ? BIDI_EOB
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
+ bidi_it->string.s);
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_ET
@@ -1509,11 +1605,11 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
/* Reset the limit until which to ignore BNs if we step out of the
area where we found only empty levels. */
- if ((bidi_it->ignore_bn_limit > 0
+ if ((bidi_it->ignore_bn_limit > -1
&& bidi_it->ignore_bn_limit <= bidi_it->charpos)
- || (bidi_it->ignore_bn_limit == -1
+ || (bidi_it->ignore_bn_limit == -2
&& !bidi_explicit_dir_char (bidi_it->ch)))
- bidi_it->ignore_bn_limit = 0;
+ bidi_it->ignore_bn_limit = -1;
type = bidi_resolve_neutral (bidi_it);
@@ -1530,12 +1626,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
bidi_type_t type;
int level, prev_level = -1;
struct bidi_saved_info next_for_neutral;
- EMACS_INT next_char_pos;
+ EMACS_INT next_char_pos = -1;
if (bidi_it->scan_dir == 1)
{
/* There's no sense in trying to advance if we hit end of text. */
- if (bidi_it->bytepos >= ZV_BYTE)
+ if (bidi_it->charpos >= (bidi_it->string.s ? bidi_it->string.schars : ZV))
return bidi_it->resolved_level;
/* Record the info about the previous character. */
@@ -1575,7 +1671,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
/* Perhaps the character we want is already cached. If it is, the
call to bidi_cache_find below will return a type other than
UNKNOWN_BT. */
- if (bidi_cache_idx && !bidi_it->first_elt)
+ if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
{
if (bidi_it->scan_dir > 0)
{
@@ -1583,9 +1679,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
abort ();
next_char_pos = bidi_it->charpos + bidi_it->nchars;
}
- else
+ else if (bidi_it->charpos > (bidi_it->string.s ? 0 : 1))
next_char_pos = bidi_it->charpos - 1;
- type = bidi_cache_find (next_char_pos, -1, bidi_it);
+ if (next_char_pos >= 0)
+ type = bidi_cache_find (next_char_pos, -1, bidi_it);
+ else
+ type = UNKNOWN_BT;
}
else
type = UNKNOWN_BT;
@@ -1652,13 +1751,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
EMACS_INT cpos = bidi_it->charpos;
EMACS_INT disp_pos = bidi_it->disp_pos;
EMACS_INT nc = bidi_it->nchars;
+ struct bidi_string_data bs = bidi_it->string;
bidi_type_t chtype;
int fwp = bidi_it->frame_window_p;
if (bidi_it->nchars <= 0)
abort ();
do {
- ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, fwp,
+ ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &bs, fwp,
&clen, &nc);
if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
chtype = NEUTRAL_B;
@@ -1759,7 +1859,8 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
int idx;
/* Try the cache first. */
- if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
+ if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
+ >= bidi_cache_start)
bidi_cache_fetch_state (idx, bidi_it);
else
{
@@ -1782,6 +1883,9 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
int old_level, new_level, next_level;
struct bidi_it sentinel;
+ if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
+ abort ();
+
if (bidi_it->scan_dir == 0)
{
bidi_it->scan_dir = 1; /* default to logical order */
@@ -1794,7 +1898,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
/* Prepare the sentinel iterator state, and cache it. When we bump
into it, scanning backwards, we'll know that the last non-base
level is exhausted. */
- if (bidi_cache_idx == 0)
+ if (bidi_cache_idx == bidi_cache_start)
{
bidi_copy_it (&sentinel, bidi_it);
if (bidi_it->first_elt)
@@ -1869,26 +1973,34 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
reordering, whereas we _must_ know the paragraph base direction
_before_ we process the paragraph's text, since the base
direction affects the reordering. */
- if (bidi_it->scan_dir == 1
- && bidi_it->orig_type == NEUTRAL_B
- && bidi_it->bytepos < ZV_BYTE)
+ if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
{
- EMACS_INT sep_len =
- bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
- bidi_it->bytepos + bidi_it->ch_len);
- if (bidi_it->nchars <= 0)
- abort ();
- if (sep_len >= 0)
+ /* The paragraph direction of the entire string, once
+ determined, is in effect for the entire string. Setting the
+ separator limit to the end of the string prevents
+ bidi_paragraph_init from being called automatically on this
+ string. */
+ if (bidi_it->string.s)
+ bidi_it->separator_limit = bidi_it->string.schars;
+ else if (bidi_it->bytepos < ZV_BYTE)
{
- bidi_it->new_paragraph = 1;
- /* Record the buffer position of the last character of the
- paragraph separator. */
- bidi_it->separator_limit =
- bidi_it->charpos + bidi_it->nchars + sep_len;
+ EMACS_INT sep_len =
+ bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
+ bidi_it->bytepos + bidi_it->ch_len);
+ if (bidi_it->nchars <= 0)
+ abort ();
+ if (sep_len >= 0)
+ {
+ bidi_it->new_paragraph = 1;
+ /* Record the buffer position of the last character of the
+ paragraph separator. */
+ bidi_it->separator_limit =
+ bidi_it->charpos + bidi_it->nchars + sep_len;
+ }
}
}
- if (bidi_it->scan_dir == 1 && bidi_cache_idx)
+ if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
{
/* If we are at paragraph's base embedding level and beyond the
last cached position, the cache's job is done and we can
diff --git a/src/dispextern.h b/src/dispextern.h
index 7138c2225e..0f7089d791 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1812,9 +1812,18 @@ struct bidi_stack {
bidi_dir_t override;
};
+/* Data type for storing information about a string being iterated on. */
+struct bidi_string_data {
+ const unsigned char *s; /* the string, or NULL if reordering buffer */
+ EMACS_INT schars; /* the number of characters in the string,
+ excluding the terminating null */
+ unsigned from_disp_str : 1; /* 1 means the string comes from a
+ display property */
+};
+
/* Data type for reordering bidirectional text. */
struct bidi_it {
- EMACS_INT bytepos; /* iterator's position in buffer */
+ EMACS_INT bytepos; /* iterator's position in buffer/string */
EMACS_INT charpos;
int ch; /* character at that position, or u+FFFC
("object replacement character") for a run
@@ -1844,12 +1853,13 @@ struct bidi_it {
iterator state is saved, pushed, or popped. So only put here
stuff that is not part of the bidi iterator's state! */
struct bidi_stack level_stack[BIDI_MAXLEVEL]; /* stack of embedding levels */
- int first_elt; /* if non-zero, examine current char first */
+ struct bidi_string_data string; /* string to reorder */
bidi_dir_t paragraph_dir; /* current paragraph direction */
- int new_paragraph; /* if non-zero, we expect a new paragraph */
- int frame_window_p; /* non-zero if displaying on a GUI frame */
EMACS_INT separator_limit; /* where paragraph separator should end */
EMACS_INT disp_pos; /* position of display string after ch */
+ unsigned first_elt : 1; /* if non-zero, examine current char first */
+ unsigned new_paragraph : 1; /* if non-zero, we expect a new paragraph */
+ unsigned frame_window_p : 1; /* non-zero if displaying on a GUI frame */
};
/* Value is non-zero when the bidi iterator is at base paragraph
@@ -3007,8 +3017,10 @@ extern void reseat_at_previous_visible_line_start (struct it *);
extern Lisp_Object lookup_glyphless_char_display (int, struct it *);
extern int calc_pixel_width_or_height (double *, struct it *, Lisp_Object,
struct font *, int, int *);
-extern EMACS_INT compute_display_string_pos (EMACS_INT, int);
-extern EMACS_INT compute_display_string_end (EMACS_INT);
+extern EMACS_INT compute_display_string_pos (EMACS_INT,
+ struct bidi_string_data *, int);
+extern EMACS_INT compute_display_string_end (EMACS_INT,
+ struct bidi_string_data *);
#ifdef HAVE_WINDOW_SYSTEM
diff --git a/src/xdisp.c b/src/xdisp.c
index 23667388b5..cd62989d99 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -2342,6 +2342,7 @@ init_iterator (struct it *it, struct window *w,
it->base_face_id = remapped_base_face_id;
it->string = Qnil;
IT_STRING_CHARPOS (*it) = IT_STRING_BYTEPOS (*it) = -1;
+ it->bidi_it.string.s = NULL;
/* The window in which we iterate over current_buffer: */
XSETWINDOW (it->window, w);
@@ -3087,14 +3088,16 @@ next_overlay_change (EMACS_INT pos)
return endpos;
}
-/* Return the character position of a display string at or after CHARPOS.
- If no display string exists at or after CHARPOS, return ZV. A
- display string is either an overlay with `display' property whose
- value is a string, or a `display' text property whose value is a
- string. FRAME_WINDOW_P is non-zero when we are displaying a window
- on a GUI frame. */
+/* Return the character position of a display string at or after
+ CHARPOS. If no display string exists at or after CHARPOS, return
+ ZV. A display string is either an overlay with `display' property
+ whose value is a string, or a `display' text property whose value
+ is a string. STRING is the string to iterate; if STRING->s is
+ NULL, we are iterating a buffer. FRAME_WINDOW_P is non-zero when
+ we are displaying a window on a GUI frame. */
EMACS_INT
-compute_display_string_pos (EMACS_INT charpos, int frame_window_p)
+compute_display_string_pos (EMACS_INT charpos, struct bidi_string_data *string,
+ int frame_window_p)
{
/* FIXME: Support display properties on strings (object = Qnil means
current buffer). */
@@ -3143,7 +3146,7 @@ compute_display_string_pos (EMACS_INT charpos, int frame_window_p)
`display' property whose value is a string or a `display' text
property whose value is a string. */
EMACS_INT
-compute_display_string_end (EMACS_INT charpos)
+compute_display_string_end (EMACS_INT charpos, struct bidi_string_data *string)
{
/* FIXME: Support display properties on strings (object = Qnil means
current buffer). */
@@ -5482,6 +5485,7 @@ reseat_1 (struct it *it, struct text_pos pos, int set_stop_p)
it->bidi_it.first_elt = 1;
it->bidi_it.paragraph_dir = NEUTRAL_DIR;
it->bidi_it.disp_pos = -1;
+ it->bidi_it.string.s = NULL;
}
if (set_stop_p)
@@ -5531,6 +5535,10 @@ reseat_to_string (struct it *it, const char *s, Lisp_Object string,
setting of MULTIBYTE, if specified. */
if (multibyte >= 0)
it->multibyte_p = multibyte > 0;
+#if 0
+ it->bidi_p =
+ it->multibyte_p && BVAR (&buffer_defaults, bidi_display_reordering);
+#endif
if (s == NULL)
{
@@ -5540,6 +5548,12 @@ reseat_to_string (struct it *it, const char *s, Lisp_Object string,
it->end_charpos = it->string_nchars = SCHARS (string);
it->method = GET_FROM_STRING;
it->current.string_pos = string_pos (charpos, string);
+#if 0
+ if (it->bidi_p)
+ bidi_init_it ();
+ it->bidi_it.string.s = SDATA (string);
+ it->bidi_it.string.schars = it->end_charpos;
+#endif
}
else
{
@@ -5553,11 +5567,20 @@ reseat_to_string (struct it *it, const char *s, Lisp_Object string,
{
it->current.pos = c_string_pos (charpos, s, 1);
it->end_charpos = it->string_nchars = number_of_chars (s, 1);
+#if 0
+ if (it->bidi_p)
+ bidi_init_it ();
+ it->bidi_it.string.s = s;
+ it->bidi_it.string.schars = it->end_charpos;
+#endif
}
else
{
IT_CHARPOS (*it) = IT_BYTEPOS (*it) = charpos;
it->end_charpos = it->string_nchars = strlen (s);
+#if 0
+ it->bidi_p = 0;
+#endif
}
it->method = GET_FROM_C_STRING;