diff options
author | Eli Zaretskii <eliz@gnu.org> | 2011-04-16 21:26:30 +0300 |
---|---|---|
committer | Eli Zaretskii <eliz@gnu.org> | 2011-04-16 21:26:30 +0300 |
commit | d1dfb56cc84a6d70262d979face230a71e98d479 (patch) | |
tree | 8204edb64962a875b368dd248c00e127f9584b73 /src/regex.c | |
parent | 10472dd0d62d986e048c01a3d37627b843a321d2 (diff) |
Fix regex.c, syntax.c and friends for buffers > 2GB.
src/syntax.h (struct gl_state_s): Declare character position members
EMACS_INT.
src/syntax.c (update_syntax_table): Declare 2nd argument EMACS_INT.
src/textprop.c (verify_interval_modification, interval_of): Declare
arguments EMACS_INT.
src/intervals.c (adjust_intervals_for_insertion): Declare arguments
EMACS_INT.
src/intervals.h (CHECK_TOTAL_LENGTH): Cast to EMACS_INT, not `int'.
src/indent.c (Fvertical_motion): Local variable it_start is now
EMACS_INT.
src/regex.c (re_match, re_match_2, re_match_2_internal)
(bcmp_translate, regcomp, regexec, print_double_string)
(group_in_compile_stack, re_search, re_search_2, regex_compile)
(re_compile_pattern, re_exec): Declare arguments and local
variables as `size_t' or `ssize_t', and return values as `regoff_t',
as appropriate.
(POP_FAILURE_REG_OR_COUNT) <pfreg>: Declare `long'.
(CHECK_INFINITE_LOOP) <failure>: Declare `ssize_t'.
<compile_stack_type>: `size' and `avail' are now `size_t'.
src/regex.h <regoff_t>: Use ssize_t, not int.
(re_search, re_search_2, re_match, re_match_2): Arguments that
specify buffer/string position and length are now ssize_t and
size_t. Return type is regoff_t.
Diffstat (limited to 'src/regex.c')
-rw-r--r-- | src/regex.c | 141 |
1 files changed, 74 insertions, 67 deletions
diff --git a/src/regex.c b/src/regex.c index a60ff0ce35..479239897b 100644 --- a/src/regex.c +++ b/src/regex.c @@ -569,12 +569,12 @@ typedef char boolean; #define false 0 #define true 1 -static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp, - re_char *string1, int size1, - re_char *string2, int size2, - int pos, - struct re_registers *regs, - int stop)); +static regoff_t re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp, + re_char *string1, size_t size1, + re_char *string2, size_t size2, + ssize_t pos, + struct re_registers *regs, + ssize_t stop)); /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A @@ -1230,10 +1230,10 @@ print_double_string (where, string1, size1, string2, size2) re_char *where; re_char *string1; re_char *string2; - int size1; - int size2; + ssize_t size1; + ssize_t size2; { - int this_char; + ssize_t this_char; if (where == NULL) printf ("(null)"); @@ -1546,7 +1546,7 @@ do { \ /* Pop a saved register off the stack. */ #define POP_FAILURE_REG_OR_COUNT() \ do { \ - int pfreg = POP_FAILURE_INT (); \ + long pfreg = POP_FAILURE_INT (); \ if (pfreg == -1) \ { \ /* It's a counter. */ \ @@ -1568,7 +1568,7 @@ do { \ /* Check that we are not stuck in an infinite loop. */ #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ do { \ - int failure = TOP_FAILURE_HANDLE (); \ + ssize_t failure = TOP_FAILURE_HANDLE (); \ /* Check for infinite matching loops */ \ while (failure > 0 \ && (FAILURE_STR (failure) == string_place \ @@ -1876,8 +1876,8 @@ typedef struct typedef struct { compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ + size_t size; + size_t avail; /* Offset of next open position. 
*/ } compile_stack_type; @@ -2779,7 +2779,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct if (many_times_ok) { boolean simple = skip_one_char (laststart) == b; - unsigned int startoffset = 0; + size_t startoffset = 0; re_opcode_t ofj = /* Check if the loop can match the empty string. */ (simple || !analyse_first (laststart, b, NULL, 0)) @@ -3361,7 +3361,7 @@ regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct _____ _____ | | | | | v | v - a | b | c + a | b | c If we are at `b', then fixup_alt_jump right now points to a three-byte space after `a'. We'll put in the jump, set @@ -3905,7 +3905,7 @@ at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax) static boolean group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) { - int this_element; + ssize_t this_element; for (this_element = compile_stack.avail - 1; this_element >= 0; @@ -4291,8 +4291,9 @@ WEAK_ALIAS (__re_set_registers, re_set_registers) /* Like re_search_2, below, but only one string is specified, and doesn't let you say where to stop matching. */ -int -re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs) +regoff_t +re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, + ssize_t startpos, ssize_t range, struct re_registers *regs) { return re_search_2 (bufp, NULL, 0, string, size, startpos, range, regs, size); @@ -4328,16 +4329,18 @@ WEAK_ALIAS (__re_search, re_search) found, -1 if no match, or -2 if error (such as failure stack overflow). 
*/ -int -re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop) +regoff_t +re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, + const char *str2, size_t size2, ssize_t startpos, ssize_t range, + struct re_registers *regs, ssize_t stop) { - int val; + regoff_t val; re_char *string1 = (re_char*) str1; re_char *string2 = (re_char*) str2; register char *fastmap = bufp->fastmap; register RE_TRANSLATE_TYPE translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; + size_t total_size = size1 + size2; + ssize_t endpos = startpos + range; boolean anchored_start; /* Nonzero if we are searching multibyte string. */ const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp); @@ -4385,7 +4388,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const #ifdef emacs gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ { - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); } @@ -4420,7 +4423,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const if (range > 0) /* Searching forwards. */ { register int lim = 0; - int irange = range; + ssize_t irange = range; if (startpos < size1 && startpos + range >= size1) lim = range - (size1 - startpos); @@ -4571,7 +4574,7 @@ WEAK_ALIAS (__re_search_2, re_search_2) /* Declarations and macros for re_match_2. 
*/ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, - register int len, + register ssize_t len, RE_TRANSLATE_TYPE translate, const int multibyte)); @@ -4873,11 +4876,11 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const r && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) break; - if (idx == p2[1]) - { - DEBUG_PRINT1 (" No match => fast loop.\n"); - return 1; - } + if (idx == p2[1]) + { + DEBUG_PRINT1 (" No match => fast loop.\n"); + return 1; + } } } } @@ -4941,12 +4944,12 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const r #ifndef emacs /* Emacs never uses this. */ /* re_match is like re_match_2 except it takes only a single string. */ -int +regoff_t re_match (struct re_pattern_buffer *bufp, const char *string, - int size, int pos, struct re_registers *regs) + size_t size, ssize_t pos, struct re_registers *regs) { - int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size, - pos, regs, size); + regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, + size, pos, regs, size); return result; } WEAK_ALIAS (__re_match, re_match) @@ -4971,13 +4974,15 @@ Lisp_Object re_match_object; failure stack overflowing). Otherwise, we return the length of the matched substring. */ -int -re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop) +regoff_t +re_match_2 (struct re_pattern_buffer *bufp, const char *string1, + size_t size1, const char *string2, size_t size2, ssize_t pos, + struct re_registers *regs, ssize_t stop) { - int result; + regoff_t result; #ifdef emacs - int charpos; + ssize_t charpos; gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. 
*/ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); @@ -4993,11 +4998,13 @@ WEAK_ALIAS (__re_match_2, re_match_2) /* This is a separate function so that we can force an alloca cleanup afterwards. */ -static int -re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop) +static regoff_t +re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, + size_t size1, const re_char *string2, size_t size2, + ssize_t pos, struct re_registers *regs, ssize_t stop) { /* General temporaries. */ - int mcnt; + ssize_t mcnt; size_t reg; /* Just past the end of the corresponding string. */ @@ -5996,8 +6003,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int int s1, s2; int dummy; #ifdef emacs - int offset = PTR_TO_OFFSET (d - 1); - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d - 1); + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); #endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ -6038,8 +6045,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int int s1, s2; int dummy; #ifdef emacs - int offset = PTR_TO_OFFSET (d); - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d); + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); #endif PREFETCH (); @@ -6083,8 +6090,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int int s1, s2; int dummy; #ifdef emacs - int offset = PTR_TO_OFFSET (d) - 1; - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d) - 1; + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); #endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ 
-6127,8 +6134,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int re_wchar_t c1, c2; int s1, s2; #ifdef emacs - int offset = PTR_TO_OFFSET (d); - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d); + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); #endif PREFETCH (); @@ -6170,8 +6177,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int re_wchar_t c1, c2; int s1, s2; #ifdef emacs - int offset = PTR_TO_OFFSET (d) - 1; - int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d) - 1; + ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); #endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ -6207,8 +6214,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int PREFETCH (); #ifdef emacs { - int offset = PTR_TO_OFFSET (d); - int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ssize_t offset = PTR_TO_OFFSET (d); + ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (pos1); } #endif @@ -6331,7 +6338,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int bytes; nonzero otherwise. */ static int -bcmp_translate (const re_char *s1, const re_char *s2, register int len, +bcmp_translate (const re_char *s1, const re_char *s2, register ssize_t len, RE_TRANSLATE_TYPE translate, const int target_multibyte) { register re_char *p1 = s1, *p2 = s2; @@ -6373,7 +6380,8 @@ bcmp_translate (const re_char *s1, const re_char *s2, register int len, We call regex_compile to do the actual compilation. 
*/ const char * -re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp) +re_compile_pattern (const char *pattern, size_t length, + struct re_pattern_buffer *bufp) { reg_errcode_t ret; @@ -6449,14 +6457,13 @@ re_comp (s) } -int +regoff_t # ifdef _LIBC weak_function # endif -re_exec (s) - const char *s; +re_exec (const char *s) { - const int len = strlen (s); + const size_t len = strlen (s); return 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); } @@ -6500,7 +6507,7 @@ re_exec (s) It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for the return codes and their meanings.) */ -int +reg_errcode_t regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags) { @@ -6564,7 +6571,7 @@ regcomp (regex_t *__restrict preg, const char *__restrict pattern, preg->fastmap = NULL; } } - return (int) ret; + return ret; } WEAK_ALIAS (__regcomp, regcomp) @@ -6583,14 +6590,14 @@ WEAK_ALIAS (__regcomp, regcomp) We return 0 if we find a match and REG_NOMATCH if not. */ -int +reg_errcode_t regexec (const regex_t *__restrict preg, const char *__restrict string, size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags) { - int ret; + reg_errcode_t ret; struct re_registers regs; regex_t private_preg; - int len = strlen (string); + size_t len = strlen (string); boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch; private_preg = *preg; @@ -6608,7 +6615,7 @@ regexec (const regex_t *__restrict preg, const char *__restrict string, regs.num_regs = nmatch; regs.start = TALLOC (nmatch * 2, regoff_t); if (regs.start == NULL) - return (int) REG_NOMATCH; + return REG_NOMATCH; regs.end = regs.start + nmatch; } @@ -6645,7 +6652,7 @@ regexec (const regex_t *__restrict preg, const char *__restrict string, } /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; + return ret >= 0 ? 
REG_NOERROR : REG_NOMATCH; } WEAK_ALIAS (__regexec, regexec) |