diff options
author | Stefan Monnier <monnier@iro.umontreal.ca> | 2007-06-12 18:40:18 +0000 |
---|---|---|
committer | Stefan Monnier <monnier@iro.umontreal.ca> | 2007-06-12 18:40:18 +0000 |
commit | c69b0314526d0b618be1af27b8b6330065401e61 (patch) | |
tree | e9adef9dfd0d31c2a84024d760882668049dd8b6 | |
parent | 495fa7db39d8b28633d410f994d9c583f407c8d3 (diff) |
(regex_compile): Remove the `regnum' counter.
Use bufp->re_nsub instead. Add support for \(?N:RE\).
-rw-r--r-- | etc/NEWS | 3 |
-rw-r--r-- | lispref/searching.texi | 9 |
-rw-r--r-- | src/ChangeLog | 5 |
-rw-r--r-- | src/regex.c | 71 |
4 files changed, 63 insertions, 25 deletions
@@ -70,6 +70,9 @@ considered for update.
 
 * Lisp Changes in Emacs 23.1
 
++++
+** The regexp form \(?<num>:<regexp>\) specifies the group number explicitly.
+
 
 * New Packages for Lisp Programming in Emacs 23.1
 
diff --git a/lispref/searching.texi b/lispref/searching.texi
index 10e8c67296..f923a58f11 100644
--- a/lispref/searching.texi
+++ b/lispref/searching.texi
@@ -650,6 +650,15 @@ Shy groups are particularly useful for mechanically-constructed
 regular expressions because they can be added automatically without
 altering the numbering of any ordinary, non-shy groups.
 
+@item \(?@var{num}: @dots{} \)
+is the @dfn{explicitly numbered group} construct. Normal groups get
+their number implicitly, based on their position, which can be
+inconvenient. This construct allows you to force a particular group
+number. There is no particular restriction on the numbering,
+e.g.@: you can have several groups with the same number.
+Implicitly numbered groups always get the smallest integer larger than
+the one of any previous group.
+
 @item \@var{digit}
 matches the same text that matched the @var{digit}th occurrence of a
 grouping (@samp{\( @dots{} \)}) construct.
diff --git a/src/ChangeLog b/src/ChangeLog
index 26b07eb45e..1ce80e41fc 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2007-06-12 Stefan Monnier <monnier@iro.umontreal.ca>
+
+        * regex.c (regex_compile): Remove the `regnum' counter.
+        Use bufp->re_nsub instead. Add support for \(?N:RE\).
+
 2007-06-12 Glenn Morris <rgm@gnu.org>
 
         * config.in (HAVE_GIF): Doc fix.
diff --git a/src/regex.c b/src/regex.c
index 7784a3ae61..1e80b9bbee 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2482,11 +2482,6 @@ regex_compile (pattern, size, syntax, bufp)
      last -- ends with a forward jump of this sort. */
   unsigned char *fixup_alt_jump = 0;
 
-  /* Counts open-groups as they are encountered. Remembered for the
-     matching close-group on the compile stack, so the same register
-     number is put in the stop_memory as the start_memory. */
-  regnum_t regnum = 0;
-
   /* Work area for range table of charset. */
   struct range_table_work_area range_table_work;
 
@@ -3123,28 +3118,54 @@ regex_compile (pattern, size, syntax, bufp)
         handle_open:
           {
             int shy = 0;
+            regnum_t regnum = 0;
             if (p+1 < pend)
               {
                 /* Look for a special (?...) construct */
                 if ((syntax & RE_SHY_GROUPS) && *p == '?')
                   {
                     PATFETCH (c); /* Gobble up the '?'. */
-                    PATFETCH (c);
-                    switch (c)
+                    while (!shy)
                       {
-                      case ':': shy = 1; break;
-                      default:
-                        /* Only (?:...) is supported right now. */
-                        FREE_STACK_RETURN (REG_BADPAT);
+                        PATFETCH (c);
+                        switch (c)
+                          {
+                          case ':': shy = 1; break;
+                          case '0':
+                            /* An explicitly specified regnum must start
+                               with non-0. */
+                            if (regnum == 0)
+                              FREE_STACK_RETURN (REG_BADPAT);
+                          case '1': case '2': case '3': case '4':
+                          case '5': case '6': case '7': case '8': case '9':
+                            regnum = 10*regnum + (c - '0'); break;
+                          default:
+                            /* Only (?:...) is supported right now. */
+                            FREE_STACK_RETURN (REG_BADPAT);
+                          }
                       }
                   }
               }
 
             if (!shy)
-              {
-                bufp->re_nsub++;
-                regnum++;
+              regnum = ++bufp->re_nsub;
+            else if (regnum)
+              { /* It's actually not shy, but explicitly numbered. */
+                shy = 0;
+                if (regnum > bufp->re_nsub)
+                  bufp->re_nsub = regnum;
+                else if (regnum > bufp->re_nsub
+                         /* Ideally, we'd want to check that the specified
+                            group can't have matched (i.e. all subgroups
+                            using the same regnum are in other branches of
+                            OR patterns), but we don't currently keep track
+                            of enough info to do that easily. */
+                         || group_in_compile_stack (compile_stack, regnum))
+                  FREE_STACK_RETURN (REG_BADPAT);
               }
+            else
+              /* It's really shy. */
+              regnum = - bufp->re_nsub;
 
             if (COMPILE_STACK_FULL)
               {
@@ -3163,12 +3184,11 @@ regex_compile (pattern, size, syntax, bufp)
             COMPILE_STACK_TOP.fixup_alt_jump
               = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
             COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
-            COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
+            COMPILE_STACK_TOP.regnum = regnum;
 
-            /* Do not push a
-               start_memory for groups beyond the last one we can
-               represent in the compiled pattern. */
-            if (regnum <= MAX_REGNUM && !shy)
+            /* Do not push a start_memory for groups beyond the last one
+               we can represent in the compiled pattern. */
+            if (regnum <= MAX_REGNUM && regnum > 0)
               BUF_PUSH_2 (start_memory, regnum);
 
             compile_stack.avail++;
@@ -3213,7 +3233,7 @@ regex_compile (pattern, size, syntax, bufp)
               /* We don't just want to restore into `regnum', because
                  later groups should continue to be numbered higher,
                  as in `(ab)c(de)' -- the second group is #2. */
-              regnum_t this_group_regnum;
+              regnum_t regnum;
 
               compile_stack.avail--;
               begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
@@ -3222,7 +3242,7 @@ regex_compile (pattern, size, syntax, bufp)
                   ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
                   : 0;
               laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
-              this_group_regnum = COMPILE_STACK_TOP.regnum;
+              regnum = COMPILE_STACK_TOP.regnum;
               /* If we've reached MAX_REGNUM groups, then this open
                  won't actually generate any code, so we'll have to
                  clear pending_exact explicitly. */
@@ -3230,8 +3250,8 @@ regex_compile (pattern, size, syntax, bufp)
 
               /* We're at the end of the group, so now we know how many
                  groups were inside this one. */
-              if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
-                BUF_PUSH_2 (stop_memory, this_group_regnum);
+              if (regnum <= MAX_REGNUM && regnum > 0)
+                BUF_PUSH_2 (stop_memory, regnum);
             }
           break;
 
@@ -3557,8 +3577,9 @@ regex_compile (pattern, size, syntax, bufp)
 
                 reg = c - '0';
 
-                /* Can't back reference to a subexpression before its end. */
-                if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+                if (reg > bufp->re_nsub || reg < 1
+                    /* Can't back reference to a subexp before its end. */
+                    || group_in_compile_stack (compile_stack, reg))
                   FREE_STACK_RETURN (REG_ESUBREG);
 
                 laststart = b;