summary refs log tree commit diff
diff options
context:
space:
mode:
author Stefan Monnier <monnier@iro.umontreal.ca> 2007-06-12 18:40:18 +0000
committer Stefan Monnier <monnier@iro.umontreal.ca> 2007-06-12 18:40:18 +0000
commit c69b0314526d0b618be1af27b8b6330065401e61 (patch)
tree e9adef9dfd0d31c2a84024d760882668049dd8b6
parent 495fa7db39d8b28633d410f994d9c583f407c8d3 (diff)
(regex_compile): Remove the `regnum' counter.
Use bufp->re_nsub instead. Add support for \(?N:RE\).
-rw-r--r-- etc/NEWS 3
-rw-r--r-- lispref/searching.texi 9
-rw-r--r-- src/ChangeLog 5
-rw-r--r-- src/regex.c 71
4 files changed, 63 insertions, 25 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 83e7477463..2c740fe0ef 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -70,6 +70,9 @@ considered for update.
* Lisp Changes in Emacs 23.1
++++
+** The regexp form \(?<num>:<regexp>\) specifies the group number explicitly.
+
* New Packages for Lisp Programming in Emacs 23.1
diff --git a/lispref/searching.texi b/lispref/searching.texi
index 10e8c67296..f923a58f11 100644
--- a/lispref/searching.texi
+++ b/lispref/searching.texi
@@ -650,6 +650,15 @@ Shy groups are particularly useful for mechanically-constructed regular
expressions because they can be added automatically without altering the
numbering of any ordinary, non-shy groups.
+@item \(?@var{num}: @dots{} \)
+is the @dfn{explicitly numbered group} construct. Normal groups get
+their number implicitly, based on their position, which can be
+inconvenient. This construct allows you to force a particular group
+number. There is no particular restriction on the numbering,
+e.g.@: you can have several groups with the same number.
+Implicitly numbered groups always get the smallest integer larger than
+the number of any previous group.
+
@item \@var{digit}
matches the same text that matched the @var{digit}th occurrence of a
grouping (@samp{\( @dots{} \)}) construct.
diff --git a/src/ChangeLog b/src/ChangeLog
index 26b07eb45e..1ce80e41fc 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2007-06-12 Stefan Monnier <monnier@iro.umontreal.ca>
+
+ * regex.c (regex_compile): Remove the `regnum' counter.
+ Use bufp->re_nsub instead. Add support for \(?N:RE\).
+
2007-06-12 Glenn Morris <rgm@gnu.org>
* config.in (HAVE_GIF): Doc fix.
diff --git a/src/regex.c b/src/regex.c
index 7784a3ae61..1e80b9bbee 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2482,11 +2482,6 @@ regex_compile (pattern, size, syntax, bufp)
last -- ends with a forward jump of this sort. */
unsigned char *fixup_alt_jump = 0;
- /* Counts open-groups as they are encountered. Remembered for the
- matching close-group on the compile stack, so the same register
- number is put in the stop_memory as the start_memory. */
- regnum_t regnum = 0;
-
/* Work area for range table of charset. */
struct range_table_work_area range_table_work;
@@ -3123,28 +3118,54 @@ regex_compile (pattern, size, syntax, bufp)
handle_open:
{
int shy = 0;
+ regnum_t regnum = 0;
if (p+1 < pend)
{
/* Look for a special (?...) construct */
if ((syntax & RE_SHY_GROUPS) && *p == '?')
{
PATFETCH (c); /* Gobble up the '?'. */
- PATFETCH (c);
- switch (c)
+ while (!shy)
{
- case ':': shy = 1; break;
- default:
- /* Only (?:...) is supported right now. */
- FREE_STACK_RETURN (REG_BADPAT);
+ PATFETCH (c);
+ switch (c)
+ {
+ case ':': shy = 1; break;
+ case '0':
+ /* An explicitly specified regnum must start
+ with non-0. */
+ if (regnum == 0)
+ FREE_STACK_RETURN (REG_BADPAT);
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ regnum = 10*regnum + (c - '0'); break;
+ default:
+ /* Only (?:...) and (?N:...) are supported right now. */
+ FREE_STACK_RETURN (REG_BADPAT);
+ }
}
}
}
if (!shy)
- {
- bufp->re_nsub++;
- regnum++;
+ regnum = ++bufp->re_nsub;
+ else if (regnum)
+ { /* It's actually not shy, but explicitly numbered. */
+ shy = 0;
+ if (regnum > bufp->re_nsub)
+ bufp->re_nsub = regnum;
+ else if (regnum > bufp->re_nsub
+ /* Ideally, we'd want to check that the specified
+ group can't have matched (i.e. all subgroups
+ using the same regnum are in other branches of
+ OR patterns), but we don't currently keep track
+ of enough info to do that easily. */
+ || group_in_compile_stack (compile_stack, regnum))
+ FREE_STACK_RETURN (REG_BADPAT);
}
+ else
+ /* It's really shy. */
+ regnum = - bufp->re_nsub;
if (COMPILE_STACK_FULL)
{
@@ -3163,12 +3184,11 @@ regex_compile (pattern, size, syntax, bufp)
COMPILE_STACK_TOP.fixup_alt_jump
= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
- COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
+ COMPILE_STACK_TOP.regnum = regnum;
- /* Do not push a
- start_memory for groups beyond the last one we can
- represent in the compiled pattern. */
- if (regnum <= MAX_REGNUM && !shy)
+ /* Do not push a start_memory for groups beyond the last one
+ we can represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM && regnum > 0)
BUF_PUSH_2 (start_memory, regnum);
compile_stack.avail++;
@@ -3213,7 +3233,7 @@ regex_compile (pattern, size, syntax, bufp)
/* We don't just want to restore into `regnum', because
later groups should continue to be numbered higher,
as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
+ regnum_t regnum;
compile_stack.avail--;
begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
@@ -3222,7 +3242,7 @@ regex_compile (pattern, size, syntax, bufp)
? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
: 0;
laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
+ regnum = COMPILE_STACK_TOP.regnum;
/* If we've reached MAX_REGNUM groups, then this open
won't actually generate any code, so we'll have to
clear pending_exact explicitly. */
@@ -3230,8 +3250,8 @@ regex_compile (pattern, size, syntax, bufp)
/* We're at the end of the group, so now we know how many
groups were inside this one. */
- if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
- BUF_PUSH_2 (stop_memory, this_group_regnum);
+ if (regnum <= MAX_REGNUM && regnum > 0)
+ BUF_PUSH_2 (stop_memory, regnum);
}
break;
@@ -3557,8 +3577,9 @@ regex_compile (pattern, size, syntax, bufp)
reg = c - '0';
- /* Can't back reference to a subexpression before its end. */
- if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+ if (reg > bufp->re_nsub || reg < 1
+ /* Can't back reference to a subexp before its end. */
+ || group_in_compile_stack (compile_stack, reg))
FREE_STACK_RETURN (REG_ESUBREG);
laststart = b;