2 files changed, 548 insertions, 383 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 67eaa09bf0..b74d831d70 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,25 @@
 2011-05-22  Paul Eggert  <eggert@cs.ucla.edu>
 
+	Rework Fformat to avoid integer overflow issues.
+	* editfns.c: Include <float.h> unconditionally, as it's everywhere
+	now (part of C89).  Include <verify.h>.
+	(MAX_10_EXP, CONVERTED_BYTE_SIZE): Remove; no longer needed.
+	(pWIDE, pWIDElen, signed_wide, unsigned_wide): New defns.
+	(Fformat): Avoid the prepass trying to compute sizes; it was only
+	approximate and thus did not catch overflow reliably.  Instead, walk
+	through the format just once, formatting and computing sizes as we go,
+	checking for integer overflow at every step, and allocating a larger
+	buffer as needed.  Keep track separately whether the format is
+	multibyte.  Keep only the most-recently calculated precision, rather
+	than them all.  Record whether each argument has been converted to
+	string.  Use EMACS_INT, not int, for byte and char and arg counts.
+	Support field widths and precisions larger than INT_MAX.  Avoid
+	sprintf's undefined behavior with conversion specifications such as %#d
+	and %.0c.  Fix bug with strchr succeeding on '\0' when looking for
+	flags.  Fix bug with (format "%c" 256.0).  Avoid integer overflow when
+	formatting out-of-range floating point numbers with int
+	formats. (Bug#8668)
+
 	* lisp.h (FIXNUM_OVERFLOW_P): Work even if arg is a NaN.
 
 2011-05-21  Paul Eggert  <eggert@cs.ucla.edu>
diff --git a/src/editfns.c b/src/editfns.c
index 5e1dcce027..84bb64f64b 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -45,9 +45,11 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #endif
 
 #include <ctype.h>
+#include <float.h>
 #include <limits.h>
 #include <intprops.h>
 #include <strftime.h>
+#include <verify.h>
 
 #include "intervals.h"
 #include "buffer.h"
@@ -57,13 +59,6 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include "window.h"
 #include "blockinput.h"
 
-#ifdef STDC_HEADERS
-#include <float.h>
-#define MAX_10_EXP	DBL_MAX_10_EXP
-#else
-#define MAX_10_EXP	310
-#endif
-
 #ifndef NULL
 #define NULL 0
 #endif
@@ -3525,14 +3520,21 @@ usage: (propertize STRING &rest PROPERTIES)  */)
   RETURN_UNGCPRO (string);
 }
 
-
-/* Number of bytes that STRING will occupy when put into the result.
-   MULTIBYTE is nonzero if the result should be multibyte.  */
-
-#define CONVERTED_BYTE_SIZE(MULTIBYTE, STRING)				\
-  (((MULTIBYTE) && ! STRING_MULTIBYTE (STRING))				\
-   ? count_size_as_multibyte (SDATA (STRING), SBYTES (STRING))		\
-   : SBYTES (STRING))
+/* pWIDE is a conversion for printing large decimal integers (possibly with a
+   trailing "d" that is ignored).  pWIDElen is its length.  signed_wide and
+   unsigned_wide are signed and unsigned types for printing them.  Use widest
+   integers if available so that more floating point values can be converted.  */
+#ifdef PRIdMAX
+# define pWIDE PRIdMAX
+enum { pWIDElen = sizeof PRIdMAX - 2 }; /* Don't count trailing "d".  */
+typedef intmax_t signed_wide;
+typedef uintmax_t unsigned_wide;
+#else
+# define pWIDE pI
+enum { pWIDElen = sizeof pI - 1 };
+typedef EMACS_INT signed_wide;
+typedef EMACS_UINT unsigned_wide;
+#endif
 
 DEFUN ("format", Fformat, Sformat, 1, MANY, 0,
        doc: /* Format a string out of a format-string and arguments.
@@ -3583,11 +3585,17 @@ specifier truncates the string to the given width.
 usage: (format STRING &rest OBJECTS)  */)
   (size_t nargs, register Lisp_Object *args)
 {
-  register size_t n;		/* The number of the next arg to substitute */
-  register size_t total;	/* An estimate of the final length */
-  char *buf, *p;
+  EMACS_INT n;		/* The number of the next arg to substitute */
+  char initial_buffer[4000];
+  char *buf = initial_buffer;
+  EMACS_INT bufsize = sizeof initial_buffer;
+  EMACS_INT max_bufsize = min (MOST_POSITIVE_FIXNUM + 1, SIZE_MAX);
+  char *p;
+  Lisp_Object buf_save_value IF_LINT (= {0});
   register char *format, *end, *format_start;
-  int nchars;
+  EMACS_INT formatlen, nchars;
+  /* Nonzero if the format is multibyte.  */
+  int multibyte_format = 0;
   /* Nonzero if the output should be a multibyte string,
      which is true if any of the inputs is one.  */
   int multibyte = 0;
@@ -3596,14 +3604,6 @@ usage: (format STRING &rest OBJECTS)  */)
      multibyte character of the previous string.  This flag tells if we
      must consider such a situation or not.  */
   int maybe_combine_byte;
-  char *this_format;
-  /* Precision for each spec, or -1, a flag value meaning no precision
-     was given in that spec.  Element 0, corresponding to the format
-     string itself, will not be used.  Element NARGS, corresponding to
-     no argument, *will* be assigned to in the case that a `%' and `.'
-     occur after the final format specifier.  */
-  int *precision = (int *) (alloca ((nargs + 1) * sizeof (int)));
-  int longest_format;
   Lisp_Object val;
   int arg_intervals = 0;
   USE_SAFE_ALLOCA;
@@ -3611,458 +3611,603 @@ usage: (format STRING &rest OBJECTS)  */)
   /* discarded[I] is 1 if byte I of the format
      string was not copied into the output.
      It is 2 if byte I was not the first byte of its character.  */
-  char *discarded = 0;
+  char *discarded;
 
   /* Each element records, for one argument,
      the start and end bytepos in the output string,
+     whether the argument has been converted to string (e.g., due to "%S"),
      and whether the argument is a string with intervals.
      info[0] is unused.  Unused elements have -1 for start.  */
   struct info
   {
-    int start, end, intervals;
+    EMACS_INT start, end;
+    int converted_to_string;
+    int intervals;
   } *info = 0;
 
   /* It should not be necessary to GCPRO ARGS, because
      the caller in the interpreter should take care of that.  */
 
+  CHECK_STRING (args[0]);
+  format_start = SSDATA (args[0]);
+  formatlen = SBYTES (args[0]);
+
+  /* Allocate the info and discarded tables.  */
+  {
+    EMACS_INT i;
+    if ((SIZE_MAX - formatlen) / sizeof (struct info) <= nargs)
+      memory_full ();
+    SAFE_ALLOCA (info, struct info *, (nargs + 1) * sizeof *info + formatlen);
+    discarded = (char *) &info[nargs + 1];
+    for (i = 0; i < nargs + 1; i++)
+      {
+	info[i].start = -1;
+	info[i].intervals = info[i].converted_to_string = 0;
+      }
+    memset (discarded, 0, formatlen);
+  }
+
   /* Try to determine whether the result should be multibyte.
      This is not always right; sometimes the result needs to be multibyte
      because of an object that we will pass through prin1,
      and in that case, we won't know it here.  */
-  for (n = 0; n < nargs; n++)
-    {
-      if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
-	multibyte = 1;
-      /* Piggyback on this loop to initialize precision[N]. */
-      precision[n] = -1;
-    }
-  precision[nargs] = -1;
-
-  CHECK_STRING (args[0]);
-  /* We may have to change "%S" to "%s". */
-  args[0] = Fcopy_sequence (args[0]);
-
-  /* GC should never happen here, so abort if it does.  */
-  abort_on_gc++;
+  multibyte_format = STRING_MULTIBYTE (args[0]);
+  multibyte = multibyte_format;
+  for (n = 1; !multibyte && n < nargs; n++)
+    if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
+      multibyte = 1;
 
   /* If we start out planning a unibyte result,
-     then discover it has to be multibyte, we jump back to retry.
-     That can only happen from the first large while loop below.  */
+     then discover it has to be multibyte, we jump back to retry.  */
  retry:
 
-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
-  longest_format = 0;
-
-  /* Make room in result for all the non-%-codes in the control string.  */
-  total = 5 + CONVERTED_BYTE_SIZE (multibyte, args[0]) + 1;
-
-  /* Allocate the info and discarded tables.  */
-  {
-    size_t nbytes = (nargs+1) * sizeof *info;
-    size_t i;
-    if (!info)
-      info = (struct info *) alloca (nbytes);
-    memset (info, 0, nbytes);
-    for (i = 0; i < nargs + 1; i++)
-      info[i].start = -1;
-    if (!discarded)
-      SAFE_ALLOCA (discarded, char *, SBYTES (args[0]));
-    memset (discarded, 0, SBYTES (args[0]));
-  }
+  p = buf;
+  nchars = 0;
+  n = 0;
 
-  /* Add to TOTAL enough space to hold the converted arguments.  */
+  /* Scan the format and store result in BUF.  */
+  format = format_start;
+  end = format + formatlen;
+  maybe_combine_byte = 0;
 
-  n = 0;
   while (format != end)
-    if (*format++ == '%')
-      {
-	EMACS_INT thissize = 0;
-	EMACS_INT actual_width = 0;
-	char *this_format_start = format - 1;
-	int field_width = 0;
+    {
+      /* The values of N and FORMAT when the loop body is entered.  */
+      EMACS_INT n0 = n;
+      char *format0 = format;
 
-	/* General format specifications look like
+      /* Bytes needed to represent the output of this conversion.  */
+      EMACS_INT convbytes;
 
-	   '%' [flags] [field-width] [precision] format
+      if (*format == '%')
+	{
+	  /* General format specifications look like
 
-	   where
+	     '%' [flags] [field-width] [precision] format
 
-	   flags	::= [-+ #0]+
-	   field-width	::= [0-9]+
-	   precision	::= '.' [0-9]*
+	     where
 
-	   If a field-width is specified, it specifies to which width
-	   the output should be padded with blanks, if the output
-	   string is shorter than field-width.
+	     flags ::= [-+0# ]+
+	     field-width ::= [0-9]+
+	     precision ::= '.' [0-9]*
 
-	   If precision is specified, it specifies the number of
-	   digits to print after the '.' for floats, or the max.
-	   number of chars to print from a string.  */
+	     If a field-width is specified, it specifies to which width
+	     the output should be padded with blanks, if the output
+	     string is shorter than field-width.
 
-	while (format != end
-	       && (*format == '-' || *format == '0' || *format == '#'
-		   || * format == ' ' || *format == '+'))
-	  ++format;
+	     If precision is specified, it specifies the number of
+	     digits to print after the '.' for floats, or the max.
+	     number of chars to print from a string.  */
 
-	if (*format >= '0' && *format <= '9')
-	  {
-	    for (field_width = 0; *format >= '0' && *format <= '9'; ++format)
-	      field_width = 10 * field_width + *format - '0';
-	  }
+	  int minus_flag = 0;
+	  int  plus_flag = 0;
+	  int space_flag = 0;
+	  int sharp_flag = 0;
+	  int  zero_flag = 0;
+	  EMACS_INT field_width;
+	  int precision_given;
+	  uintmax_t precision = UINTMAX_MAX;
+	  char *num_end;
+	  char conversion;
 
-	/* N is not incremented for another few lines below, so refer to
-	   element N+1 (which might be precision[NARGS]). */
-	if (*format == '.')
-	  {
-	    ++format;
-	    for (precision[n+1] = 0; *format >= '0' && *format <= '9'; ++format)
-	      precision[n+1] = 10 * precision[n+1] + *format - '0';
-	  }
+	  while (1)
+	    {
+	      switch (*++format)
+		{
+		case '-': minus_flag = 1; continue;
+		case '+':  plus_flag = 1; continue;
+		case ' ': space_flag = 1; continue;
+		case '#': sharp_flag = 1; continue;
+		case '0':  zero_flag = 1; continue;
+		}
+	      break;
+	    }
 
-	/* Extra +1 for 'l' that we may need to insert into the
-	   format.  */
-	if (format - this_format_start + 2 > longest_format)
-	  longest_format = format - this_format_start + 2;
+	  /* Ignore flags when sprintf ignores them.  */
+	  space_flag &= ~ plus_flag;
+	  zero_flag &= ~ minus_flag;
 
-	if (format == end)
-	  error ("Format string ends in middle of format specifier");
-	if (*format == '%')
-	  format++;
-	else if (++n >= nargs)
-	  error ("Not enough arguments for format string");
-	else if (*format == 'S')
 	  {
-	    /* For `S', prin1 the argument and then treat like a string.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qnil);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    /* If we restart the loop, we should not come here again
-	       because args[n] is now a string and calling
-	       Fprin1_to_string on it produces superflous double
-	       quotes.  So, change "%S" to "%s" now.  */
-	    *format = 's';
-	    goto string;
+	    uintmax_t w = strtoumax (format, &num_end, 10);
+	    if (max_bufsize <= w)
+	      string_overflow ();
+	    field_width = w;
 	  }
-	else if (SYMBOLP (args[n]))
-	  {
-	    args[n] = SYMBOL_NAME (args[n]);
-	    if (STRING_MULTIBYTE (args[n]) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    goto string;
-	  }
-	else if (STRINGP (args[n]))
-	  {
-	  string:
-	    if (*format != 's' && *format != 'S')
-	      error ("Format specifier doesn't match argument type");
-	    /* In the case (PRECISION[N] > 0), THISSIZE may not need
-	       to be as large as is calculated here.  Easy check for
-	       the case PRECISION = 0. */
-	    thissize = precision[n] ? CONVERTED_BYTE_SIZE (multibyte, args[n]) : 0;
-	    /* The precision also constrains how much of the argument
-	       string will finally appear (Bug#5710). */
-	    actual_width = lisp_string_width (args[n], -1, NULL, NULL);
-	    if (precision[n] != -1)
-	      actual_width = min (actual_width, precision[n]);
-	  }
-	/* Would get MPV otherwise, since Lisp_Int's `point' to low memory.  */
-	else if (INTEGERP (args[n]) && *format != 's')
-	  {
-	    /* The following loop assumes the Lisp type indicates
-	       the proper way to pass the argument.
-	       So make sure we have a flonum if the argument should
-	       be a double.  */
-	    if (*format == 'e' || *format == 'f' || *format == 'g')
-	      args[n] = Ffloat (args[n]);
-	    else
-	      if (*format != 'd' && *format != 'o' && *format != 'x'
-		  && *format != 'i' && *format != 'X' && *format != 'c')
-		error ("Invalid format operation %%%c", *format);
-
-	    thissize = 30 + (precision[n] > 0 ? precision[n] : 0);
-	    if (*format == 'c')
-	      {
-		if (! ASCII_CHAR_P (XINT (args[n]))
-		    /* Note: No one can remember why we have to treat
-		       the character 0 as a multibyte character here.
-		       But, until it causes a real problem, let's
-		       don't change it.  */
-		    || XINT (args[n]) == 0)
-		  {
-		    if (! multibyte)
-		      {
-			multibyte = 1;
-			goto retry;
-		      }
-		    args[n] = Fchar_to_string (args[n]);
-		    thissize = SBYTES (args[n]);
-		  }
-	      }
-	  }
-	else if (FLOATP (args[n]) && *format != 's')
-	  {
-	    if (! (*format == 'e' || *format == 'f' || *format == 'g'))
-	      {
-		if (*format != 'd' && *format != 'o' && *format != 'x'
-		    && *format != 'i' && *format != 'X' && *format != 'c')
-		  error ("Invalid format operation %%%c", *format);
-		/* This fails unnecessarily if args[n] is bigger than
-		   most-positive-fixnum but smaller than MAXINT.
-		   These cases are important because we sometimes use floats
-		   to represent such integer values (typically such values
-		   come from UIDs or PIDs).  */
-		/* args[n] = Ftruncate (args[n], Qnil); */
-	      }
-
-	    /* Note that we're using sprintf to print floats,
-	       so we have to take into account what that function
-	       prints.  */
-	    /* Filter out flag value of -1.  */
-	    thissize = (MAX_10_EXP + 100
-			+ (precision[n] > 0 ? precision[n] : 0));
-	  }
-	else
-	  {
-	    /* Anything but a string, convert to a string using princ.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qt);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    goto string;
-	  }
-
-	thissize += max (0, field_width - actual_width);
-	total += thissize + 4;
-      }
-
-  abort_on_gc--;
-
-  /* Now we can no longer jump to retry.
-     TOTAL and LONGEST_FORMAT are known for certain.  */
-
-  this_format = (char *) alloca (longest_format + 1);
-
-  /* Allocate the space for the result.
-     Note that TOTAL is an overestimate.  */
-  SAFE_ALLOCA (buf, char *, total);
-
-  p = buf;
-  nchars = 0;
-  n = 0;
-
-  /* Scan the format and store result in BUF.  */
-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
-  maybe_combine_byte = 0;
-  while (format != end)
-    {
-      if (*format == '%')
-	{
-	  int minlen;
-	  int negative = 0;
-	  char *this_format_start = format;
-
+	  precision_given = *num_end == '.';
+	  if (precision_given)
+	    precision = strtoumax (num_end + 1, &num_end, 10);
+	  format = num_end;
+
+	  if (format == end)
+	    error ("Format string ends in middle of format specifier");
+
+	  memset (&discarded[format0 - format_start], 1, format - format0);
+	  conversion = *format;
+	  if (conversion == '%')
+	    goto copy_char;
 	  discarded[format - format_start] = 1;
 	  format++;
 
-	  while (strchr ("-+0# ", *format))
+	  ++n;
+	  if (! (n < nargs))
+	    error ("Not enough arguments for format string");
+
+	  /* For 'S', prin1 the argument, and then treat like 's'.
+	     For 's', princ any argument that is not a string or
+	     symbol.  But don't do this conversion twice, which might
+	     happen after retrying.  */
+	  if ((conversion == 'S'
+	       || (conversion == 's'
+		   && ! STRINGP (args[n]) && ! SYMBOLP (args[n]))))
 	    {
-	      if (*format == '-')
+	      if (! info[n].converted_to_string)
 		{
-		  negative = 1;
+		  Lisp_Object noescape = conversion == 'S' ? Qnil : Qt;
+		  args[n] = Fprin1_to_string (args[n], noescape);
+		  info[n].converted_to_string = 1;
+		  if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		    {
+		      multibyte = 1;
+		      goto retry;
+		    }
 		}
-	      discarded[format - format_start] = 1;
-	      ++format;
+	      conversion = 's';
 	    }
+	  else if (conversion == 'c')
+	    {
+	      if (FLOATP (args[n]))
+		{
+		  double d = XFLOAT_DATA (args[n]);
+		  args[n] = make_number (FIXNUM_OVERFLOW_P (d) ? -1 : d);
+		}
 
-	  minlen = atoi (format);
+	      if (INTEGERP (args[n]) && ! ASCII_CHAR_P (XINT (args[n])))
+		{
+		  if (!multibyte)
+		    {
+		      multibyte = 1;
+		      goto retry;
+		    }
+		  args[n] = Fchar_to_string (args[n]);
+		  info[n].converted_to_string = 1;
+		}
 
-	  while ((*format >= '0' && *format <= '9') || *format == '.')
-	    {
-	      discarded[format - format_start] = 1;
-	      format++;
+	      if (info[n].converted_to_string)
+		conversion = 's';
+	      zero_flag = 0;
 	    }
 
-	  if (*format++ == '%')
+	  if (SYMBOLP (args[n]))
 	    {
-	      *p++ = '%';
-	      nchars++;
-	      continue;
+	      args[n] = SYMBOL_NAME (args[n]);
+	      if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		{
+		  multibyte = 1;
+		  goto retry;
+		}
 	    }
 
-	  ++n;
-
-	  discarded[format - format_start - 1] = 1;
-	  info[n].start = nchars;
-
-	  if (STRINGP (args[n]))
+	  if (conversion == 's')
 	    {
 	      /* handle case (precision[n] >= 0) */
 
-	      int width, padding;
-	      EMACS_INT nbytes, start;
+	      EMACS_INT width, padding, nbytes;
 	      EMACS_INT nchars_string;
 
+	      EMACS_INT prec = -1;
+	      if (precision_given && precision <= TYPE_MAXIMUM (EMACS_INT))
+		prec = precision;
+
 	      /* lisp_string_width ignores a precision of 0, but GNU
 		 libc functions print 0 characters when the precision
 		 is 0.  Imitate libc behavior here.  Changing
 		 lisp_string_width is the right thing, and will be
 		 done, but meanwhile we work with it. */
 
-	      if (precision[n] == 0)
+	      if (prec == 0)
 		width = nchars_string = nbytes = 0;
-	      else if (precision[n] > 0)
-		width = lisp_string_width (args[n], precision[n],
-					   &nchars_string, &nbytes);
 	      else
-		{		/* no precision spec given for this argument */
-		  width = lisp_string_width (args[n], -1, NULL, NULL);
-		  nbytes = SBYTES (args[n]);
-		  nchars_string = SCHARS (args[n]);
+		{
+		  EMACS_INT nch, nby;
+		  width = lisp_string_width (args[n], prec, &nch, &nby);
+		  if (prec < 0)
+		    {
+		      nchars_string = SCHARS (args[n]);
+		      nbytes = SBYTES (args[n]);
+		    }
+		  else
+		    {
+		      nchars_string = nch;
+		      nbytes = nby;
+		    }
 		}
 
-	      /* If spec requires it, pad on right with spaces.  */
-	      padding = minlen - width;
-	      if (! negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    ++nchars;
-		  }
+	      convbytes = nbytes;
+	      if (convbytes && multibyte && ! STRING_MULTIBYTE (args[n]))
+		convbytes = count_size_as_multibyte (SDATA (args[n]), nbytes);
 
-	      info[n].start = start = nchars;
-	      nchars += nchars_string;
+	      padding = width < field_width ? field_width - width : 0;
 
-	      if (p > buf
-		  && multibyte
-		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-		  && STRING_MULTIBYTE (args[n])
-		  && !CHAR_HEAD_P (SREF (args[n], 0)))
-		maybe_combine_byte = 1;
+	      if (max_bufsize - padding <= convbytes)
+		string_overflow ();
+	      convbytes += padding;
+	      if (convbytes <= buf + bufsize - p)
+		{
+		  if (! minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
 
-	      p += copy_text (SDATA (args[n]), (unsigned char *) p,
-			      nbytes,
-			      STRING_MULTIBYTE (args[n]), multibyte);
+		  if (p > buf
+		      && multibyte
+		      && !ASCII_BYTE_P (*((unsigned char *) p - 1))
+		      && STRING_MULTIBYTE (args[n])
+		      && !CHAR_HEAD_P (SREF (args[n], 0)))
+		    maybe_combine_byte = 1;
 
-	      info[n].end = nchars;
+		  p += copy_text (SDATA (args[n]), (unsigned char *) p,
+				  nbytes,
+				  STRING_MULTIBYTE (args[n]), multibyte);
 
-	      if (negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    nchars++;
-		  }
+                  info[n].start = nchars;
+		  nchars += nchars_string;
+		  info[n].end = nchars;
 
-	      /* If this argument has text properties, record where
-		 in the result string it appears.  */
-	      if (STRING_INTERVALS (args[n]))
-		info[n].intervals = arg_intervals = 1;
+		  if (minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  /* If this argument has text properties, record where
+		     in the result string it appears.  */
+		  if (STRING_INTERVALS (args[n]))
+		    info[n].intervals = arg_intervals = 1;
+
+		  continue;
+		}
 	    }
-	  else if (INTEGERP (args[n]) || FLOATP (args[n]))
+	  else if (! (conversion == 'c' || conversion == 'd'
+		      || conversion == 'e' || conversion == 'f'
+		      || conversion == 'g' || conversion == 'i'
+		      || conversion == 'o' || conversion == 'x'
+		      || conversion == 'X'))
+	    error ("Invalid format operation %%%c",
+		   STRING_CHAR ((unsigned char *) format - 1));
+	  else if (! (INTEGERP (args[n]) || FLOATP (args[n])))
+	    error ("Format specifier doesn't match argument type");
+	  else
 	    {
-	      int this_nchars;
-
-	      memcpy (this_format, this_format_start,
-		      format - this_format_start);
-	      this_format[format - this_format_start] = 0;
+	      enum
+	      {
+		/* Maximum precision for a %f conversion such that the
+		   trailing output digit might be nonzero.  Any precisions
+		   larger than this will not yield useful information.  */
+		USEFUL_PRECISION_MAX =
+		  ((1 - DBL_MIN_EXP)
+		   * (FLT_RADIX == 2 || FLT_RADIX == 10 ? 1
+		      : FLT_RADIX == 16 ? 4
+		      : -1)),
+
+		/* Maximum number of bytes generated by any format, if
+		   precision is no more than DBL_USEFUL_PRECISION_MAX.
+		   On all practical hosts, %f is the worst case.  */
+		SPRINTF_BUFSIZE =
+		  sizeof "-." + (DBL_MAX_10_EXP + 1) + USEFUL_PRECISION_MAX
+	      };
+	      verify (0 < USEFUL_PRECISION_MAX);
+
+	      int prec;
+	      EMACS_INT padding, sprintf_bytes;
+	      uintmax_t excess_precision, numwidth;
+	      uintmax_t leading_zeros = 0, trailing_zeros = 0;
+
+	      char sprintf_buf[SPRINTF_BUFSIZE];
+
+	      /* Copy of conversion specification, modified somewhat.
+		 At most three flags F can be specified at once.  */
+	      char convspec[sizeof "%FFF.*d" + pWIDElen];
+
+	      /* Avoid undefined behavior in underlying sprintf.  */
+	      if (conversion == 'd' || conversion == 'i')
+		sharp_flag = 0;
+
+	      /* Create the copy of the conversion specification, with
+		 any width and precision removed, with ".*" inserted,
+		 and with pWIDE inserted for integer formats.  */
+	      {
+		char *f = convspec;
+		*f++ = '%';
+		*f = '-'; f += minus_flag;
+		*f = '+'; f +=  plus_flag;
+		*f = ' '; f += space_flag;
+		*f = '#'; f += sharp_flag;
+		*f = '0'; f +=  zero_flag;
+                *f++ = '.';
+                *f++ = '*';
+		if (conversion == 'd' || conversion == 'i'
+		    || conversion == 'o' || conversion == 'x'
+		    || conversion == 'X')
+		  {
+		    memcpy (f, pWIDE, pWIDElen);
+		    f += pWIDElen;
+		    zero_flag &= ~ precision_given;
+		  }
+		*f++ = conversion;
+		*f = '\0';
+	      }
 
-	      if (format[-1] == 'e' || format[-1] == 'f' || format[-1] == 'g')
-		sprintf (p, this_format, XFLOAT_DATA (args[n]));
+	      prec = -1;
+	      if (precision_given)
+		prec = min (precision, USEFUL_PRECISION_MAX);
+
+	      /* Use sprintf to format this number into sprintf_buf.  Omit
+		 padding and excess precision, though, because sprintf limits
+		 output length to INT_MAX.
+
+		 There are four types of conversion: double, unsigned
+		 char (passed as int), wide signed int, and wide
+		 unsigned int.  Treat them separately because the
+		 sprintf ABI is sensitive to which type is passed.  Be
+		 careful about integer overflow, NaNs, infinities, and
+		 conversions; for example, the min and max macros are
+		 not suitable here.  */
+	      if (conversion == 'e' || conversion == 'f' || conversion == 'g')
+		{
+		  double x = (INTEGERP (args[n])
+			      ? XINT (args[n])
+			      : XFLOAT_DATA (args[n]));
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
+	      else if (conversion == 'c')
+		{
+		  /* Don't use sprintf here, as it might mishandle prec.  */
+		  sprintf_buf[0] = XINT (args[n]);
+		  sprintf_bytes = prec != 0;
+		}
+	      else if (conversion == 'd')
+		{
+		  /* For float, maybe we should use "%1.0f"
+		     instead so it also works for values outside
+		     the integer range.  */
+		  signed_wide x;
+		  if (INTEGERP (args[n]))
+		    x = XINT (args[n]);
+		  else
+		    {
+		      double d = XFLOAT_DATA (args[n]);
+		      if (d < 0)
+			{
+			  x = TYPE_MINIMUM (signed_wide);
+			  if (x < d)
+			    x = d;
+			}
+		      else
+			{
+			  x = TYPE_MAXIMUM (signed_wide);
+			  if (d < x)
+			    x = d;
+			}
+		    }
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
 	      else
 		{
-		  if (sizeof (EMACS_INT) > sizeof (int)
-		      && format[-1] != 'c')
+		  /* Don't sign-extend for octal or hex printing.  */
+		  unsigned_wide x;
+		  if (INTEGERP (args[n]))
+		    x = XUINT (args[n]);
+		  else
 		    {
-		      /* Insert 'l' before format spec.  */
-		      this_format[format - this_format_start]
-			= this_format[format - this_format_start - 1];
-		      this_format[format - this_format_start - 1] = 'l';
-		      this_format[format - this_format_start + 1] = 0;
+		      double d = XFLOAT_DATA (args[n]);
+		      if (d < 0)
+			x = 0;
+		      else
+			{
+			  x = TYPE_MAXIMUM (unsigned_wide);
+			  if (d < x)
+			    x = d;
+			}
 		    }
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
 
-		  if (INTEGERP (args[n]))
+	      /* Now the length of the formatted item is known, except it omits
+		 padding and excess precision.  Deal with excess precision
+		 first.  This happens only when the format specifies
+		 ridiculously large precision.  */
+	      excess_precision = precision - prec;
+	      if (excess_precision)
+		{
+		  if (conversion == 'e' || conversion == 'f'
+		      || conversion == 'g')
 		    {
-		      if (format[-1] == 'c')
-			sprintf (p, this_format, (int) XINT (args[n]));
-		      else if (format[-1] == 'd')
-			sprintf (p, this_format, XINT (args[n]));
-		      /* Don't sign-extend for octal or hex printing.  */
+		      if ((conversion == 'g' && ! sharp_flag)
+			  || ! ('0' <= sprintf_buf[sprintf_bytes - 1]
+				&& sprintf_buf[sprintf_bytes - 1] <= '9'))
+			excess_precision = 0;
 		      else
-			sprintf (p, this_format, XUINT (args[n]));
+			{
+			  if (conversion == 'g')
+			    {
+			      char *dot = strchr (sprintf_buf, '.');
+			      if (!dot)
+				excess_precision = 0;
+			    }
+			}
+		      trailing_zeros = excess_precision;
 		    }
-		  else if (format[-1] == 'c')
-		    sprintf (p, this_format, (int) XFLOAT_DATA (args[n]));
-		  else if (format[-1] == 'd')
-		    /* Maybe we should use "%1.0f" instead so it also works
-		       for values larger than MAXINT.  */
-		    sprintf (p, this_format, (EMACS_INT) XFLOAT_DATA (args[n]));
 		  else
-		    /* Don't sign-extend for octal or hex printing.  */
-		    sprintf (p, this_format, (EMACS_UINT) XFLOAT_DATA (args[n]));
+		    leading_zeros = excess_precision;
+		}
+
+	      /* Compute the total bytes needed for this item, including
+		 excess precision and padding.  */
+	      numwidth = sprintf_bytes + excess_precision;
+	      padding = numwidth < field_width ? field_width - numwidth : 0;
+	      if (max_bufsize - sprintf_bytes <= excess_precision
+		  || max_bufsize - padding <= numwidth)
+		string_overflow ();
+	      convbytes = numwidth + padding;
+
+	      if (convbytes <= buf + bufsize - p)
+		{
+		  /* Copy the formatted item from sprintf_buf into buf,
+		     inserting padding and excess-precision zeros.  */
+
+                  char *src = sprintf_buf;
+		  char src0 = src[0];
+		  int exponent_bytes = 0;
+		  int signedp = src0 == '-' || src0 == '+' || src0 == ' ';
+		  int significand_bytes;
+		  if (zero_flag && '0' <= src[signedp] && src[signedp] <= '9')
+		    {
+		      leading_zeros += padding;
+		      padding = 0;
+		    }
+
+		  if (excess_precision
+		      && (conversion == 'e' || conversion == 'g'))
+		    {
+		      char *e = strchr (src, 'e');
+		      if (e)
+			exponent_bytes = src + sprintf_bytes - e;
+		    }
+
+		  if (! minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  *p = src0;
+		  src += signedp;
+		  p += signedp;
+		  memset (p, '0', leading_zeros);
+		  p += leading_zeros;
+		  significand_bytes = sprintf_bytes - signedp - exponent_bytes;
+		  memcpy (p, src, significand_bytes);
+                  p += significand_bytes;
+		  src += significand_bytes;
+		  memset (p, '0', trailing_zeros);
+		  p += trailing_zeros;
+		  memcpy (p, src, exponent_bytes);
+		  p += exponent_bytes;
+
+                  info[n].start = nchars;
+		  nchars += leading_zeros + sprintf_bytes + trailing_zeros;
+		  info[n].end = nchars;
+
+		  if (minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  continue;
 		}
+	    }
+	}
+      else
+      copy_char:
+	{
+	  /* Copy a single character from format to buf.  */
+
+	  char *src = format;
+	  unsigned char str[MAX_MULTIBYTE_LENGTH];
 
+	  if (multibyte_format)
+	    {
+	      /* Copy a whole multibyte character.  */
 	      if (p > buf
-		  && multibyte
 		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-		  && !CHAR_HEAD_P (*((unsigned char *) p)))
+		  && !CHAR_HEAD_P (*format))
 		maybe_combine_byte = 1;
-	      this_nchars = strlen (p);
-	      if (multibyte)
-		p += str_to_multibyte ((unsigned char *) p,
-				       buf + total - 1 - p, this_nchars);
+
+	      do
+		format++;
+	      while (! CHAR_HEAD_P (*format));
+
+	      convbytes = format - format0;
+	      memset (&discarded[format0 + 1 - format_start], 2, convbytes - 1);
+	    }
+	  else
+	    {
+	      unsigned char uc = *format++;
+	      if (! multibyte || ASCII_BYTE_P (uc))
+		convbytes = 1;
 	      else
-		p += this_nchars;
-	      nchars += this_nchars;
-	      info[n].end = nchars;
+		{
+		  int c = BYTE8_TO_CHAR (uc);
+		  convbytes = CHAR_STRING (c, str);
+		  src = (char *) str;
+		}
 	    }
 
-	}
-      else if (STRING_MULTIBYTE (args[0]))
-	{
-	  /* Copy a whole multibyte character.  */
-	  if (p > buf
-	      && multibyte
-	      && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-	      && !CHAR_HEAD_P (*format))
-	    maybe_combine_byte = 1;
-	  *p++ = *format++;
-	  while (! CHAR_HEAD_P (*format))
+	  if (convbytes <= buf + bufsize - p)
 	    {
-	      discarded[format - format_start] = 2;
-	      *p++ = *format++;
+	      memcpy (p, src, convbytes);
+	      p += convbytes;
+	      nchars++;
+	      continue;
 	    }
-	  nchars++;
 	}
-      else if (multibyte)
-	{
-	  /* Convert a single-byte character to multibyte.  */
-	  int len = copy_text ((unsigned char *) format, (unsigned char *) p,
-			       1, 0, 1);
 
-	  p += len;
-	  format++;
-	  nchars++;
-	}
-      else
-	*p++ = *format++, nchars++;
+      /* There wasn't enough room to store this conversion or single
+	 character.  CONVBYTES says how much room is needed.  Allocate
+	 enough room (and then some) and do it again.  */
+      {
+	EMACS_INT used = p - buf;
+
+	if (max_bufsize - used < convbytes)
+	  string_overflow ();
+	bufsize = used + convbytes;
+	bufsize = bufsize < max_bufsize / 2 ? bufsize * 2 : max_bufsize;
+
+	if (buf == initial_buffer)
+	  {
+	    buf = xmalloc (bufsize);
+	    sa_must_free = 1;
+	    buf_save_value = make_save_value (buf, 0);
+	    record_unwind_protect (safe_alloca_unwind, buf_save_value);
+	    memcpy (buf, initial_buffer, used);
+	  }
+	else
+	  XSAVE_VALUE (buf_save_value)->pointer = buf = xrealloc (buf, bufsize);
+
+	p = buf + used;
+      }
+
+      format = format0;
+      n = n0;
     }
 
-  if (p > buf + total)
+  if (bufsize < p - buf)
     abort ();
 
   if (maybe_combine_byte)
@@ -4089,7 +4234,7 @@ usage: (format STRING &rest OBJECTS)  */)
       if (CONSP (props))
 	{
 	  EMACS_INT bytepos = 0, position = 0, translated = 0;
-	  int argn = 1;
+	  EMACS_INT argn = 1;
 	  Lisp_Object list;
 
 	  /* Adjust the bounds of each text property