14 files changed, 656 insertions, 66 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 1442650d43..7c3a29c5d8 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,45 @@
+2013-09-22  Daniel Colascione  <dancol@dancol.org>
+
+	* data.c (Qbool_vector_p): New symbol.
+	(bool_vector_spare_mask,popcount_size_t_generic)
+	(popcount_size_t_msc,popcount_size_t_gcc)
+	(popcount_size_t)
+	(bool_vector_binop_driver)
+	(count_trailing_zero_bits,size_t_to_host_endian)
+	(Fbool_vector_exclusive_or)
+	(Fbool_vector_union)
+	(Fbool_vector_intersection,Fbool_vector_set_difference)
+	(Fbool_vector_subsetp,Fbool_vector_not)
+	(Fbool_vector_count_matches)
+	(Fbool_vector_count_matches_at): New functions.
+	(syms_of_data): Intern new symbol, functions.
+	* alloc.c (bool_vector_payload_bytes): New function.
+	(Fmake_bool_vector): Instead of calling Fmake_vector,
+	which performs redundant initialization and argument checking,
+	just call allocate_vector ourselves.  Make sure we clear any
+	terminating padding to zero.
+	(vector_nbytes,sweep_vectors): Use bool_vector_payload_bytes
+	instead of open-coding the size calculation.
+	(vroundup_ct): New macro.
+	(vroundup): Assume argument >= 0; invoke vroundup_ct.
+	* casetab.c (shuffle,set_identity): Change lint_assume to assume.
+	* composite.c (composition_gstring_put_cache): Change
+	lint_assume to assume.
+	* conf_post.h (assume): New macro.
+	(lint_assume): Remove.
+	* dispnew.c (update_frame_1): Change lint_assume to assume.
+	* ftfont.c (ftfont_shape_by_flt): Change lint_assume
+	to assume.
+	* image.c (gif_load): Change lint_assume to assume.
+	* lisp.h (eassert_and_assume): New macro.
+	(Qbool_vector_p): Declare.
+	(CHECK_BOOL_VECTOR,ROUNDUP,BITS_PER_SIZE_T): New macros.
+	(swap16,swap32,swap64): New inline functions.
+	* macfont.c (macfont_shape): Change lint_assume to assume.
+	* ralloc.c: Rename ROUNDUP to PAGE_ROUNDUP throughout.
+	* xsettings.c (parse_settings): Use new swap16 and
+	swap32 from lisp.h instead of file-specific macros.
+
 2013-09-22  Eli Zaretskii  <eliz@gnu.org>
 
 	* xdisp.c (try_window_id): Don't abort if cursor row could not be
diff --git a/src/alloc.c b/src/alloc.c
index de73ce9bae..847b3c88bb 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -2001,6 +2001,35 @@ INIT must be an integer that represents a character.  */)
   return val;
 }
 
+verify (sizeof (size_t) * CHAR_BIT == BITS_PER_SIZE_T);
+verify ((BITS_PER_SIZE_T & (BITS_PER_SIZE_T - 1)) == 0);
+
+static
+ptrdiff_t
+bool_vector_payload_bytes (ptrdiff_t nr_bits,
+                           ptrdiff_t* exact_needed_bytes_out)
+{
+  ptrdiff_t exact_needed_bytes;
+  ptrdiff_t needed_bytes;
+
+  eassert_and_assume (nr_bits >= 0);
+
+  exact_needed_bytes = ROUNDUP ((size_t) nr_bits, CHAR_BIT) / CHAR_BIT;
+  needed_bytes = ROUNDUP ((size_t) nr_bits, BITS_PER_SIZE_T) / CHAR_BIT;
+
+  if (needed_bytes == 0)
+    {
+      /* Always allocate at least one machine word of payload so that
+         bool-vector operations in data.c don't need a special case
+         for empty vectors.  */
+      needed_bytes = sizeof (size_t);
+    }
+
+  if (exact_needed_bytes_out != NULL)
+    *exact_needed_bytes_out = exact_needed_bytes;
+
+  return needed_bytes;
+}
 
 DEFUN ("make-bool-vector", Fmake_bool_vector, Smake_bool_vector, 2, 2, 0,
        doc: /* Return a new bool-vector of length LENGTH, using INIT for each element.
@@ -2009,37 +2038,43 @@ LENGTH must be a number.  INIT matters only in whether it is t or nil.  */)
 {
   register Lisp_Object val;
   struct Lisp_Bool_Vector *p;
-  ptrdiff_t length_in_chars;
-  EMACS_INT length_in_elts;
-  int bits_per_value;
-  int extra_bool_elts = ((bool_header_size - header_size + word_size - 1)
-			 / word_size);
+  ptrdiff_t exact_payload_bytes;
+  ptrdiff_t total_payload_bytes;
+  ptrdiff_t needed_elements;
 
   CHECK_NATNUM (length);
+  if (PTRDIFF_MAX < XFASTINT (length))
+    memory_full (SIZE_MAX);
 
-  bits_per_value = sizeof (EMACS_INT) * BOOL_VECTOR_BITS_PER_CHAR;
+  total_payload_bytes = bool_vector_payload_bytes
+    (XFASTINT (length), &exact_payload_bytes);
 
-  length_in_elts = (XFASTINT (length) + bits_per_value - 1) / bits_per_value;
+  eassert_and_assume (exact_payload_bytes <= total_payload_bytes);
+  eassert_and_assume (0 <= exact_payload_bytes);
 
-  val = Fmake_vector (make_number (length_in_elts + extra_bool_elts), Qnil);
+  needed_elements = ROUNDUP ((size_t) ((bool_header_size - header_size)
+                                       + total_payload_bytes),
+                             word_size) / word_size;
 
-  /* No Lisp_Object to trace in there.  */
+  p = (struct Lisp_Bool_Vector* ) allocate_vector (needed_elements);
+  XSETVECTOR (val, p);
   XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0, 0);
 
-  p = XBOOL_VECTOR (val);
   p->size = XFASTINT (length);
-
-  length_in_chars = ((XFASTINT (length) + BOOL_VECTOR_BITS_PER_CHAR - 1)
-		     / BOOL_VECTOR_BITS_PER_CHAR);
-  if (length_in_chars)
+  if (exact_payload_bytes)
     {
-      memset (p->data, ! NILP (init) ? -1 : 0, length_in_chars);
+      memset (p->data, ! NILP (init) ? -1 : 0, exact_payload_bytes);
 
       /* Clear any extraneous bits in the last byte.  */
-      p->data[length_in_chars - 1]
+      p->data[exact_payload_bytes - 1]
 	&= (1 << ((XFASTINT (length) - 1) % BOOL_VECTOR_BITS_PER_CHAR + 1)) - 1;
     }
 
+  /* Clear padding at the end.  */
+  memset (p->data + exact_payload_bytes,
+          0,
+          total_payload_bytes - exact_payload_bytes);
+
   return val;
 }
 
@@ -2565,24 +2600,22 @@ enum
     roundup_size = COMMON_MULTIPLE (word_size, USE_LSB_TAG ? GCALIGNMENT : 1)
   };
 
-/* ROUNDUP_SIZE must be a power of 2.  */
-verify ((roundup_size & (roundup_size - 1)) == 0);
-
 /* Verify assumptions described above.  */
 verify ((VECTOR_BLOCK_SIZE % roundup_size) == 0);
 verify (VECTOR_BLOCK_SIZE <= (1 << PSEUDOVECTOR_SIZE_BITS));
 
-/* Round up X to nearest mult-of-ROUNDUP_SIZE.  */
-
-#define vroundup(x) (((x) + (roundup_size - 1)) & ~(roundup_size - 1))
+/* Round up X to nearest mult-of-ROUNDUP_SIZE --- use at compile time.  */
+#define vroundup_ct(x) ROUNDUP((size_t)(x), roundup_size)
+/* Round up X to nearest mult-of-ROUNDUP_SIZE --- use at runtime.  */
+#define vroundup(x) (assume((x) >= 0), vroundup_ct(x))
 
 /* Rounding helps to maintain alignment constraints if USE_LSB_TAG.  */
 
-#define VECTOR_BLOCK_BYTES (VECTOR_BLOCK_SIZE - vroundup (sizeof (void *)))
+#define VECTOR_BLOCK_BYTES (VECTOR_BLOCK_SIZE - vroundup_ct (sizeof (void *)))
 
 /* Size of the minimal vector allocated from block.  */
 
-#define VBLOCK_BYTES_MIN vroundup (header_size + sizeof (Lisp_Object))
+#define VBLOCK_BYTES_MIN vroundup_ct (header_size + sizeof (Lisp_Object))
 
 /* Size of the largest vector allocated from block.  */
 
@@ -2642,7 +2675,7 @@ struct large_vector
     struct large_vector *vector;
 #if USE_LSB_TAG
     /* We need to maintain ROUNDUP_SIZE alignment for the vector member.  */
-    unsigned char c[vroundup (sizeof (struct large_vector *))];
+    unsigned char c[vroundup_ct (sizeof (struct large_vector *))];
 #endif
   } next;
   struct Lisp_Vector v;
@@ -2783,10 +2816,14 @@ vector_nbytes (struct Lisp_Vector *v)
   if (size & PSEUDOVECTOR_FLAG)
     {
       if (PSEUDOVECTOR_TYPEP (&v->header, PVEC_BOOL_VECTOR))
-	size = (bool_header_size
-		+ (((struct Lisp_Bool_Vector *) v)->size
-		   + BOOL_VECTOR_BITS_PER_CHAR - 1)
-		/ BOOL_VECTOR_BITS_PER_CHAR);
+        {
+          struct Lisp_Bool_Vector *bv = (struct Lisp_Bool_Vector *) v;
+          ptrdiff_t payload_bytes =
+              bool_vector_payload_bytes (bv->size, NULL);
+
+          eassert_and_assume (payload_bytes >= 0);
+          size = bool_header_size + ROUNDUP (payload_bytes, word_size);
+        }
       else
 	size = (header_size
 		+ ((size & PSEUDOVECTOR_SIZE_MASK)
@@ -2886,17 +2923,11 @@ sweep_vectors (void)
 	  total_vectors++;
 	  if (vector->header.size & PSEUDOVECTOR_FLAG)
 	    {
-	      struct Lisp_Bool_Vector *b = (struct Lisp_Bool_Vector *) vector;
-
 	      /* All non-bool pseudovectors are small enough to be allocated
 		 from vector blocks.  This code should be redesigned if some
 		 pseudovector type grows beyond VBLOCK_BYTES_MAX.  */
 	      eassert (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_BOOL_VECTOR));
-
-	      total_vector_slots
-		+= (bool_header_size
-		    + ((b->size + BOOL_VECTOR_BITS_PER_CHAR - 1)
-		       / BOOL_VECTOR_BITS_PER_CHAR)) / word_size;
+              total_vector_slots += vector_nbytes (vector) / word_size;
 	    }
 	  else
 	    total_vector_slots
diff --git a/src/casetab.c b/src/casetab.c
index b6b1c99c39..69cd784f4c 100644
--- a/src/casetab.c
+++ b/src/casetab.c
@@ -205,7 +205,7 @@ set_identity (Lisp_Object table, Lisp_Object c, Lisp_Object elt)
 	from = to = XINT (c);
 
       to++;
-      lint_assume (to <= MAX_CHAR + 1);
+      assume (to <= MAX_CHAR + 1);
       for (; from < to; from++)
 	CHAR_TABLE_SET (table, from, make_number (from));
     }
@@ -232,7 +232,7 @@ shuffle (Lisp_Object table, Lisp_Object c, Lisp_Object elt)
 	from = to = XINT (c);
 
       to++;
-      lint_assume (to <= MAX_CHAR + 1);
+      assume (to <= MAX_CHAR + 1);
       for (; from < to; from++)
 	{
 	  Lisp_Object tem = Faref (table, elt);
diff --git a/src/composite.c b/src/composite.c
index ee4195572a..47cac71508 100644
--- a/src/composite.c
+++ b/src/composite.c
@@ -674,7 +674,7 @@ composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
       len = j;
     }
 
-  lint_assume (len <= TYPE_MAXIMUM (ptrdiff_t) - 2);
+  assume (len <= TYPE_MAXIMUM (ptrdiff_t) - 2);
   copy = Fmake_vector (make_number (len + 2), Qnil);
   LGSTRING_SET_HEADER (copy, Fcopy_sequence (header));
   for (i = 0; i < len; i++)
diff --git a/src/conf_post.h b/src/conf_post.h
index 30a4a5a942..5f6cf0eca3 100644
--- a/src/conf_post.h
+++ b/src/conf_post.h
@@ -248,16 +248,24 @@ extern void _DebPrint (const char *fmt, ...);
 # define FLEXIBLE_ARRAY_MEMBER 1
 #endif
 
+/* assume(cond) tells the compiler (and lint) that a certain condition
+ * will always hold, and that it should optimize (or check) accordingly. */
+#if defined lint
+# define assume(cond) ((cond) ? (void) 0 : abort ())
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) || __GNUC__ > 4
+# define assume(cond) ((x) || (__builtin_unreachable(), 0))
+#elif defined __MSC_VER
+# define assume(cond) __assume ((cond))
+#else
+# define assume(cond) (0 && (cond))
+#endif
+
 /* Use this to suppress gcc's `...may be used before initialized' warnings. */
 #ifdef lint
 /* Use CODE only if lint checking is in effect.  */
 # define IF_LINT(Code) Code
-/* Assume that the expression COND is true.  This differs in intent
-   from 'assert', as it is a message from the programmer to the compiler.  */
-# define lint_assume(cond) ((cond) ? (void) 0 : abort ())
 #else
 # define IF_LINT(Code) /* empty */
-# define lint_assume(cond) ((void) (0 && (cond)))
 #endif
 
 /* conf_post.h ends here */
diff --git a/src/data.c b/src/data.c
index 51b0266eca..5a05e0652a 100644
--- a/src/data.c
+++ b/src/data.c
@@ -54,6 +54,7 @@ Lisp_Object Qintegerp, Qwholenump, Qsymbolp, Qlistp, Qconsp;
 static Lisp_Object Qnatnump;
 Lisp_Object Qstringp, Qarrayp, Qsequencep, Qbufferp;
 Lisp_Object Qchar_or_string_p, Qmarkerp, Qinteger_or_marker_p, Qvectorp;
+Lisp_Object Qbool_vector_p;
 Lisp_Object Qbuffer_or_string_p;
 static Lisp_Object Qkeywordp, Qboundp;
 Lisp_Object Qfboundp;
@@ -2956,6 +2957,457 @@ lowercase l) for small endian machines.  */)
   return make_number (order);
 }
 
+/* Because we round up the bool vector allocate size to word_size
+   units, we can safely read past the "end" of the vector in the
+   operations below.  These extra bits are always zero.  Also, we
+   always allocate bool vectors with at least one size_t of storage so
+   that we don't have to special-case empty bit vectors.  */
+
+static inline
+size_t
+bool_vector_spare_mask (ptrdiff_t nr_bits)
+{
+  eassert_and_assume (nr_bits > 0);
+  return (((size_t) 1) << (nr_bits % BITS_PER_SIZE_T)) - 1;
+}
+
+#if __MSC_VER >= 1500 && (defined _M_IX86 || defined _M_X64)
+# define USE_MSC_POPCOUNT
+#elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define USE_GCC_POPCOUNT
+#else
+# define NEED_GENERIC_POPCOUNT
+#endif
+
+#ifdef USE_MSC_POPCOUNT
+#define NEED_GENERIC_POPCOUNT
+#endif
+
+#ifdef NEED_GENERIC_POPCOUNT
+static inline
+unsigned int
+popcount_size_t_generic (size_t val)
+{
+    unsigned short j;
+    unsigned int count = 0;
+
+    for (j = 0; j < BITS_PER_SIZE_T; ++j)
+      count += !!((((size_t) 1) << j) & val);
+
+    return count;
+}
+#endif
+
+#ifdef USE_MSC_POPCOUNT
+static inline
+unsigned int
+popcount_size_t_msc (size_t val)
+{
+  unsigned int count;
+
+#pragma intrinsic __cpuid
+  /* While gcc falls back to its own generic code if the machine on
+     which it's running doesn't support popcount, we need to perform the
+     detection and fallback ourselves when compiling with Microsoft's
+     compiler.  */
+
+    static enum {
+      popcount_unknown_support,
+      popcount_use_generic,
+      popcount_use_intrinsic
+    } popcount_state;
+
+    if (popcount_state == popcount_unknown_support)
+      {
+        int cpu_info[4];
+        __cpuid (cpu_info, 1);
+        if (cpu_info[2] & (1<<23)) /* See MSDN.  */
+          popcount_state = popcount_use_intrinsic;
+        else
+          popcount_state = popcount_use_generic;
+      }
+
+    if (popcount_state == popcount_use_intrinsic)
+      {
+# if BITS_PER_SIZE_T == 64
+#  pragma intrinsic __popcnt64
+      count = __popcnt64 (val);
+# else
+#  pragma intrinsic __popcnt
+    count = __popcnt (val);
+# endif
+      }
+    else
+      count = popcount_size_t_generic (val);
+
+    return count;
+}
+#endif /* USE_MSC_POPCOUNT */
+
+#ifdef USE_GCC_POPCOUNT
+static inline
+unsigned int
+popcount_size_t_gcc (size_t val)
+{
+# if BITS_PER_SIZE_T == 64
+  return __builtin_popcountll (val);
+# else
+  return __builtin_popcount (val);
+# endif
+}
+#endif /* USE_GCC_POPCOUNT */
+
+static inline
+unsigned int
+popcount_size_t(size_t val)
+{
+#if defined USE_MSC_POPCOUNT
+  return popcount_size_t_msc (val);
+#elif defined USE_GCC_POPCOUNT
+  return popcount_size_t_gcc (val);
+#else
+  return popcount_size_t_generic (val);
+  #endif
+}
+
+enum bool_vector_op { bool_vector_exclusive_or,
+                      bool_vector_union,
+                      bool_vector_intersection,
+                      bool_vector_set_difference,
+                      bool_vector_subsetp };
+
+static inline
+Lisp_Object
+bool_vector_binop_driver (Lisp_Object op1,
+                          Lisp_Object op2,
+                          Lisp_Object dest,
+                          enum bool_vector_op op)
+{
+  EMACS_INT nr_bits;
+  size_t *adata, *bdata, *cdata;
+  ptrdiff_t i;
+  size_t changed = 0;
+  size_t mword;
+  ptrdiff_t nr_words;
+
+  CHECK_BOOL_VECTOR (op1);
+  CHECK_BOOL_VECTOR (op2);
+
+  nr_bits = min (XBOOL_VECTOR (op1)->size,
+                 XBOOL_VECTOR (op2)->size);
+
+  if (NILP (dest))
+    {
+      dest = Fmake_bool_vector (make_number (nr_bits), Qnil);
+      changed = 1;
+    }
+  else
+    {
+      CHECK_BOOL_VECTOR (dest);
+      nr_bits = min (nr_bits, XBOOL_VECTOR (dest)->size);
+    }
+
+  eassert_and_assume (nr_bits >= 0);
+  nr_words = ROUNDUP(nr_bits, BITS_PER_SIZE_T) / BITS_PER_SIZE_T;
+  
+  adata = (size_t*) XBOOL_VECTOR (dest)->data;
+  bdata = (size_t*) XBOOL_VECTOR (op1)->data;
+  cdata = (size_t*) XBOOL_VECTOR (op2)->data;
+  i = 0;
+  do
+    {
+      if (op == bool_vector_exclusive_or)
+        mword = bdata[i] ^ cdata[i];
+      else if (op == bool_vector_union || op == bool_vector_subsetp)
+        mword = bdata[i] | cdata[i];
+      else if (op == bool_vector_intersection)
+        mword = bdata[i] & cdata[i];
+      else if (op == bool_vector_set_difference)
+        mword = bdata[i] &~ cdata[i];
+      else
+        abort ();
+
+      changed |= adata[i] ^ mword;
+
+      if (op != bool_vector_subsetp)
+        adata[i] = mword;
+
+      i += 1;
+    }
+  while (i < nr_words);
+  return changed ? dest : Qnil;
+}
+
+/* Compute the number of trailing zero bits in val.  If val is zero,
+   return the number of bits in val.  */
+static inline
+unsigned int
+count_trailing_zero_bits (size_t val)
+{
+  if (val == 0)
+    return CHAR_BIT * sizeof (val);
+
+#if defined USE_GCC_POPCOUNT && BITS_PER_SIZE_T == 64
+  return __builtin_ctzll (val);
+#elif defined USE_GCC_POPCOUNT && BITS_PER_SIZE_T == 32
+  return __builtin_ctz (val);
+#elif __MSC_VER && BITS_PER_SIZE_T == 64
+# pragma intrinsic _BitScanForward64
+  {
+    /* No support test needed: support since 386.  */
+    unsigned long result;
+    _BitScanForward64 (&result, val);
+    return (unsigned int) result;
+  }
+#elif __MSC_VER && BITS_PER_SIZE_T == 32
+# pragma intrinsic _BitScanForward
+  {
+    /* No support test needed: support since 386.  */
+    unsigned long result;
+    _BitScanForward (&result, val);
+    return (unsigned int) result;
+  }
+#else
+  {
+    unsigned int count;
+    count = 0;
+    for(val = ~val; val & 1; val >>= 1)
+      ++count;
+
+    return count;
+  }
+#endif
+}
+
+static inline
+size_t
+size_t_to_host_endian (size_t val)
+{
+#ifdef WORDS_BIGENDIAN
+# if BITS_PER_SIZE_T == 64
+  return swap64 (val);
+# else
+  return swap32 (val);
+# endif
+#else
+  return val;
+#endif
+}
+
+DEFUN ("bool-vector-exclusive-or", Fbool_vector_exclusive_or,
+       Sbool_vector_exclusive_or, 2, 3, 0,
+       doc: /* Compute C = A ^ B, bitwise exclusive or.
+A, B, and C must be bool vectors.  If C is nil, allocate a new bool
+vector in which to store the result.  Return the destination vector if
+it changed or nil otherwise.  */
+       )
+  (Lisp_Object a, Lisp_Object b, Lisp_Object c)
+{
+  return bool_vector_binop_driver (a, b, c, bool_vector_exclusive_or);
+}
+
+DEFUN ("bool-vector-union", Fbool_vector_union,
+       Sbool_vector_union, 2, 3, 0,
+       doc: /* Compute C = A | B, bitwise or.
+A, B, and C must be bool vectors.  If C is nil, allocate a new bool
+vector in which to store the result.  Return the destination vector if
+it changed or nil otherwise.  */)
+  (Lisp_Object a, Lisp_Object b, Lisp_Object c)
+{
+  return bool_vector_binop_driver (a, b, c, bool_vector_union);
+}
+
+DEFUN ("bool-vector-intersection", Fbool_vector_intersection,
+       Sbool_vector_intersection, 2, 3, 0,
+       doc: /* Compute C = A & B, bitwise and.
+A, B, and C must be bool vectors.  If C is nil, allocate a new bool
+vector in which to store the result.  Return the destination vector if
+it changed or nil otherwise.  */)
+  (Lisp_Object a, Lisp_Object b, Lisp_Object c)
+{
+  return bool_vector_binop_driver (a, b, c, bool_vector_intersection);
+}
+
+DEFUN ("bool-vector-set-difference", Fbool_vector_set_difference,
+       Sbool_vector_set_difference, 2, 3, 0,
+       doc: /* Compute C = A &~ B, set difference.
+A, B, and C must be bool vectors.  If C is nil, allocate a new bool
+vector in which to store the result.  Return the destination vector if
+it changed or nil otherwise.  */)
+  (Lisp_Object a, Lisp_Object b, Lisp_Object c)
+{
+  return bool_vector_binop_driver (a, b, c, bool_vector_set_difference);
+}
+
+DEFUN ("bool-vector-subsetp", Fbool_vector_subsetp,
+       Sbool_vector_subsetp, 2, 2, 0,
+       doc: )
+  (Lisp_Object a, Lisp_Object b)
+{
+  /* Like bool_vector_union, but doesn't modify b.  */
+  return bool_vector_binop_driver (b, a, b, bool_vector_subsetp);
+}
+
+DEFUN ("bool-vector-not", Fbool_vector_not,
+       Sbool_vector_not, 1, 2, 0,
+       doc: /* Compute B = ~A.
+B must be a bool vector.  A must be a bool vector or nil.
+If A is nil, allocate a new bool vector in which to store the result.
+Return the destination vector.  */)
+  (Lisp_Object a, Lisp_Object b)
+{
+  EMACS_INT nr_bits;
+  size_t *bdata, *adata;
+  ptrdiff_t i;
+  size_t mword;
+
+  CHECK_BOOL_VECTOR (a);
+  nr_bits = XBOOL_VECTOR (a)->size;
+
+  if (NILP (b))
+    b = Fmake_bool_vector (make_number (nr_bits), Qnil);
+  else
+    {
+      CHECK_BOOL_VECTOR (b);
+      nr_bits = min (nr_bits, XBOOL_VECTOR (b)->size);
+    }
+
+  bdata = (size_t*) XBOOL_VECTOR (b)->data;
+  adata = (size_t*) XBOOL_VECTOR (a)->data;
+  i = 0;
+
+  eassert_and_assume (nr_bits >= 0);
+
+  while (i < nr_bits / BITS_PER_SIZE_T)
+    {
+      bdata[i] = ~adata[i];
+      i += 1;
+    }
+
+  if (nr_bits % BITS_PER_SIZE_T)
+    {
+      mword = size_t_to_host_endian (adata[i]);
+      mword = ~mword;
+      mword &= bool_vector_spare_mask (nr_bits);
+      bdata[i] = size_t_to_host_endian (mword);
+    }
+
+  return b;
+}
+
+DEFUN ("bool-vector-count-matches", Fbool_vector_count_matches,
+       Sbool_vector_count_matches, 2, 2, 0,
+       doc: /* Count how many elements in A equal B.
+A must be a bool vector. B is a generalized bool.  */)
+  (Lisp_Object a, Lisp_Object b)
+{
+  ptrdiff_t count;
+  EMACS_INT nr_bits;
+  size_t *adata;
+  size_t match;
+  ptrdiff_t i;
+
+  CHECK_BOOL_VECTOR (a);
+
+  nr_bits = XBOOL_VECTOR (a)->size;
+  count = 0;
+  match = NILP (b) ? (size_t) -1 : 0;
+  adata = (size_t*) XBOOL_VECTOR (a)->data;
+
+  eassert_and_assume (nr_bits >= 0);
+
+  for(i = 0; i < nr_bits / BITS_PER_SIZE_T; ++i)
+    count += popcount_size_t (adata[i] ^ match);
+
+  /* Mask out trailing parts of final mword.  */
+  if (nr_bits % BITS_PER_SIZE_T)
+    {
+      size_t mword = adata[i] ^ match;
+      mword = size_t_to_host_endian (mword);
+      count += popcount_size_t (mword & bool_vector_spare_mask (nr_bits));
+    }
+
+  return make_number (count);
+}
+
+DEFUN ("bool-vector-count-matches-at",
+       Fbool_vector_count_matches_at,
+       Sbool_vector_count_matches_at, 3, 3, 0,
+       doc: /* Count how many consecutive elements in A equal B at i.
+A must be a bool vector.  B is a generalized boolean.  i is an
+index into the vector.*/)
+  (Lisp_Object a, Lisp_Object b, Lisp_Object i)
+{
+  ptrdiff_t count;
+  EMACS_INT nr_bits;
+  ptrdiff_t offset;
+  size_t *adata;
+  size_t twiddle;
+  size_t mword; /* Machine word.  */
+  ptrdiff_t pos;
+  ptrdiff_t nr_words;
+
+  CHECK_BOOL_VECTOR (a);
+  CHECK_NATNUM (i);
+
+  nr_bits = XBOOL_VECTOR (a)->size;
+  if (XFASTINT (i) > nr_bits) /* Allow one past the end for convenience */
+    args_out_of_range (a, i);
+
+  adata = (size_t*) XBOOL_VECTOR (a)->data;
+
+  assume (nr_bits >= 0);
+  nr_words = ROUNDUP (nr_bits, BITS_PER_SIZE_T) / BITS_PER_SIZE_T;
+  
+  pos = XFASTINT (i) / BITS_PER_SIZE_T;
+  offset = XFASTINT (i) % BITS_PER_SIZE_T;
+  count = 0;
+
+  /* By XORing with twiddle, we transform the problem of "count
+     consecutive equal values" into "count the zero bits".  The latter
+     operation usually has hardware support.  */
+  twiddle = NILP (b) ? 0 : (size_t) -1;
+
+  /* Scan the remainder of the mword at the current offset.  */
+  if (pos < nr_words && offset != 0)
+    {
+      mword = size_t_to_host_endian (adata[pos]);
+      mword ^= twiddle;
+      mword >>= offset;
+      count = count_trailing_zero_bits (mword);
+      count = min (count, BITS_PER_SIZE_T - offset);
+      pos += 1;
+      if (count + offset < BITS_PER_SIZE_T)
+        return make_number (count);
+    }
+
+  /* Scan whole words until we either reach the end of the vector or
+     find an mword that doesn't completely match.  twiddle is
+     endian-independent.  */
+  while (pos < nr_words && adata[pos] == twiddle)
+    {
+      count += BITS_PER_SIZE_T;
+      ++pos;
+    }
+
+  if (pos < nr_words)
+    {
+      /* If we stopped because of a mismatch, see how many bits match
+         in the current mword.  */
+      mword = size_t_to_host_endian (adata[pos]);
+      mword ^= twiddle;
+      count += count_trailing_zero_bits (mword);
+    }
+  else if (nr_bits % BITS_PER_SIZE_T != 0)
+    {
+      /* If we hit the end, we might have overshot our count.  Reduce
+         the total by the number of spare bits at the end of the
+         vector.  */
+      count -= BITS_PER_SIZE_T - nr_bits % BITS_PER_SIZE_T;
+    }
+
+  return make_number (count);
+}
 
 
 void
@@ -3005,6 +3457,7 @@ syms_of_data (void)
   DEFSYM (Qsequencep, "sequencep");
   DEFSYM (Qbufferp, "bufferp");
   DEFSYM (Qvectorp, "vectorp");
+  DEFSYM (Qbool_vector_p, "bool-vector-p");
   DEFSYM (Qchar_or_string_p, "char-or-string-p");
   DEFSYM (Qmarkerp, "markerp");
   DEFSYM (Qbuffer_or_string_p, "buffer-or-string-p");
@@ -3222,6 +3675,15 @@ syms_of_data (void)
   defsubr (&Ssubr_arity);
   defsubr (&Ssubr_name);
 
+  defsubr (&Sbool_vector_exclusive_or);
+  defsubr (&Sbool_vector_union);
+  defsubr (&Sbool_vector_intersection);
+  defsubr (&Sbool_vector_set_difference);
+  defsubr (&Sbool_vector_not);
+  defsubr (&Sbool_vector_subsetp);
+  defsubr (&Sbool_vector_count_matches);
+  defsubr (&Sbool_vector_count_matches_at);
+
   set_symbol_function (Qwholenump, XSYMBOL (Qnatnump)->function);
 
   DEFVAR_LISP ("most-positive-fixnum", Vmost_positive_fixnum,
diff --git a/src/dispnew.c b/src/dispnew.c
index 5bdc84f1b1..ed7349a450 100644
--- a/src/dispnew.c
+++ b/src/dispnew.c
@@ -4451,7 +4451,7 @@ update_frame_1 (struct frame *f, bool force_p, bool inhibit_id_p)
 	}
     }
 
-  lint_assume (0 <= FRAME_LINES (f));
+  assume (0 <= FRAME_LINES (f));
   pause_p = 0 < i && i < FRAME_LINES (f) - 1;
 
   /* Now just clean up termcap drivers and set cursor, etc.  */
diff --git a/src/ftfont.c b/src/ftfont.c
index 3636f86f5c..4e58d83fd6 100644
--- a/src/ftfont.c
+++ b/src/ftfont.c
@@ -2425,7 +2425,7 @@ ftfont_shape_by_flt (Lisp_Object lgstring, struct font *font,
     }
 
   len = i;
-  lint_assume (len <= STRING_BYTES_BOUND);
+  assume (len <= STRING_BYTES_BOUND);
 
   if (with_variation_selector)
     {
diff --git a/src/image.c b/src/image.c
index e315953366..e429830cc9 100644
--- a/src/image.c
+++ b/src/image.c
@@ -7523,7 +7523,7 @@ gif_load (struct frame *f, struct image *img)
 	    {
 	      while (subimg_height <= row)
 		{
-		  lint_assume (pass < 3);
+		  assume (pass < 3);
 		  row = interlace_start[++pass];
 		}
 
diff --git a/src/intervals.c b/src/intervals.c
index 66b486e142..69a3386728 100644
--- a/src/intervals.c
+++ b/src/intervals.c
@@ -1405,7 +1405,7 @@ offset_intervals (struct buffer *buffer, ptrdiff_t start, ptrdiff_t length)
 				    start, length);
   else
     {
-      lint_assume (- TYPE_MAXIMUM (ptrdiff_t) <= length);
+      assume (- TYPE_MAXIMUM (ptrdiff_t) <= length);
       adjust_intervals_for_deletion (buffer, start, -length);
     }
 }
diff --git a/src/lisp.h b/src/lisp.h
index bd09cab5a7..0fffea5757 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -131,6 +131,13 @@ extern bool suppress_checking EXTERNALLY_VISIBLE;
     ? (void) 0							\
     : die (# cond, __FILE__, __LINE__))
 #endif /* ENABLE_CHECKING */
+
+/* When checking is enabled, identical to eassert.  When checking is
+ * disabled, instruct the compiler (when the compiler has such
+ * capability) to assume that cond is true and optimize
+ * accordingly.  */
+#define eassert_and_assume(cond) (eassert (cond), assume (cond))
+
 
 /* Use the configure flag --enable-check-lisp-object-type to make
    Lisp_Object use a struct type instead of the default int.  The flag
@@ -730,6 +737,7 @@ extern int char_table_translate (Lisp_Object, int);
 extern Lisp_Object Qarrayp, Qbufferp, Qbuffer_or_string_p, Qchar_table_p;
 extern Lisp_Object Qconsp, Qfloatp, Qintegerp, Qlambda, Qlistp, Qmarkerp, Qnil;
 extern Lisp_Object Qnumberp, Qstringp, Qsymbolp, Qvectorp;
+extern Lisp_Object Qbool_vector_p;
 extern Lisp_Object Qvector_or_char_table_p, Qwholenump;
 extern Lisp_Object Qwindow;
 extern Lisp_Object Ffboundp (Lisp_Object);
@@ -2359,6 +2367,11 @@ CHECK_VECTOR (Lisp_Object x)
   CHECK_TYPE (VECTORP (x), Qvectorp, x);
 }
 INLINE void
+CHECK_BOOL_VECTOR (Lisp_Object x)
+{
+  CHECK_TYPE (BOOL_VECTOR_P (x), Qbool_vector_p, x);
+}
+INLINE void
 CHECK_VECTOR_OR_STRING (Lisp_Object x)
 {
   CHECK_TYPE (VECTORP (x) || STRINGP (x), Qarrayp, x);
@@ -4347,6 +4360,43 @@ functionp (Lisp_Object object)
     return 0;
 }
 
+INLINE
+uint16_t
+swap16 (uint16_t val)
+{
+    return (val << 8) | (val & 0xFF);
+}
+
+INLINE
+uint32_t
+swap32 (uint32_t val)
+{
+  uint32_t low = swap16 (val & 0xFFFF);
+  uint32_t high = swap16 (val >> 16);
+  return (low << 16) | high;
+}
+
+#ifdef UINT64_MAX
+INLINE
+uint64_t
+swap64 (uint64_t val)
+{
+  uint64_t low = swap32 (val & 0xFFFFFFFF);
+  uint64_t high = swap32 (val >> 32);
+  return (low << 32) | high;
+}
+#endif
+
+#if ((SIZE_MAX >> 31) >> 1) & 1
+# define BITS_PER_SIZE_T 64
+#else
+# define BITS_PER_SIZE_T 32
+#endif
+
+/* Round x to the next multiple of y.  Does not overflow.  Evaluates
+   arguments repeatedly.  */
+#define ROUNDUP(x,y) ((y)*((x)/(y) + ((x)%(y)!=0)))
+
 INLINE_HEADER_END
 
 #endif /* EMACS_LISP_H */
diff --git a/src/macfont.m b/src/macfont.m
index 2a6d219d05..ab5029743e 100644
--- a/src/macfont.m
+++ b/src/macfont.m
@@ -2817,7 +2817,7 @@ macfont_shape (Lisp_Object lgstring)
     }
 
   len = i;
-  lint_assume (len <= TYPE_MAXIMUM (EMACS_INT) - 2);
+  assume (len <= TYPE_MAXIMUM (EMACS_INT) - 2);
 
   if (INT_MAX / 2 < len)
     memory_full (SIZE_MAX);
diff --git a/src/ralloc.c b/src/ralloc.c
index 5f25ef2c32..5b7d6a512d 100644
--- a/src/ralloc.c
+++ b/src/ralloc.c
@@ -85,7 +85,7 @@ static int extra_bytes;
 /* Macros for rounding.  Note that rounding to any value is possible
    by changing the definition of PAGE.  */
 #define PAGE (getpagesize ())
-#define ROUNDUP(size) (((size_t) (size) + page_size - 1) \
+#define PAGE_ROUNDUP(size) (((size_t) (size) + page_size - 1) \
 		       & ~((size_t) (page_size - 1)))
 
 #define MEM_ALIGN sizeof (double)
@@ -281,7 +281,7 @@ obtain (void *address, size_t size)
 	 Get some extra, so we can come here less often.  */
 
       get = size + extra_bytes - already_available;
-      get = (char *) ROUNDUP ((char *) last_heap->end + get)
+      get = (char *) PAGE_ROUNDUP ((char *) last_heap->end + get)
 	- (char *) last_heap->end;
 
       if (real_morecore (get) != last_heap->end)
@@ -344,7 +344,7 @@ relinquish (void)
       else
 	{
 	  excess = ((char *) last_heap->end
-		    - (char *) ROUNDUP ((char *) last_heap->end - excess));
+		    - (char *) PAGE_ROUNDUP ((char *) last_heap->end - excess));
 	  /* If the system doesn't want that much memory back, leave
 	     the end of the last heap unchanged to reflect that.  This
 	     can occur if break_value is still within the original
@@ -768,9 +768,9 @@ r_alloc_sbrk (ptrdiff_t size)
 	 not always find a space which is contiguous to the previous.  */
       void *new_bloc_start;
       heap_ptr h = first_heap;
-      size_t get = ROUNDUP (size);
+      size_t get = PAGE_ROUNDUP (size);
 
-      address = (void *) ROUNDUP (virtual_break_value);
+      address = (void *) PAGE_ROUNDUP (virtual_break_value);
 
       /* Search the list upward for a heap which is large enough.  */
       while ((char *) h->end < (char *) MEM_ROUNDUP ((char *) address + get))
@@ -778,7 +778,7 @@ r_alloc_sbrk (ptrdiff_t size)
 	  h = h->next;
 	  if (h == NIL_HEAP)
 	    break;
-	  address = (void *) ROUNDUP (h->start);
+	  address = (void *) PAGE_ROUNDUP (h->start);
 	}
 
       /* If not found, obtain more space.  */
@@ -790,9 +790,9 @@ r_alloc_sbrk (ptrdiff_t size)
 	    return 0;
 
 	  if (first_heap == last_heap)
-	    address = (void *) ROUNDUP (virtual_break_value);
+	    address = (void *) PAGE_ROUNDUP (virtual_break_value);
 	  else
-	    address = (void *) ROUNDUP (last_heap->start);
+	    address = (void *) PAGE_ROUNDUP (last_heap->start);
 	  h = last_heap;
 	}
 
@@ -1054,7 +1054,7 @@ r_alloc_check (void)
   for (h = first_heap; h; h = h->next)
     {
       assert (h->prev == ph);
-      assert ((void *) ROUNDUP (h->end) == h->end);
+      assert ((void *) PAGE_ROUNDUP (h->end) == h->end);
 #if 0 /* ??? The code in ralloc.c does not really try to ensure
 	 the heap start has any sort of alignment.
 	 Perhaps it should.  */
@@ -1190,7 +1190,7 @@ r_alloc_init (void)
   if (break_value == NULL)
     emacs_abort ();
 
-  extra_bytes = ROUNDUP (50000);
+  extra_bytes = PAGE_ROUNDUP (50000);
 #endif
 
 #ifdef DOUG_LEA_MALLOC
@@ -1212,7 +1212,7 @@ r_alloc_init (void)
 #endif
 
 #ifndef SYSTEM_MALLOC
-  first_heap->end = (void *) ROUNDUP (first_heap->start);
+  first_heap->end = (void *) PAGE_ROUNDUP (first_heap->start);
 
   /* The extra call to real_morecore guarantees that the end of the
      address space is a multiple of page_size, even if page_size is
diff --git a/src/xsettings.c b/src/xsettings.c
index bdf9f2876b..8fe82fec74 100644
--- a/src/xsettings.c
+++ b/src/xsettings.c
@@ -336,9 +336,6 @@ get_prop_window (struct x_display_info *dpyinfo)
   XUngrabServer (dpy);
 }
 
-#define SWAP32(nr) (((nr) << 24) | (((nr) << 8) & 0xff0000)     \
-                    | (((nr) >> 8) & 0xff00) | ((nr) >> 24))
-#define SWAP16(nr) (((nr) << 8) | ((nr) >> 8))
 #define PAD(nr)    (((nr) + 3) & ~3)
 
 /* Parse xsettings and extract those that deal with Xft.
@@ -408,7 +405,7 @@ parse_settings (unsigned char *prop,
 
   if (bytes < 12) return BadLength;
   memcpy (&n_settings, prop+8, 4);
-  if (my_bo != that_bo) n_settings = SWAP32 (n_settings);
+  if (my_bo != that_bo) n_settings = swap32 (n_settings);
   bytes_parsed = 12;
 
   memset (settings, 0, sizeof (*settings));
@@ -430,7 +427,7 @@ parse_settings (unsigned char *prop,
 
       memcpy (&nlen, prop+bytes_parsed, 2);
       bytes_parsed += 2;
-      if (my_bo != that_bo) nlen = SWAP16 (nlen);
+      if (my_bo != that_bo) nlen = swap16 (nlen);
       if (bytes_parsed+nlen > bytes) return BadLength;
       to_cpy = nlen > 127 ? 127 : nlen;
       memcpy (name, prop+bytes_parsed, to_cpy);
@@ -457,7 +454,7 @@ parse_settings (unsigned char *prop,
           if (want_this)
             {
               memcpy (&ival, prop+bytes_parsed, 4);
-              if (my_bo != that_bo) ival = SWAP32 (ival);
+              if (my_bo != that_bo) ival = swap32 (ival);
             }
           bytes_parsed += 4;
           break;
@@ -466,7 +463,7 @@ parse_settings (unsigned char *prop,
           if (bytes_parsed+4 > bytes) return BadLength;
           memcpy (&vlen, prop+bytes_parsed, 4);
           bytes_parsed += 4;
-          if (my_bo != that_bo) vlen = SWAP32 (vlen);
+          if (my_bo != that_bo) vlen = swap32 (vlen);
           if (want_this)
             {
               to_cpy = vlen > 127 ? 127 : vlen;