From f781f3307435c46ed3ff21708f75261dc30d2e37 Mon Sep 17 00:00:00 2001 From: Patrick McCarty Date: Thu, 29 Oct 2009 15:46:01 -0700 Subject: Move UTF-8 char length routine into separate function. --- lily/misc.cc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lily/misc.cc') diff --git a/lily/misc.cc b/lily/misc.cc index ee55a00c7e..2499a09e37 100644 --- a/lily/misc.cc +++ b/lily/misc.cc @@ -9,6 +9,7 @@ #include "misc.hh" +#include "warn.hh" /* Return the 2-log, rounded down @@ -86,3 +87,22 @@ camel_case_to_lisp_identifier (string in) return result; } +vsize +utf8_char_len (char current) +{ + vsize char_len = 1; + + // U+10000 - U+10FFFF + if ((current & 0xF0) == 0xF0) + char_len = 4; + // U+0800 - U+FFFF + else if ((current & 0xE0) == 0xE0) + char_len = 3; + // U+0080 - U+07FF + else if ((current & 0xC0) == 0xC0) + char_len = 2; + else if (current & 0x80) + programming_error ("invalid UTF-8 string"); + + return char_len; +} -- cgit v1.2.3