[Groonga-commit] groonga/groonga [master] normalizer: add MySQLGeneralCI normalizer

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jan 23 18:43:28 JST 2013


Kouhei Sutou	2013-01-23 18:43:28 +0900 (Wed, 23 Jan 2013)

  New Revision: 724c5443db625f2c794da197817b9fa8003e287e
  https://github.com/groonga/groonga/commit/724c5443db625f2c794da197817b9fa8003e287e

  Log:
    normalizer: add MySQLGeneralCI normalizer
    
    It is an EXPERIMENTAL normalizer! It works with a simple test but it
    should be tested with more data!

  Added files:
    plugins/normalizers/Makefile.am
    plugins/normalizers/mysql.c
    plugins/normalizers/mysql_sources.am
    test/command/suite/normalizers/mysql/general_ci/plane00.expected
    test/command/suite/normalizers/mysql/general_ci/plane00.test
  Copied files:
    plugins/normalizers/CMakeLists.txt
      (from plugins/CMakeLists.txt)
  Modified files:
    configure.ac
    plugins/CMakeLists.txt
    plugins/Makefile.am

  Modified: configure.ac (+4 -0)
===================================================================
--- configure.ac    2013-01-23 18:33:56 +0900 (ef851fd)
+++ configure.ac    2013-01-23 18:43:28 +0900 (1ae6600)
@@ -224,6 +224,7 @@ AC_CONFIG_FILES([
   plugins/suggest/Makefile
   plugins/table/Makefile
   plugins/query_expanders/Makefile
+  plugins/normalizers/Makefile
   examples/Makefile
   examples/dictionary/Makefile
   examples/dictionary/edict/Makefile
@@ -1168,6 +1169,9 @@ AC_SUBST(tokenizers_pluginsdir)
 query_expanders_pluginsdir="\${pluginsdir}/query_expanders"
 AC_SUBST(query_expanders_pluginsdir)
 
+normalizers_pluginsdir="\${pluginsdir}/normalizers"
+AC_SUBST(normalizers_pluginsdir)
+
 suggest_pluginsdir="\${pluginsdir}/suggest"
 AC_SUBST(suggest_pluginsdir)
 

  Modified: plugins/CMakeLists.txt (+1 -0)
===================================================================
--- plugins/CMakeLists.txt    2013-01-23 18:33:56 +0900 (9d94824)
+++ plugins/CMakeLists.txt    2013-01-23 18:43:28 +0900 (12883c7)
@@ -17,3 +17,4 @@ add_subdirectory(suggest)
 add_subdirectory(tokenizers)
 add_subdirectory(table)
 add_subdirectory(query_expanders)
+add_subdirectory(normalizers)

  Modified: plugins/Makefile.am (+2 -1)
===================================================================
--- plugins/Makefile.am    2013-01-23 18:33:56 +0900 (fbe6f15)
+++ plugins/Makefile.am    2013-01-23 18:43:28 +0900 (c5a0e14)
@@ -2,7 +2,8 @@ SUBDIRS =					\
 	tokenizers				\
 	suggest					\
 	table					\
-	query_expanders
+	query_expanders				\
+	normalizers
 
 EXTRA_DIST =					\
 	CMakeLists.txt

  Copied: plugins/normalizers/CMakeLists.txt (+13 -5) 58%
===================================================================
--- plugins/CMakeLists.txt    2013-01-23 18:33:56 +0900 (9d94824)
+++ plugins/normalizers/CMakeLists.txt    2013-01-23 18:43:28 +0900 (9f85773)
@@ -1,4 +1,4 @@
-# Copyright(C) 2012 Brazil
+# Copyright(C) 2013 Brazil
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -13,7 +13,15 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
-add_subdirectory(suggest)
-add_subdirectory(tokenizers)
-add_subdirectory(table)
-add_subdirectory(query_expanders)
+include_directories(
+  ${CMAKE_SOURCE_DIR}/lib
+  )
+
+set(NORMALIZERS_DIR "${GRN_PLUGINS_DIR}/normalizers")
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/mysql_sources.am MYSQL_SOURCES)
+add_library(mysql_normalizer MODULE ${MYSQL_SOURCES})
+set_target_properties(mysql_normalizer PROPERTIES
+  PREFIX ""
+  OUTPUT_NAME "mysql")
+target_link_libraries(mysql_normalizer libgroonga)
+install(TARGETS mysql_normalizer DESTINATION "${NORMALIZERS_DIR}")

  Added: plugins/normalizers/Makefile.am (+20 -0) 100644
===================================================================
--- /dev/null
+++ plugins/normalizers/Makefile.am    2013-01-23 18:43:28 +0900 (5dc7c12)
@@ -0,0 +1,20 @@
+EXTRA_DIST =					\
+	CMakeLists.txt
+
+INCLUDES =			\
+	-I$(top_builddir)	\
+	-I$(top_srcdir)/include	\
+	-I$(top_srcdir)/lib
+
+AM_LDFLAGS =					\
+	-avoid-version				\
+	-module					\
+	-no-undefined
+
+LIBS =						\
+	$(top_builddir)/lib/libgroonga.la
+
+normalizers_plugins_LTLIBRARIES =
+normalizers_plugins_LTLIBRARIES += mysql.la
+
+include mysql_sources.am

  Added: plugins/normalizers/mysql.c (+1699 -0) 100644
===================================================================
--- /dev/null
+++ plugins/normalizers/mysql.c    2013-01-23 18:43:28 +0900 (e3f737b)
@@ -0,0 +1,1699 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2013 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Library General Public
+  License as published by the Free Software Foundation; version 2
+  of the License.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Library General Public License for more details.
+
+  You should have received a copy of the GNU Library General Public
+  License along with this library; if not, write to the Free
+  Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+  MA 02110-1301, USA
+
+  This file uses normalization table defined in
+  MySQL-5.5.29/strings/ctype-utf8.c. The following is the header of
+  the file:
+
+    Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; version 2
+    of the License.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+    MA 02110-1301, USA
+
+    UTF8 according RFC 2279
+    Written by Alexander Barkov <bar �� udm.net>
+*/
+
+#include <groonga/normalizer.h>
+#include <groonga/tokenizer.h>
+
+/* grn_enctostr() :< */
+#include <str.h>
+
+/* based on mysql-5.5.29/strings/ctype-utf8.c: start */
+static uint32_t plane00[] = {
+  0x0000,  0x0001,
+  0x0002,  0x0003,
+  0x0004,  0x0005,
+  0x0006,  0x0007,
+  0x0008,  0x0009,
+  0x000A,  0x000B,
+  0x000C,  0x000D,
+  0x000E,  0x000F,
+  0x0010,  0x0011,
+  0x0012,  0x0013,
+  0x0014,  0x0015,
+  0x0016,  0x0017,
+  0x0018,  0x0019,
+  0x001A,  0x001B,
+  0x001C,  0x001D,
+  0x001E,  0x001F,
+  0x0020,  0x0021,
+  0x0022,  0x0023,
+  0x0024,  0x0025,
+  0x0026,  0x0027,
+  0x0028,  0x0029,
+  0x002A,  0x002B,
+  0x002C,  0x002D,
+  0x002E,  0x002F,
+  0x0030,  0x0031,
+  0x0032,  0x0033,
+  0x0034,  0x0035,
+  0x0036,  0x0037,
+  0x0038,  0x0039,
+  0x003A,  0x003B,
+  0x003C,  0x003D,
+  0x003E,  0x003F,
+  0x0040,  0x0041,
+  0x0042,  0x0043,
+  0x0044,  0x0045,
+  0x0046,  0x0047,
+  0x0048,  0x0049,
+  0x004A,  0x004B,
+  0x004C,  0x004D,
+  0x004E,  0x004F,
+  0x0050,  0x0051,
+  0x0052,  0x0053,
+  0x0054,  0x0055,
+  0x0056,  0x0057,
+  0x0058,  0x0059,
+  0x005A,  0x005B,
+  0x005C,  0x005D,
+  0x005E,  0x005F,
+  0x0060,  0x0041,
+  0x0042,  0x0043,
+  0x0044,  0x0045,
+  0x0046,  0x0047,
+  0x0048,  0x0049,
+  0x004A,  0x004B,
+  0x004C,  0x004D,
+  0x004E,  0x004F,
+  0x0050,  0x0051,
+  0x0052,  0x0053,
+  0x0054,  0x0055,
+  0x0056,  0x0057,
+  0x0058,  0x0059,
+  0x005A,  0x007B,
+  0x007C,  0x007D,
+  0x007E,  0x007F,
+  0x0080,  0x0081,
+  0x0082,  0x0083,
+  0x0084,  0x0085,
+  0x0086,  0x0087,
+  0x0088,  0x0089,
+  0x008A,  0x008B,
+  0x008C,  0x008D,
+  0x008E,  0x008F,
+  0x0090,  0x0091,
+  0x0092,  0x0093,
+  0x0094,  0x0095,
+  0x0096,  0x0097,
+  0x0098,  0x0099,
+  0x009A,  0x009B,
+  0x009C,  0x009D,
+  0x009E,  0x009F,
+  0x00A0,  0x00A1,
+  0x00A2,  0x00A3,
+  0x00A4,  0x00A5,
+  0x00A6,  0x00A7,
+  0x00A8,  0x00A9,
+  0x00AA,  0x00AB,
+  0x00AC,  0x00AD,
+  0x00AE,  0x00AF,
+  0x00B0,  0x00B1,
+  0x00B2,  0x00B3,
+  0x00B4,  0x039C,
+  0x00B6,  0x00B7,
+  0x00B8,  0x00B9,
+  0x00BA,  0x00BB,
+  0x00BC,  0x00BD,
+  0x00BE,  0x00BF,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x00C6,  0x0043,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x00D0,  0x004E,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x00D7,
+  0x00D8,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0059,
+  0x00DE,  0x0053,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x00C6,  0x0043,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x00D0,  0x004E,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x00F7,
+  0x00D8,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0059,
+  0x00DE,  0x0059
+};
+
+static uint32_t plane01[] = {
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0043,  0x0043,
+  0x0043,  0x0043,
+  0x0043,  0x0043,
+  0x0043,  0x0043,
+  0x0044,  0x0044,
+  0x0110,  0x0110,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0047,  0x0047,
+  0x0047,  0x0047,
+  0x0047,  0x0047,
+  0x0047,  0x0047,
+  0x0048,  0x0048,
+  0x0126,  0x0126,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x0132,  0x0132,
+  0x004A,  0x004A,
+  0x004B,  0x004B,
+  0x0138,  0x004C,
+  0x004C,  0x004C,
+  0x004C,  0x004C,
+  0x004C,  0x013F,
+  0x013F,  0x0141,
+  0x0141,  0x004E,
+  0x004E,  0x004E,
+  0x004E,  0x004E,
+  0x004E,  0x0149,
+  0x014A,  0x014A,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x0152,  0x0152,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0054,  0x0054,
+  0x0054,  0x0054,
+  0x0166,  0x0166,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0057,  0x0057,
+  0x0059,  0x0059,
+  0x0059,  0x005A,
+  0x005A,  0x005A,
+  0x005A,  0x005A,
+  0x005A,  0x0053,
+  0x0180,  0x0181,
+  0x0182,  0x0182,
+  0x0184,  0x0184,
+  0x0186,  0x0187,
+  0x0187,  0x0189,
+  0x018A,  0x018B,
+  0x018B,  0x018D,
+  0x018E,  0x018F,
+  0x0190,  0x0191,
+  0x0191,  0x0193,
+  0x0194,  0x01F6,
+  0x0196,  0x0197,
+  0x0198,  0x0198,
+  0x019A,  0x019B,
+  0x019C,  0x019D,
+  0x019E,  0x019F,
+  0x004F,  0x004F,
+  0x01A2,  0x01A2,
+  0x01A4,  0x01A4,
+  0x01A6,  0x01A7,
+  0x01A7,  0x01A9,
+  0x01AA,  0x01AB,
+  0x01AC,  0x01AC,
+  0x01AE,  0x0055,
+  0x0055,  0x01B1,
+  0x01B2,  0x01B3,
+  0x01B3,  0x01B5,
+  0x01B5,  0x01B7,
+  0x01B8,  0x01B8,
+  0x01BA,  0x01BB,
+  0x01BC,  0x01BC,
+  0x01BE,  0x01F7,
+  0x01C0,  0x01C1,
+  0x01C2,  0x01C3,
+  0x01C4,  0x01C4,
+  0x01C4,  0x01C7,
+  0x01C7,  0x01C7,
+  0x01CA,  0x01CA,
+  0x01CA,  0x0041,
+  0x0041,  0x0049,
+  0x0049,  0x004F,
+  0x004F,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x018E,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x00C6,  0x00C6,
+  0x01E4,  0x01E4,
+  0x0047,  0x0047,
+  0x004B,  0x004B,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x01B7,  0x01B7,
+  0x004A,  0x01F1,
+  0x01F1,  0x01F1,
+  0x0047,  0x0047,
+  0x01F6,  0x01F7,
+  0x004E,  0x004E,
+  0x0041,  0x0041,
+  0x00C6,  0x00C6,
+  0x00D8,  0x00D8
+};
+
+static uint32_t plane02[] = {
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0053,  0x0053,
+  0x0054,  0x0054,
+  0x021C,  0x021C,
+  0x0048,  0x0048,
+  0x0220,  0x0221,
+  0x0222,  0x0222,
+  0x0224,  0x0224,
+  0x0041,  0x0041,
+  0x0045,  0x0045,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x0059,  0x0059,
+  0x0234,  0x0235,
+  0x0236,  0x0237,
+  0x0238,  0x0239,
+  0x023A,  0x023B,
+  0x023C,  0x023D,
+  0x023E,  0x023F,
+  0x0240,  0x0241,
+  0x0242,  0x0243,
+  0x0244,  0x0245,
+  0x0246,  0x0247,
+  0x0248,  0x0249,
+  0x024A,  0x024B,
+  0x024C,  0x024D,
+  0x024E,  0x024F,
+  0x0250,  0x0251,
+  0x0252,  0x0181,
+  0x0186,  0x0255,
+  0x0189,  0x018A,
+  0x0258,  0x018F,
+  0x025A,  0x0190,
+  0x025C,  0x025D,
+  0x025E,  0x025F,
+  0x0193,  0x0261,
+  0x0262,  0x0194,
+  0x0264,  0x0265,
+  0x0266,  0x0267,
+  0x0197,  0x0196,
+  0x026A,  0x026B,
+  0x026C,  0x026D,
+  0x026E,  0x019C,
+  0x0270,  0x0271,
+  0x019D,  0x0273,
+  0x0274,  0x019F,
+  0x0276,  0x0277,
+  0x0278,  0x0279,
+  0x027A,  0x027B,
+  0x027C,  0x027D,
+  0x027E,  0x027F,
+  0x01A6,  0x0281,
+  0x0282,  0x01A9,
+  0x0284,  0x0285,
+  0x0286,  0x0287,
+  0x01AE,  0x0289,
+  0x01B1,  0x01B2,
+  0x028C,  0x028D,
+  0x028E,  0x028F,
+  0x0290,  0x0291,
+  0x01B7,  0x0293,
+  0x0294,  0x0295,
+  0x0296,  0x0297,
+  0x0298,  0x0299,
+  0x029A,  0x029B,
+  0x029C,  0x029D,
+  0x029E,  0x029F,
+  0x02A0,  0x02A1,
+  0x02A2,  0x02A3,
+  0x02A4,  0x02A5,
+  0x02A6,  0x02A7,
+  0x02A8,  0x02A9,
+  0x02AA,  0x02AB,
+  0x02AC,  0x02AD,
+  0x02AE,  0x02AF,
+  0x02B0,  0x02B1,
+  0x02B2,  0x02B3,
+  0x02B4,  0x02B5,
+  0x02B6,  0x02B7,
+  0x02B8,  0x02B9,
+  0x02BA,  0x02BB,
+  0x02BC,  0x02BD,
+  0x02BE,  0x02BF,
+  0x02C0,  0x02C1,
+  0x02C2,  0x02C3,
+  0x02C4,  0x02C5,
+  0x02C6,  0x02C7,
+  0x02C8,  0x02C9,
+  0x02CA,  0x02CB,
+  0x02CC,  0x02CD,
+  0x02CE,  0x02CF,
+  0x02D0,  0x02D1,
+  0x02D2,  0x02D3,
+  0x02D4,  0x02D5,
+  0x02D6,  0x02D7,
+  0x02D8,  0x02D9,
+  0x02DA,  0x02DB,
+  0x02DC,  0x02DD,
+  0x02DE,  0x02DF,
+  0x02E0,  0x02E1,
+  0x02E2,  0x02E3,
+  0x02E4,  0x02E5,
+  0x02E6,  0x02E7,
+  0x02E8,  0x02E9,
+  0x02EA,  0x02EB,
+  0x02EC,  0x02ED,
+  0x02EE,  0x02EF,
+  0x02F0,  0x02F1,
+  0x02F2,  0x02F3,
+  0x02F4,  0x02F5,
+  0x02F6,  0x02F7,
+  0x02F8,  0x02F9,
+  0x02FA,  0x02FB,
+  0x02FC,  0x02FD,
+  0x02FE,  0x02FF
+};
+
+static uint32_t plane03[] = {
+  0x0300,  0x0301,
+  0x0302,  0x0303,
+  0x0304,  0x0305,
+  0x0306,  0x0307,
+  0x0308,  0x0309,
+  0x030A,  0x030B,
+  0x030C,  0x030D,
+  0x030E,  0x030F,
+  0x0310,  0x0311,
+  0x0312,  0x0313,
+  0x0314,  0x0315,
+  0x0316,  0x0317,
+  0x0318,  0x0319,
+  0x031A,  0x031B,
+  0x031C,  0x031D,
+  0x031E,  0x031F,
+  0x0320,  0x0321,
+  0x0322,  0x0323,
+  0x0324,  0x0325,
+  0x0326,  0x0327,
+  0x0328,  0x0329,
+  0x032A,  0x032B,
+  0x032C,  0x032D,
+  0x032E,  0x032F,
+  0x0330,  0x0331,
+  0x0332,  0x0333,
+  0x0334,  0x0335,
+  0x0336,  0x0337,
+  0x0338,  0x0339,
+  0x033A,  0x033B,
+  0x033C,  0x033D,
+  0x033E,  0x033F,
+  0x0340,  0x0341,
+  0x0342,  0x0343,
+  0x0344,  0x0399,
+  0x0346,  0x0347,
+  0x0348,  0x0349,
+  0x034A,  0x034B,
+  0x034C,  0x034D,
+  0x034E,  0x034F,
+  0x0350,  0x0351,
+  0x0352,  0x0353,
+  0x0354,  0x0355,
+  0x0356,  0x0357,
+  0x0358,  0x0359,
+  0x035A,  0x035B,
+  0x035C,  0x035D,
+  0x035E,  0x035F,
+  0x0360,  0x0361,
+  0x0362,  0x0363,
+  0x0364,  0x0365,
+  0x0366,  0x0367,
+  0x0368,  0x0369,
+  0x036A,  0x036B,
+  0x036C,  0x036D,
+  0x036E,  0x036F,
+  0x0370,  0x0371,
+  0x0372,  0x0373,
+  0x0374,  0x0375,
+  0x0376,  0x0377,
+  0x0378,  0x0379,
+  0x037A,  0x037B,
+  0x037C,  0x037D,
+  0x037E,  0x037F,
+  0x0380,  0x0381,
+  0x0382,  0x0383,
+  0x0384,  0x0385,
+  0x0391,  0x0387,
+  0x0395,  0x0397,
+  0x0399,  0x038B,
+  0x039F,  0x038D,
+  0x03A5,  0x03A9,
+  0x0399,  0x0391,
+  0x0392,  0x0393,
+  0x0394,  0x0395,
+  0x0396,  0x0397,
+  0x0398,  0x0399,
+  0x039A,  0x039B,
+  0x039C,  0x039D,
+  0x039E,  0x039F,
+  0x03A0,  0x03A1,
+  0x03A2,  0x03A3,
+  0x03A4,  0x03A5,
+  0x03A6,  0x03A7,
+  0x03A8,  0x03A9,
+  0x0399,  0x03A5,
+  0x0391,  0x0395,
+  0x0397,  0x0399,
+  0x03A5,  0x0391,
+  0x0392,  0x0393,
+  0x0394,  0x0395,
+  0x0396,  0x0397,
+  0x0398,  0x0399,
+  0x039A,  0x039B,
+  0x039C,  0x039D,
+  0x039E,  0x039F,
+  0x03A0,  0x03A1,
+  0x03A3,  0x03A3,
+  0x03A4,  0x03A5,
+  0x03A6,  0x03A7,
+  0x03A8,  0x03A9,
+  0x0399,  0x03A5,
+  0x039F,  0x03A5,
+  0x03A9,  0x03CF,
+  0x0392,  0x0398,
+  0x03D2,  0x03D2,
+  0x03D2,  0x03A6,
+  0x03A0,  0x03D7,
+  0x03D8,  0x03D9,
+  0x03DA,  0x03DA,
+  0x03DC,  0x03DC,
+  0x03DE,  0x03DE,
+  0x03E0,  0x03E0,
+  0x03E2,  0x03E2,
+  0x03E4,  0x03E4,
+  0x03E6,  0x03E6,
+  0x03E8,  0x03E8,
+  0x03EA,  0x03EA,
+  0x03EC,  0x03EC,
+  0x03EE,  0x03EE,
+  0x039A,  0x03A1,
+  0x03A3,  0x03F3,
+  0x03F4,  0x03F5,
+  0x03F6,  0x03F7,
+  0x03F8,  0x03F9,
+  0x03FA,  0x03FB,
+  0x03FC,  0x03FD,
+  0x03FE,  0x03FF
+};
+
+static uint32_t plane04[] = {
+  0x0415,  0x0415,
+  0x0402,  0x0413,
+  0x0404,  0x0405,
+  0x0406,  0x0406,
+  0x0408,  0x0409,
+  0x040A,  0x040B,
+  0x041A,  0x0418,
+  0x0423,  0x040F,
+  0x0410,  0x0411,
+  0x0412,  0x0413,
+  0x0414,  0x0415,
+  0x0416,  0x0417,
+  0x0418,  0x0419,
+  0x041A,  0x041B,
+  0x041C,  0x041D,
+  0x041E,  0x041F,
+  0x0420,  0x0421,
+  0x0422,  0x0423,
+  0x0424,  0x0425,
+  0x0426,  0x0427,
+  0x0428,  0x0429,
+  0x042A,  0x042B,
+  0x042C,  0x042D,
+  0x042E,  0x042F,
+  0x0410,  0x0411,
+  0x0412,  0x0413,
+  0x0414,  0x0415,
+  0x0416,  0x0417,
+  0x0418,  0x0419,
+  0x041A,  0x041B,
+  0x041C,  0x041D,
+  0x041E,  0x041F,
+  0x0420,  0x0421,
+  0x0422,  0x0423,
+  0x0424,  0x0425,
+  0x0426,  0x0427,
+  0x0428,  0x0429,
+  0x042A,  0x042B,
+  0x042C,  0x042D,
+  0x042E,  0x042F,
+  0x0415,  0x0415,
+  0x0402,  0x0413,
+  0x0404,  0x0405,
+  0x0406,  0x0406,
+  0x0408,  0x0409,
+  0x040A,  0x040B,
+  0x041A,  0x0418,
+  0x0423,  0x040F,
+  0x0460,  0x0460,
+  0x0462,  0x0462,
+  0x0464,  0x0464,
+  0x0466,  0x0466,
+  0x0468,  0x0468,
+  0x046A,  0x046A,
+  0x046C,  0x046C,
+  0x046E,  0x046E,
+  0x0470,  0x0470,
+  0x0472,  0x0472,
+  0x0474,  0x0474,
+  0x0474,  0x0474,
+  0x0478,  0x0478,
+  0x047A,  0x047A,
+  0x047C,  0x047C,
+  0x047E,  0x047E,
+  0x0480,  0x0480,
+  0x0482,  0x0483,
+  0x0484,  0x0485,
+  0x0486,  0x0487,
+  0x0488,  0x0489,
+  0x048A,  0x048B,
+  0x048C,  0x048C,
+  0x048E,  0x048E,
+  0x0490,  0x0490,
+  0x0492,  0x0492,
+  0x0494,  0x0494,
+  0x0496,  0x0496,
+  0x0498,  0x0498,
+  0x049A,  0x049A,
+  0x049C,  0x049C,
+  0x049E,  0x049E,
+  0x04A0,  0x04A0,
+  0x04A2,  0x04A2,
+  0x04A4,  0x04A4,
+  0x04A6,  0x04A6,
+  0x04A8,  0x04A8,
+  0x04AA,  0x04AA,
+  0x04AC,  0x04AC,
+  0x04AE,  0x04AE,
+  0x04B0,  0x04B0,
+  0x04B2,  0x04B2,
+  0x04B4,  0x04B4,
+  0x04B6,  0x04B6,
+  0x04B8,  0x04B8,
+  0x04BA,  0x04BA,
+  0x04BC,  0x04BC,
+  0x04BE,  0x04BE,
+  0x04C0,  0x0416,
+  0x0416,  0x04C3,
+  0x04C3,  0x04C5,
+  0x04C6,  0x04C7,
+  0x04C7,  0x04C9,
+  0x04CA,  0x04CB,
+  0x04CB,  0x04CD,
+  0x04CE,  0x04CF,
+  0x0410,  0x0410,
+  0x0410,  0x0410,
+  0x04D4,  0x04D4,
+  0x0415,  0x0415,
+  0x04D8,  0x04D8,
+  0x04D8,  0x04D8,
+  0x0416,  0x0416,
+  0x0417,  0x0417,
+  0x04E0,  0x04E0,
+  0x0418,  0x0418,
+  0x0418,  0x0418,
+  0x041E,  0x041E,
+  0x04E8,  0x04E8,
+  0x04E8,  0x04E8,
+  0x042D,  0x042D,
+  0x0423,  0x0423,
+  0x0423,  0x0423,
+  0x0423,  0x0423,
+  0x0427,  0x0427,
+  0x04F6,  0x04F7,
+  0x042B,  0x042B,
+  0x04FA,  0x04FB,
+  0x04FC,  0x04FD,
+  0x04FE,  0x04FF
+};
+
+static uint32_t plane05[] = {
+  0x0500,  0x0501,
+  0x0502,  0x0503,
+  0x0504,  0x0505,
+  0x0506,  0x0507,
+  0x0508,  0x0509,
+  0x050A,  0x050B,
+  0x050C,  0x050D,
+  0x050E,  0x050F,
+  0x0510,  0x0511,
+  0x0512,  0x0513,
+  0x0514,  0x0515,
+  0x0516,  0x0517,
+  0x0518,  0x0519,
+  0x051A,  0x051B,
+  0x051C,  0x051D,
+  0x051E,  0x051F,
+  0x0520,  0x0521,
+  0x0522,  0x0523,
+  0x0524,  0x0525,
+  0x0526,  0x0527,
+  0x0528,  0x0529,
+  0x052A,  0x052B,
+  0x052C,  0x052D,
+  0x052E,  0x052F,
+  0x0530,  0x0531,
+  0x0532,  0x0533,
+  0x0534,  0x0535,
+  0x0536,  0x0537,
+  0x0538,  0x0539,
+  0x053A,  0x053B,
+  0x053C,  0x053D,
+  0x053E,  0x053F,
+  0x0540,  0x0541,
+  0x0542,  0x0543,
+  0x0544,  0x0545,
+  0x0546,  0x0547,
+  0x0548,  0x0549,
+  0x054A,  0x054B,
+  0x054C,  0x054D,
+  0x054E,  0x054F,
+  0x0550,  0x0551,
+  0x0552,  0x0553,
+  0x0554,  0x0555,
+  0x0556,  0x0557,
+  0x0558,  0x0559,
+  0x055A,  0x055B,
+  0x055C,  0x055D,
+  0x055E,  0x055F,
+  0x0560,  0x0531,
+  0x0532,  0x0533,
+  0x0534,  0x0535,
+  0x0536,  0x0537,
+  0x0538,  0x0539,
+  0x053A,  0x053B,
+  0x053C,  0x053D,
+  0x053E,  0x053F,
+  0x0540,  0x0541,
+  0x0542,  0x0543,
+  0x0544,  0x0545,
+  0x0546,  0x0547,
+  0x0548,  0x0549,
+  0x054A,  0x054B,
+  0x054C,  0x054D,
+  0x054E,  0x054F,
+  0x0550,  0x0551,
+  0x0552,  0x0553,
+  0x0554,  0x0555,
+  0x0556,  0x0587,
+  0x0588,  0x0589,
+  0x058A,  0x058B,
+  0x058C,  0x058D,
+  0x058E,  0x058F,
+  0x0590,  0x0591,
+  0x0592,  0x0593,
+  0x0594,  0x0595,
+  0x0596,  0x0597,
+  0x0598,  0x0599,
+  0x059A,  0x059B,
+  0x059C,  0x059D,
+  0x059E,  0x059F,
+  0x05A0,  0x05A1,
+  0x05A2,  0x05A3,
+  0x05A4,  0x05A5,
+  0x05A6,  0x05A7,
+  0x05A8,  0x05A9,
+  0x05AA,  0x05AB,
+  0x05AC,  0x05AD,
+  0x05AE,  0x05AF,
+  0x05B0,  0x05B1,
+  0x05B2,  0x05B3,
+  0x05B4,  0x05B5,
+  0x05B6,  0x05B7,
+  0x05B8,  0x05B9,
+  0x05BA,  0x05BB,
+  0x05BC,  0x05BD,
+  0x05BE,  0x05BF,
+  0x05C0,  0x05C1,
+  0x05C2,  0x05C3,
+  0x05C4,  0x05C5,
+  0x05C6,  0x05C7,
+  0x05C8,  0x05C9,
+  0x05CA,  0x05CB,
+  0x05CC,  0x05CD,
+  0x05CE,  0x05CF,
+  0x05D0,  0x05D1,
+  0x05D2,  0x05D3,
+  0x05D4,  0x05D5,
+  0x05D6,  0x05D7,
+  0x05D8,  0x05D9,
+  0x05DA,  0x05DB,
+  0x05DC,  0x05DD,
+  0x05DE,  0x05DF,
+  0x05E0,  0x05E1,
+  0x05E2,  0x05E3,
+  0x05E4,  0x05E5,
+  0x05E6,  0x05E7,
+  0x05E8,  0x05E9,
+  0x05EA,  0x05EB,
+  0x05EC,  0x05ED,
+  0x05EE,  0x05EF,
+  0x05F0,  0x05F1,
+  0x05F2,  0x05F3,
+  0x05F4,  0x05F5,
+  0x05F6,  0x05F7,
+  0x05F8,  0x05F9,
+  0x05FA,  0x05FB,
+  0x05FC,  0x05FD,
+  0x05FE,  0x05FF
+};
+
+static uint32_t plane1E[] = {
+  0x0041,  0x0041,
+  0x0042,  0x0042,
+  0x0042,  0x0042,
+  0x0042,  0x0042,
+  0x0043,  0x0043,
+  0x0044,  0x0044,
+  0x0044,  0x0044,
+  0x0044,  0x0044,
+  0x0044,  0x0044,
+  0x0044,  0x0044,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0046,  0x0046,
+  0x0047,  0x0047,
+  0x0048,  0x0048,
+  0x0048,  0x0048,
+  0x0048,  0x0048,
+  0x0048,  0x0048,
+  0x0048,  0x0048,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x004B,  0x004B,
+  0x004B,  0x004B,
+  0x004B,  0x004B,
+  0x004C,  0x004C,
+  0x004C,  0x004C,
+  0x004C,  0x004C,
+  0x004C,  0x004C,
+  0x004D,  0x004D,
+  0x004D,  0x004D,
+  0x004D,  0x004D,
+  0x004E,  0x004E,
+  0x004E,  0x004E,
+  0x004E,  0x004E,
+  0x004E,  0x004E,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x0050,  0x0050,
+  0x0050,  0x0050,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0052,  0x0052,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0053,  0x0053,
+  0x0054,  0x0054,
+  0x0054,  0x0054,
+  0x0054,  0x0054,
+  0x0054,  0x0054,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0056,  0x0056,
+  0x0056,  0x0056,
+  0x0057,  0x0057,
+  0x0057,  0x0057,
+  0x0057,  0x0057,
+  0x0057,  0x0057,
+  0x0057,  0x0057,
+  0x0058,  0x0058,
+  0x0058,  0x0058,
+  0x0059,  0x0059,
+  0x005A,  0x005A,
+  0x005A,  0x005A,
+  0x005A,  0x005A,
+  0x0048,  0x0054,
+  0x0057,  0x0059,
+  0x1E9A,  0x0053,
+  0x1E9C,  0x1E9D,
+  0x1E9E,  0x1E9F,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0041,  0x0041,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0045,  0x0045,
+  0x0049,  0x0049,
+  0x0049,  0x0049,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x004F,  0x004F,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0055,  0x0055,
+  0x0059,  0x0059,
+  0x0059,  0x0059,
+  0x0059,  0x0059,
+  0x0059,  0x0059,
+  0x1EFA,  0x1EFB,
+  0x1EFC,  0x1EFD,
+  0x1EFE,  0x1EFF
+};
+
+static uint32_t plane1F[] = {
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0395,  0x0395,
+  0x0395,  0x0395,
+  0x0395,  0x0395,
+  0x1F16,  0x1F17,
+  0x0395,  0x0395,
+  0x0395,  0x0395,
+  0x0395,  0x0395,
+  0x1F1E,  0x1F1F,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x039F,  0x039F,
+  0x039F,  0x039F,
+  0x039F,  0x039F,
+  0x1F46,  0x1F47,
+  0x039F,  0x039F,
+  0x039F,  0x039F,
+  0x039F,  0x039F,
+  0x1F4E,  0x1F4F,
+  0x03A5,  0x03A5,
+  0x03A5,  0x03A5,
+  0x03A5,  0x03A5,
+  0x03A5,  0x03A5,
+  0x1F58,  0x03A5,
+  0x1F5A,  0x03A5,
+  0x1F5C,  0x03A5,
+  0x1F5E,  0x03A5,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x0391,  0x1FBB,
+  0x0395,  0x1FC9,
+  0x0397,  0x1FCB,
+  0x0399,  0x1FDB,
+  0x039F,  0x1FF9,
+  0x03A5,  0x1FEB,
+  0x03A9,  0x1FFB,
+  0x1F7E,  0x1F7F,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x0397,  0x0397,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x03A9,  0x03A9,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x1FB5,
+  0x0391,  0x0391,
+  0x0391,  0x0391,
+  0x0391,  0x1FBB,
+  0x0391,  0x1FBD,
+  0x0399,  0x1FBF,
+  0x1FC0,  0x1FC1,
+  0x0397,  0x0397,
+  0x0397,  0x1FC5,
+  0x0397,  0x0397,
+  0x0395,  0x1FC9,
+  0x0397,  0x1FCB,
+  0x0397,  0x1FCD,
+  0x1FCE,  0x1FCF,
+  0x0399,  0x0399,
+  0x0399,  0x1FD3,
+  0x1FD4,  0x1FD5,
+  0x0399,  0x0399,
+  0x0399,  0x0399,
+  0x0399,  0x1FDB,
+  0x1FDC,  0x1FDD,
+  0x1FDE,  0x1FDF,
+  0x03A5,  0x03A5,
+  0x03A5,  0x1FE3,
+  0x03A1,  0x03A1,
+  0x03A5,  0x03A5,
+  0x03A5,  0x03A5,
+  0x03A5,  0x1FEB,
+  0x03A1,  0x1FED,
+  0x1FEE,  0x1FEF,
+  0x1FF0,  0x1FF1,
+  0x03A9,  0x03A9,
+  0x03A9,  0x1FF5,
+  0x03A9,  0x03A9,
+  0x039F,  0x1FF9,
+  0x03A9,  0x1FFB,
+  0x03A9,  0x1FFD,
+  0x1FFE,  0x1FFF
+};
+
+static uint32_t plane21[] = {
+  0x2100,  0x2101,
+  0x2102,  0x2103,
+  0x2104,  0x2105,
+  0x2106,  0x2107,
+  0x2108,  0x2109,
+  0x210A,  0x210B,
+  0x210C,  0x210D,
+  0x210E,  0x210F,
+  0x2110,  0x2111,
+  0x2112,  0x2113,
+  0x2114,  0x2115,
+  0x2116,  0x2117,
+  0x2118,  0x2119,
+  0x211A,  0x211B,
+  0x211C,  0x211D,
+  0x211E,  0x211F,
+  0x2120,  0x2121,
+  0x2122,  0x2123,
+  0x2124,  0x2125,
+  0x2126,  0x2127,
+  0x2128,  0x2129,
+  0x212A,  0x212B,
+  0x212C,  0x212D,
+  0x212E,  0x212F,
+  0x2130,  0x2131,
+  0x2132,  0x2133,
+  0x2134,  0x2135,
+  0x2136,  0x2137,
+  0x2138,  0x2139,
+  0x213A,  0x213B,
+  0x213C,  0x213D,
+  0x213E,  0x213F,
+  0x2140,  0x2141,
+  0x2142,  0x2143,
+  0x2144,  0x2145,
+  0x2146,  0x2147,
+  0x2148,  0x2149,
+  0x214A,  0x214B,
+  0x214C,  0x214D,
+  0x214E,  0x214F,
+  0x2150,  0x2151,
+  0x2152,  0x2153,
+  0x2154,  0x2155,
+  0x2156,  0x2157,
+  0x2158,  0x2159,
+  0x215A,  0x215B,
+  0x215C,  0x215D,
+  0x215E,  0x215F,
+  0x2160,  0x2161,
+  0x2162,  0x2163,
+  0x2164,  0x2165,
+  0x2166,  0x2167,
+  0x2168,  0x2169,
+  0x216A,  0x216B,
+  0x216C,  0x216D,
+  0x216E,  0x216F,
+  0x2160,  0x2161,
+  0x2162,  0x2163,
+  0x2164,  0x2165,
+  0x2166,  0x2167,
+  0x2168,  0x2169,
+  0x216A,  0x216B,
+  0x216C,  0x216D,
+  0x216E,  0x216F,
+  0x2180,  0x2181,
+  0x2182,  0x2183,
+  0x2184,  0x2185,
+  0x2186,  0x2187,
+  0x2188,  0x2189,
+  0x218A,  0x218B,
+  0x218C,  0x218D,
+  0x218E,  0x218F,
+  0x2190,  0x2191,
+  0x2192,  0x2193,
+  0x2194,  0x2195,
+  0x2196,  0x2197,
+  0x2198,  0x2199,
+  0x219A,  0x219B,
+  0x219C,  0x219D,
+  0x219E,  0x219F,
+  0x21A0,  0x21A1,
+  0x21A2,  0x21A3,
+  0x21A4,  0x21A5,
+  0x21A6,  0x21A7,
+  0x21A8,  0x21A9,
+  0x21AA,  0x21AB,
+  0x21AC,  0x21AD,
+  0x21AE,  0x21AF,
+  0x21B0,  0x21B1,
+  0x21B2,  0x21B3,
+  0x21B4,  0x21B5,
+  0x21B6,  0x21B7,
+  0x21B8,  0x21B9,
+  0x21BA,  0x21BB,
+  0x21BC,  0x21BD,
+  0x21BE,  0x21BF,
+  0x21C0,  0x21C1,
+  0x21C2,  0x21C3,
+  0x21C4,  0x21C5,
+  0x21C6,  0x21C7,
+  0x21C8,  0x21C9,
+  0x21CA,  0x21CB,
+  0x21CC,  0x21CD,
+  0x21CE,  0x21CF,
+  0x21D0,  0x21D1,
+  0x21D2,  0x21D3,
+  0x21D4,  0x21D5,
+  0x21D6,  0x21D7,
+  0x21D8,  0x21D9,
+  0x21DA,  0x21DB,
+  0x21DC,  0x21DD,
+  0x21DE,  0x21DF,
+  0x21E0,  0x21E1,
+  0x21E2,  0x21E3,
+  0x21E4,  0x21E5,
+  0x21E6,  0x21E7,
+  0x21E8,  0x21E9,
+  0x21EA,  0x21EB,
+  0x21EC,  0x21ED,
+  0x21EE,  0x21EF,
+  0x21F0,  0x21F1,
+  0x21F2,  0x21F3,
+  0x21F4,  0x21F5,
+  0x21F6,  0x21F7,
+  0x21F8,  0x21F9,
+  0x21FA,  0x21FB,
+  0x21FC,  0x21FD,
+  0x21FE,  0x21FF
+};
+
+static uint32_t plane24[] = {
+  0x2400,  0x2401,
+  0x2402,  0x2403,
+  0x2404,  0x2405,
+  0x2406,  0x2407,
+  0x2408,  0x2409,
+  0x240A,  0x240B,
+  0x240C,  0x240D,
+  0x240E,  0x240F,
+  0x2410,  0x2411,
+  0x2412,  0x2413,
+  0x2414,  0x2415,
+  0x2416,  0x2417,
+  0x2418,  0x2419,
+  0x241A,  0x241B,
+  0x241C,  0x241D,
+  0x241E,  0x241F,
+  0x2420,  0x2421,
+  0x2422,  0x2423,
+  0x2424,  0x2425,
+  0x2426,  0x2427,
+  0x2428,  0x2429,
+  0x242A,  0x242B,
+  0x242C,  0x242D,
+  0x242E,  0x242F,
+  0x2430,  0x2431,
+  0x2432,  0x2433,
+  0x2434,  0x2435,
+  0x2436,  0x2437,
+  0x2438,  0x2439,
+  0x243A,  0x243B,
+  0x243C,  0x243D,
+  0x243E,  0x243F,
+  0x2440,  0x2441,
+  0x2442,  0x2443,
+  0x2444,  0x2445,
+  0x2446,  0x2447,
+  0x2448,  0x2449,
+  0x244A,  0x244B,
+  0x244C,  0x244D,
+  0x244E,  0x244F,
+  0x2450,  0x2451,
+  0x2452,  0x2453,
+  0x2454,  0x2455,
+  0x2456,  0x2457,
+  0x2458,  0x2459,
+  0x245A,  0x245B,
+  0x245C,  0x245D,
+  0x245E,  0x245F,
+  0x2460,  0x2461,
+  0x2462,  0x2463,
+  0x2464,  0x2465,
+  0x2466,  0x2467,
+  0x2468,  0x2469,
+  0x246A,  0x246B,
+  0x246C,  0x246D,
+  0x246E,  0x246F,
+  0x2470,  0x2471,
+  0x2472,  0x2473,
+  0x2474,  0x2475,
+  0x2476,  0x2477,
+  0x2478,  0x2479,
+  0x247A,  0x247B,
+  0x247C,  0x247D,
+  0x247E,  0x247F,
+  0x2480,  0x2481,
+  0x2482,  0x2483,
+  0x2484,  0x2485,
+  0x2486,  0x2487,
+  0x2488,  0x2489,
+  0x248A,  0x248B,
+  0x248C,  0x248D,
+  0x248E,  0x248F,
+  0x2490,  0x2491,
+  0x2492,  0x2493,
+  0x2494,  0x2495,
+  0x2496,  0x2497,
+  0x2498,  0x2499,
+  0x249A,  0x249B,
+  0x249C,  0x249D,
+  0x249E,  0x249F,
+  0x24A0,  0x24A1,
+  0x24A2,  0x24A3,
+  0x24A4,  0x24A5,
+  0x24A6,  0x24A7,
+  0x24A8,  0x24A9,
+  0x24AA,  0x24AB,
+  0x24AC,  0x24AD,
+  0x24AE,  0x24AF,
+  0x24B0,  0x24B1,
+  0x24B2,  0x24B3,
+  0x24B4,  0x24B5,
+  0x24B6,  0x24B7,
+  0x24B8,  0x24B9,
+  0x24BA,  0x24BB,
+  0x24BC,  0x24BD,
+  0x24BE,  0x24BF,
+  0x24C0,  0x24C1,
+  0x24C2,  0x24C3,
+  0x24C4,  0x24C5,
+  0x24C6,  0x24C7,
+  0x24C8,  0x24C9,
+  0x24CA,  0x24CB,
+  0x24CC,  0x24CD,
+  0x24CE,  0x24CF,
+  0x24B6,  0x24B7,
+  0x24B8,  0x24B9,
+  0x24BA,  0x24BB,
+  0x24BC,  0x24BD,
+  0x24BE,  0x24BF,
+  0x24C0,  0x24C1,
+  0x24C2,  0x24C3,
+  0x24C4,  0x24C5,
+  0x24C6,  0x24C7,
+  0x24C8,  0x24C9,
+  0x24CA,  0x24CB,
+  0x24CC,  0x24CD,
+  0x24CE,  0x24CF,
+  0x24EA,  0x24EB,
+  0x24EC,  0x24ED,
+  0x24EE,  0x24EF,
+  0x24F0,  0x24F1,
+  0x24F2,  0x24F3,
+  0x24F4,  0x24F5,
+  0x24F6,  0x24F7,
+  0x24F8,  0x24F9,
+  0x24FA,  0x24FB,
+  0x24FC,  0x24FD,
+  0x24FE,  0x24FF
+};
+
+static uint32_t planeFF[] = {
+  0xFF00,  0xFF01,
+  0xFF02,  0xFF03,
+  0xFF04,  0xFF05,
+  0xFF06,  0xFF07,
+  0xFF08,  0xFF09,
+  0xFF0A,  0xFF0B,
+  0xFF0C,  0xFF0D,
+  0xFF0E,  0xFF0F,
+  0xFF10,  0xFF11,
+  0xFF12,  0xFF13,
+  0xFF14,  0xFF15,
+  0xFF16,  0xFF17,
+  0xFF18,  0xFF19,
+  0xFF1A,  0xFF1B,
+  0xFF1C,  0xFF1D,
+  0xFF1E,  0xFF1F,
+  0xFF20,  0xFF21,
+  0xFF22,  0xFF23,
+  0xFF24,  0xFF25,
+  0xFF26,  0xFF27,
+  0xFF28,  0xFF29,
+  0xFF2A,  0xFF2B,
+  0xFF2C,  0xFF2D,
+  0xFF2E,  0xFF2F,
+  0xFF30,  0xFF31,
+  0xFF32,  0xFF33,
+  0xFF34,  0xFF35,
+  0xFF36,  0xFF37,
+  0xFF38,  0xFF39,
+  0xFF3A,  0xFF3B,
+  0xFF3C,  0xFF3D,
+  0xFF3E,  0xFF3F,
+  0xFF40,  0xFF21,
+  0xFF22,  0xFF23,
+  0xFF24,  0xFF25,
+  0xFF26,  0xFF27,
+  0xFF28,  0xFF29,
+  0xFF2A,  0xFF2B,
+  0xFF2C,  0xFF2D,
+  0xFF2E,  0xFF2F,
+  0xFF30,  0xFF31,
+  0xFF32,  0xFF33,
+  0xFF34,  0xFF35,
+  0xFF36,  0xFF37,
+  0xFF38,  0xFF39,
+  0xFF3A,  0xFF5B,
+  0xFF5C,  0xFF5D,
+  0xFF5E,  0xFF5F,
+  0xFF60,  0xFF61,
+  0xFF62,  0xFF63,
+  0xFF64,  0xFF65,
+  0xFF66,  0xFF67,
+  0xFF68,  0xFF69,
+  0xFF6A,  0xFF6B,
+  0xFF6C,  0xFF6D,
+  0xFF6E,  0xFF6F,
+  0xFF70,  0xFF71,
+  0xFF72,  0xFF73,
+  0xFF74,  0xFF75,
+  0xFF76,  0xFF77,
+  0xFF78,  0xFF79,
+  0xFF7A,  0xFF7B,
+  0xFF7C,  0xFF7D,
+  0xFF7E,  0xFF7F,
+  0xFF80,  0xFF81,
+  0xFF82,  0xFF83,
+  0xFF84,  0xFF85,
+  0xFF86,  0xFF87,
+  0xFF88,  0xFF89,
+  0xFF8A,  0xFF8B,
+  0xFF8C,  0xFF8D,
+  0xFF8E,  0xFF8F,
+  0xFF90,  0xFF91,
+  0xFF92,  0xFF93,
+  0xFF94,  0xFF95,
+  0xFF96,  0xFF97,
+  0xFF98,  0xFF99,
+  0xFF9A,  0xFF9B,
+  0xFF9C,  0xFF9D,
+  0xFF9E,  0xFF9F,
+  0xFFA0,  0xFFA1,
+  0xFFA2,  0xFFA3,
+  0xFFA4,  0xFFA5,
+  0xFFA6,  0xFFA7,
+  0xFFA8,  0xFFA9,
+  0xFFAA,  0xFFAB,
+  0xFFAC,  0xFFAD,
+  0xFFAE,  0xFFAF,
+  0xFFB0,  0xFFB1,
+  0xFFB2,  0xFFB3,
+  0xFFB4,  0xFFB5,
+  0xFFB6,  0xFFB7,
+  0xFFB8,  0xFFB9,
+  0xFFBA,  0xFFBB,
+  0xFFBC,  0xFFBD,
+  0xFFBE,  0xFFBF,
+  0xFFC0,  0xFFC1,
+  0xFFC2,  0xFFC3,
+  0xFFC4,  0xFFC5,
+  0xFFC6,  0xFFC7,
+  0xFFC8,  0xFFC9,
+  0xFFCA,  0xFFCB,
+  0xFFCC,  0xFFCD,
+  0xFFCE,  0xFFCF,
+  0xFFD0,  0xFFD1,
+  0xFFD2,  0xFFD3,
+  0xFFD4,  0xFFD5,
+  0xFFD6,  0xFFD7,
+  0xFFD8,  0xFFD9,
+  0xFFDA,  0xFFDB,
+  0xFFDC,  0xFFDD,
+  0xFFDE,  0xFFDF,
+  0xFFE0,  0xFFE1,
+  0xFFE2,  0xFFE3,
+  0xFFE4,  0xFFE5,
+  0xFFE6,  0xFFE7,
+  0xFFE8,  0xFFE9,
+  0xFFEA,  0xFFEB,
+  0xFFEC,  0xFFED,
+  0xFFEE,  0xFFEF,
+  0xFFF0,  0xFFF1,
+  0xFFF2,  0xFFF3,
+  0xFFF4,  0xFFF5,
+  0xFFF6,  0xFFF7,
+  0xFFF8,  0xFFF9,
+  0xFFFA,  0xFFFB,
+  0xFFFC,  0xFFFD,
+  0xFFFE,  0xFFFF
+};
+
+static uint32_t *mysql_unicode_normalize_table[256] = {
+ plane00, plane01, plane02, plane03, plane04, plane05,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL, plane1E, plane1F,
+    NULL, plane21,    NULL,    NULL, plane24,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
+    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL, planeFF
+
+};
+/* based on mysql-5.5.29/strings/ctype-utf8.c: end */
+
+static inline unsigned int
+unichar_to_utf8(uint32_t unichar, char *output)
+{
+  unsigned int n_bytes;
+
+  if (unichar < 0x80) {
+    output[0] = unichar;
+    n_bytes = 1;
+  } else if (unichar < 0x0800) {
+    output[0] = ((unichar >> 6) & 0x1f) | 0xc0;
+    output[1] = (unichar & 0x3f) | 0x80;
+    n_bytes = 2;
+  } else if (unichar < 0x10000) {
+    output[0] = (unichar >> 12) | 0xe0;
+    output[1] = ((unichar >> 6) & 0x3f) | 0x80;
+    output[2] = (unichar & 0x3f) | 0x80;
+    n_bytes = 3;
+  } else if (unichar < 0x200000) {
+    output[0] = (unichar >> 18) | 0xf0;
+    output[1] = ((unichar >> 12) & 0x3f) | 0x80;
+    output[2] = ((unichar >> 6) & 0x3f) | 0x80;
+    output[3] = (unichar & 0x3f) | 0x80;
+    n_bytes = 4;
+  } else if (unichar < 0x4000000) {
+    output[0] = (unichar >> 24) | 0xf8;
+    output[1] = ((unichar >> 18) & 0x3f) | 0x80;
+    output[2] = ((unichar >> 12) & 0x3f) | 0x80;
+    output[3] = ((unichar >> 6) & 0x3f) | 0x80;
+    output[4] = (unichar & 0x3f) | 0x80;
+    n_bytes = 5;
+  } else {
+    output[0] = (unichar >> 30) | 0xfc;
+    output[1] = ((unichar >> 24) & 0x3f) | 0x80;
+    output[2] = ((unichar >> 18) & 0x3f) | 0x80;
+    output[3] = ((unichar >> 12) & 0x3f) | 0x80;
+    output[4] = ((unichar >> 6) & 0x3f) | 0x80;
+    output[5] = (unichar & 0x3f) | 0x80;
+    n_bytes = 6;
+  }
+
+  return n_bytes;
+}
+
+static void
+normalize(grn_ctx *ctx, grn_obj *string)
+{
+  const char *original, *rest;
+  unsigned int original_length_in_bytes, rest_length;
+  char *normalized;
+  unsigned int normalized_length_in_bytes = 0;
+  unsigned int normalized_n_characters = 0;
+  grn_encoding encoding;
+
+  encoding = grn_string_get_encoding(ctx, string);
+  grn_string_get_original(ctx, string, &original, &original_length_in_bytes);
+  normalized = GRN_PLUGIN_MALLOC(ctx, original_length_in_bytes + 1);
+  rest = original;
+  rest_length = original_length_in_bytes;
+  while (rest_length > 0) {
+    int character_length;
+    int plane;
+    uint32_t low_code;
+
+    character_length = grn_tokenizer_charlen(ctx, rest, rest_length, encoding);
+    if (character_length == 0) {
+      break;
+    }
+
+    switch (character_length) {
+    case 1 :
+      plane = 0x00;
+      low_code = rest[0] & 0x7f;
+      break;
+    case 2 :
+      plane = 0x00;
+      low_code = (rest[0] & 0x1f << 6) + (rest[1] & 0x3f);
+      break;
+    case 3 :
+      plane = rest[0] & 0x0f;
+      low_code =
+        ((rest[0] & 0x0f) << 12) +
+        ((rest[1] & 0x3f) << 6) +
+        (rest[2] & 0x3f);
+      break;
+    case 4 :
+      plane = ((rest[0] & 0x07) << 6) + (rest[1] & 0x3f);
+      low_code =
+        ((rest[0] & 0x07) << 15) +
+        ((rest[1] & 0x3f) << 12) +
+        ((rest[2] & 0x3f) << 6) +
+        (rest[3] & 0x3f);
+      if (plane > 0xff) {
+        plane = -1;
+      }
+      break;
+    default :
+      plane = -1;
+      low_code = 0x00;
+      break;
+    }
+
+    if (plane >= 0x00 && mysql_unicode_normalize_table[plane]) {
+      uint32_t normalized_code = mysql_unicode_normalize_table[plane][low_code];
+      unsigned int n_bytes;
+      n_bytes = unichar_to_utf8(normalized_code,
+                                normalized + normalized_length_in_bytes);
+      normalized_length_in_bytes += n_bytes;
+    } else {
+      int i;
+      for (i = 0; i < character_length; i++) {
+        normalized[normalized_length_in_bytes + i] = rest[i];
+      }
+      normalized_length_in_bytes += character_length;
+    }
+    normalized_n_characters++;
+    rest += character_length;
+    rest_length -= character_length;
+  }
+
+  if (rest_length == 0) {
+    grn_string_set_normalized(ctx,
+                              string,
+                              normalized,
+                              normalized_length_in_bytes,
+                              normalized_n_characters);
+  } else {
+    /* TODO: report error */
+    GRN_PLUGIN_FREE(ctx, normalized);
+  }
+}
+
+static grn_obj *
+mysql_general_ci_next(grn_ctx *ctx, int nargs, grn_obj **args,
+                      grn_user_data *user_data)
+{
+  grn_obj *string = args[0];
+  grn_encoding encoding;
+
+  encoding = grn_string_get_encoding(ctx, string);
+  if (encoding != GRN_ENC_UTF8) {
+    GRN_PLUGIN_ERROR(ctx,
+                     GRN_FUNCTION_NOT_IMPLEMENTED,
+                     "[normalizer][mysql-general-ci] "
+                     "UTF-8 encoding is only supported: %s",
+                     grn_enctostr(encoding));
+    return NULL;
+  }
+  normalize(ctx, string);
+  return NULL;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+  return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  grn_normalizer_register(ctx, "NormalizerMySQLGeneralCI", -1,
+                          NULL, mysql_general_ci_next, NULL);
+  return GRN_SUCCESS;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+  return GRN_SUCCESS;
+}

  Added: plugins/normalizers/mysql_sources.am (+2 -0) 100644
===================================================================
--- /dev/null
+++ plugins/normalizers/mysql_sources.am    2013-01-23 18:43:28 +0900 (0ccf3b9)
@@ -0,0 +1,2 @@
+mysql_la_SOURCES =				\
+	mysql.c

  Added: test/command/suite/normalizers/mysql/general_ci/plane00.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/mysql/general_ci/plane00.expected    2013-01-23 18:43:28 +0900 (5e595e0)
@@ -0,0 +1,4 @@
+register normalizers/mysql
+[[0,0.0,0.0],true]
+normalize NormalizerMySQLGeneralCI "aBc"
+[[0,0.0,0.0],"ABC"]

  Added: test/command/suite/normalizers/mysql/general_ci/plane00.test (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/mysql/general_ci/plane00.test    2013-01-23 18:43:28 +0900 (29c92d2)
@@ -0,0 +1,3 @@
+register normalizers/mysql
+
+normalize NormalizerMySQLGeneralCI "aBc"
-------------- next part --------------
HTML����������������������������...
Descargar 



More information about the Groonga-commit mailing list
Back to archive index