[Groonga-commit] groonga/groonga at 97071c6 [master] Add encoding converter

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Jun 21 16:49:06 JST 2018


Kouhei Sutou	2018-06-21 16:49:06 +0900 (Thu, 21 Jun 2018)

  New Revision: 97071c6172aee5f09f5d890ff0ae7170e0b15566
  https://github.com/groonga/groonga/commit/97071c6172aee5f09f5d890ff0ae7170e0b15566

  Message:
    Add encoding converter
    
    We use the following rules:
    
      * grn_ctx::errbuf: grn_encoding
      * grn_logger_put: grn_encoding
      * mruby: UTF-8
      * path: locale

  Added files:
    lib/encoding.c
    lib/grn_encoding.h
  Modified files:
    lib/c_sources.am

  Modified: lib/c_sources.am (+2 -0)
===================================================================
--- lib/c_sources.am    2018-06-21 16:22:58 +0900 (62c888600)
+++ lib/c_sources.am    2018-06-21 16:49:06 +0900 (ee601829f)
@@ -20,6 +20,8 @@ libgroonga_c_sources =				\
 	db.c					\
 	grn_db.h				\
 	dump.c					\
+	encoding.c				\
+	grn_encoding.h				\
 	ts.c					\
 	grn_ts.h				\
 	type.c					\

  Added: lib/encoding.c (+334 -0) 100644
===================================================================
--- /dev/null
+++ lib/encoding.c    2018-06-21 16:49:06 +0900 (e3d040928)
@@ -0,0 +1,334 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <stdio.h>
+
+#include "grn.h"
+#include "grn_ctx.h"
+#include "grn_encoding.h"
+#include "grn_windows.h"
+
+#include <string.h>
+
+#ifdef WIN32
+/*
+ * Converts from_string from from_code_page to to_code_page by going
+ * through UTF-16: MultiByteToWideChar() followed by WideCharToMultiByte().
+ *
+ * context: prefix used in error messages.
+ * from_string_size: size of from_string in bytes. A negative value means
+ *   "NUL-terminated"; strlen() is used to get the size.
+ * converted_string_size: optional (may be NULL) output that receives the
+ *   size in bytes of the returned string, excluding the terminating NUL.
+ *
+ * Returns a NUL-terminated buffer allocated by GRN_MALLOCN(). Callers
+ * release it with grn_encoding_converted_free(). When both code pages are
+ * the same, the input is empty or any conversion step fails, the returned
+ * buffer is a plain copy of from_string. NULL is returned only when that
+ * copy can't be allocated either.
+ */
+static const char *
+grn_encoding_convert(grn_ctx *ctx,
+                     const char *context,
+                     UINT from_code_page,
+                     UINT to_code_page,
+                     const char *from_string,
+                     ssize_t from_string_size,
+                     size_t *converted_string_size)
+{
+  char *converted_string = NULL;
+
+  if (from_string_size < 0) {
+    from_string_size = strlen(from_string);
+  }
+
+  /* Nothing to convert: the code pages are the same or the input is
+   * empty. MultiByteToWideChar() fails for 0 byte input, so the empty
+   * case must not reach it. The exit path returns a plain copy. */
+  if (from_code_page == to_code_page || from_string_size == 0) {
+    goto exit;
+  }
+
+  {
+    WCHAR *utf16_string;
+    DWORD n_utf16_chars;
+    size_t converted_string_size_;
+
+    /* 1st call: only asks how many UTF-16 characters are needed. */
+    n_utf16_chars = MultiByteToWideChar(from_code_page,
+                                        0,
+                                        from_string,
+                                        from_string_size,
+                                        NULL,
+                                        0);
+    if (n_utf16_chars == 0) {
+      SERR("%s failed to estimate the number of UTF-16 characters",
+           context);
+      goto exit;
+    }
+    utf16_string = GRN_MALLOCN(WCHAR, n_utf16_chars);
+    if (!utf16_string) {
+      /* Same reporting convention as the exit path below: ctx->rc is
+       * presumably already set by the failed allocation -- confirm. */
+      ERR(ctx->rc,
+          "%s failed to allocate a buffer for UTF-16 characters",
+          context);
+      goto exit;
+    }
+    /* 2nd call: performs the actual conversion to UTF-16. */
+    n_utf16_chars = MultiByteToWideChar(from_code_page,
+                                        0,
+                                        from_string,
+                                        from_string_size,
+                                        utf16_string,
+                                        n_utf16_chars);
+    if (n_utf16_chars == 0) {
+      SERR("%s failed to convert to UTF-16 characters",
+           context);
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+
+    /* 1st call: only asks how many bytes the converted string needs. */
+    converted_string_size_ = WideCharToMultiByte(to_code_page,
+                                                 0,
+                                                 utf16_string,
+                                                 n_utf16_chars,
+                                                 NULL,
+                                                 0,
+                                                 NULL,
+                                                 NULL);
+    if (converted_string_size_ == 0) {
+      SERR("%s failed to estimate required buffer size for converted string",
+           context);
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+
+    /* Size the buffer from the estimated size (+1 for the terminating
+     * NUL), not from the caller's output parameter: that one isn't
+     * initialized yet and may be NULL. */
+    converted_string = GRN_MALLOCN(char, converted_string_size_ + 1);
+    if (!converted_string) {
+      /* The exit path tries a plain copy and reports the error when that
+       * allocation fails too. */
+      GRN_FREE(utf16_string);
+      goto exit;
+    }
+    /* 2nd call: performs the actual conversion from UTF-16. */
+    converted_string_size_ = WideCharToMultiByte(to_code_page,
+                                                 0,
+                                                 utf16_string,
+                                                 n_utf16_chars,
+                                                 converted_string,
+                                                 converted_string_size_,
+                                                 NULL,
+                                                 NULL);
+    GRN_FREE(utf16_string);
+    if (converted_string_size_ == 0) {
+      SERR("%s failed to convert from UTF-16 characters",
+           context);
+      GRN_FREE(converted_string);
+      converted_string = NULL;
+      goto exit;
+    }
+    converted_string[converted_string_size_] = '\0';
+    if (converted_string_size) {
+      *converted_string_size = converted_string_size_;
+    }
+  }
+
+exit :
+  /* Fallback: no converted string is available, so return a copy of the
+   * input. The result is always a separately allocated buffer. */
+  if (!converted_string) {
+    converted_string = GRN_MALLOCN(char, from_string_size + 1);
+    if (converted_string) {
+      grn_memcpy(converted_string, from_string, from_string_size);
+      converted_string[from_string_size] = '\0';
+      if (converted_string_size) {
+        *converted_string_size = from_string_size;
+      }
+    } else {
+      ERR(ctx->rc,
+          "%s failed to allocate a buffer for converted string",
+          context);
+      if (converted_string_size) {
+        *converted_string_size = 0;
+      }
+    }
+  }
+
+  return converted_string;
+}
+
+/*
+ * Windows build: converts a string in the grn_ctx encoding
+ * (ctx->encoding) to the locale code page (CP_ACP).
+ *
+ * grn_encoding_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * The result is a newly allocated string. Release it with
+ * grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size)
+{
+  /* grn_encoding_convert() takes (from, to): the source is the grn_ctx
+   * encoding and the destination is the locale code page. */
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][grn->locale]",
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              CP_ACP,
+                              grn_encoding_string,
+                              grn_encoding_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: converts a string in the grn_ctx encoding
+ * (ctx->encoding) to UTF-8 (CP_UTF8).
+ *
+ * grn_encoding_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * The result is a newly allocated string. Release it with
+ * grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size)
+{
+  /* grn_encoding_convert() takes (from, to): the source is the grn_ctx
+   * encoding and the destination is UTF-8. */
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][grn->utf8]",
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              CP_UTF8,
+                              grn_encoding_string,
+                              grn_encoding_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: converts a string in the locale code page (CP_ACP) to
+ * the grn_ctx encoding (ctx->encoding).
+ *
+ * locale_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * The result is a newly allocated string. Release it with
+ * grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size)
+{
+  /* grn_encoding_convert() takes (from, to): the source is the locale
+   * code page and the destination is the grn_ctx encoding. */
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][locale->grn]",
+                              CP_ACP,
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              locale_string,
+                              locale_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: converts a UTF-8 (CP_UTF8) string to the grn_ctx
+ * encoding (ctx->encoding).
+ *
+ * utf8_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * The result is a newly allocated string. Release it with
+ * grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size)
+{
+  /* grn_encoding_convert() takes (from, to): the source is UTF-8 and the
+   * destination is the grn_ctx encoding. */
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][utf8->grn]",
+                              CP_UTF8,
+                              grn_windows_encoding_to_code_page(ctx->encoding),
+                              utf8_string,
+                              utf8_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: converts a string in the locale code page (CP_ACP) to
+ * UTF-8 (CP_UTF8). ctx->encoding is not consulted.
+ *
+ * locale_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * Release the result with grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size)
+{
+  const UINT source_code_page = CP_ACP;
+  const UINT destination_code_page = CP_UTF8;
+
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][locale->utf8]",
+                              source_code_page,
+                              destination_code_page,
+                              locale_string,
+                              locale_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: converts a UTF-8 (CP_UTF8) string to the locale code
+ * page (CP_ACP). ctx->encoding is not consulted.
+ *
+ * utf8_string_size: size in bytes; negative means NUL-terminated.
+ * converted_string_size: optional output for the converted size in bytes.
+ *
+ * Release the result with grn_encoding_converted_free().
+ */
+const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size)
+{
+  const UINT source_code_page = CP_UTF8;
+  const UINT destination_code_page = CP_ACP;
+
+  return grn_encoding_convert(ctx,
+                              "[encoding][convert][utf8->locale]",
+                              source_code_page,
+                              destination_code_page,
+                              utf8_string,
+                              utf8_string_size,
+                              converted_string_size);
+}
+
+/*
+ * Windows build: releases a string returned by one of the
+ * grn_encoding_convert_*() functions in this file.
+ */
+void
+grn_encoding_converted_free(grn_ctx *ctx, const char *converted_string)
+{
+  /* The converters hand out the buffer as const; drop the qualifier to
+   * pass it to GRN_FREE(). */
+  char *buffer = (char *)converted_string;
+
+  GRN_FREE(buffer);
+}
+#else /* WIN32 */
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return grn_encoding_string;
+  }
+
+  *converted_string_size =
+    (grn_encoding_string_size < 0) ?
+    strlen(grn_encoding_string) :
+    (size_t)grn_encoding_string_size;
+  return grn_encoding_string;
+}
+
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return grn_encoding_string;
+  }
+
+  *converted_string_size =
+    (grn_encoding_string_size < 0) ?
+    strlen(grn_encoding_string) :
+    (size_t)grn_encoding_string_size;
+  return grn_encoding_string;
+}
+
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return locale_string;
+  }
+
+  *converted_string_size =
+    (locale_string_size < 0) ?
+    strlen(locale_string) :
+    (size_t)locale_string_size;
+  return locale_string;
+}
+
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return utf8_string;
+  }
+
+  *converted_string_size =
+    (utf8_string_size < 0) ?
+    strlen(utf8_string) :
+    (size_t)utf8_string_size;
+  return utf8_string;
+}
+
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return locale_string;
+  }
+
+  *converted_string_size =
+    (locale_string_size < 0) ?
+    strlen(locale_string) :
+    (size_t)locale_string_size;
+  return locale_string;
+}
+
+/*
+ * Non-Windows build: no conversion is performed. The input pointer is
+ * returned as is and, when converted_string_size isn't NULL, it receives
+ * the input size (strlen() of the input when the given size is negative).
+ */
+const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size)
+{
+  if (!converted_string_size) {
+    return utf8_string;
+  }
+
+  *converted_string_size =
+    (utf8_string_size < 0) ?
+    strlen(utf8_string) :
+    (size_t)utf8_string_size;
+  return utf8_string;
+}
+
+/*
+ * Non-Windows build: does nothing. The converters in this branch return
+ * their input pointer unchanged, so there is no buffer to release.
+ */
+void
+grn_encoding_converted_free(grn_ctx *ctx, const char *converted_string)
+{
+  (void)ctx;
+  (void)converted_string;
+}
+#endif /* WIN32 */

  Added: lib/grn_encoding.h (+70 -0) 100644
===================================================================
--- /dev/null
+++ lib/grn_encoding.h    2018-06-21 16:49:06 +0900 (0788d52de)
@@ -0,0 +1,70 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2018 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#include "grn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * grn_ctx::errbuf: grn_encoding
+ * grn_logger_put: grn_encoding
+ * mruby: UTF-8
+ * path: locale
+ */
+
+/*
+ * Common contract of the grn_encoding_convert_*() functions below, as
+ * implemented in lib/encoding.c:
+ *
+ *   - The *_size argument is the input size in bytes. A negative value
+ *     means that the input is NUL-terminated and strlen() is used.
+ *   - converted_string_size may be NULL. Otherwise it receives the size
+ *     in bytes of the returned string.
+ *   - On Windows the returned string is a newly allocated buffer. On
+ *     other platforms the input pointer itself is returned and no
+ *     conversion is performed.
+ *   - Always pass the returned string to grn_encoding_converted_free()
+ *     when it is no longer needed (it is a no-op on non-Windows).
+ */
+
+/* grn_encoding (ctx->encoding) -> locale */
+GRN_API const char *
+grn_encoding_convert_to_locale(grn_ctx *ctx,
+                               const char *grn_encoding_string,
+                               ssize_t grn_encoding_string_size,
+                               size_t *converted_string_size);
+/* grn_encoding (ctx->encoding) -> UTF-8 */
+GRN_API const char *
+grn_encoding_convert_to_utf8(grn_ctx *ctx,
+                             const char *grn_encoding_string,
+                             ssize_t grn_encoding_string_size,
+                             size_t *converted_string_size);
+/* locale -> grn_encoding (ctx->encoding) */
+GRN_API const char *
+grn_encoding_convert_from_locale(grn_ctx *ctx,
+                                 const char *locale_string,
+                                 ssize_t locale_string_size,
+                                 size_t *converted_string_size);
+/* UTF-8 -> grn_encoding (ctx->encoding) */
+GRN_API const char *
+grn_encoding_convert_from_utf8(grn_ctx *ctx,
+                               const char *utf8_string,
+                               ssize_t utf8_string_size,
+                               size_t *converted_string_size);
+/* locale -> UTF-8 */
+GRN_API const char *
+grn_encoding_convert_to_utf8_from_locale(grn_ctx *ctx,
+                                         const char *locale_string,
+                                         ssize_t locale_string_size,
+                                         size_t *converted_string_size);
+/* UTF-8 -> locale */
+GRN_API const char *
+grn_encoding_convert_to_locale_from_utf8(grn_ctx *ctx,
+                                         const char *utf8_string,
+                                         ssize_t utf8_string_size,
+                                         size_t *converted_string_size);
+/* Releases a string returned by the grn_encoding_convert_*() functions. */
+GRN_API void
+grn_encoding_converted_free(grn_ctx *ctx,
+                            const char *converted_string);
+
+#ifdef __cplusplus
+}
+#endif
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180621/51439970/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index