[Groonga-commit] groonga/groonga [master] don't split command line to tokens by full width space. fixes #986

Back to archive index

null+****@clear***** null+****@clear*****
2011年 9月 16日 (金) 16:02:53 JST


Kouhei Sutou	2011-09-16 07:02:53 +0000 (Fri, 16 Sep 2011)

  New Revision: 4b5d310441cbd1d006d6b704236604c8d503dd41

  Log:
    don't split command line to tokens by full width space. fixes #986

  Modified files:
    lib/str.c
    test/unit/core/test-command-select-query.c
    test/unit/core/test-command-select.c

  Modified: lib/str.c (+2 -2)
===================================================================
--- lib/str.c    2011-09-16 02:18:05 +0000 (4476acd)
+++ lib/str.c    2011-09-16 07:02:53 +0000 (9016292)
@@ -2271,7 +2271,7 @@ grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, cha
     }
     switch (stat) {
     case GRN_TOK_VOID :
-      if (grn_isspace(p, ctx->encoding)) { continue; }
+      if (*p == ' ') { continue; }
       switch (*p) {
       case '"' :
         stat = GRN_TOK_STRING;
@@ -2295,7 +2295,7 @@ grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, cha
       }
       break;
     case GRN_TOK_SYMBOL :
-      if (grn_isspace(p, ctx->encoding)) { goto exit; }
+      if (*p == ' ') { goto exit; }
       switch (*p) {
       case '\'' :
       case '"' :

  Modified: test/unit/core/test-command-select-query.c (+20 -0)
===================================================================
--- test/unit/core/test-command-select-query.c    2011-09-16 02:18:05 +0000 (d12240d)
+++ test/unit/core/test-command-select-query.c    2011-09-16 07:02:53 +0000 (d717d62)
@@ -27,6 +27,7 @@ void test_int64_compare_over_int32(void);
 void test_int64_compare_float_literal(void);
 void test_int32_key_table_reference_compare(void);
 void test_prefix_search(void);
+void test_full_width_space(void);
 
 static gchar *tmp_directory;
 
@@ -147,3 +148,22 @@ test_prefix_search(void)
     send_command("select Users --match_columns _key --query mor*"));
 }
 
+void
+test_full_width_space(void)
+{
+  assert_send_command("table_create Users TABLE_PAT_KEY ShortText");
+  assert_send_command("table_create Terms TABLE_PAT_KEY ShortText "
+                      "--default_tokenizer TokenBigram");
+  assert_send_command("column_create Terms users COLUMN_INDEX|WITH_POSITION "
+                      "Users _key");
+  assert_send_command("load --table Users\n"
+                      "[\n"
+                      "{\"_key\":\"森　大二郎\"}\n"
+                      "]");
+  cut_assert_equal_string(
+      "[[[1],"
+       "[[\"_id\",\"UInt32\"],[\"_key\",\"ShortText\"]],"
+       "[1,\"森　大二郎\"]]]",
+    send_command("select Users --match_columns _key --query 森　二郎"));
+}
+

  Modified: test/unit/core/test-command-select.c (+54 -1)
===================================================================
--- test/unit/core/test-command-select.c    2011-09-16 02:18:05 +0000 (db72961)
+++ test/unit/core/test-command-select.c    2011-09-16 07:02:53 +0000 (0902c26)
@@ -1,5 +1,7 @@
 /* -*- c-basic-offset: 2; coding: utf-8 -*- */
-/* Copyright(C) 2009-2010 Brazil
+/*
+  Copyright(C) 2009-2010 Brazil
+  Copyright(C) 2011 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -45,6 +47,8 @@ void data_less_than_or_equal(void);
 void test_less_than_or_equal(gconstpointer data);
 void data_equal_numeric(void);
 void test_equal_numeric(gconstpointer data);
+void data_not_tokenize_by_full_width_space(void);
+void test_not_tokenize_by_full_width_space(gconstpointer data);
 
 static gchar *tmp_directory;
 
@@ -661,3 +665,52 @@ test_equal_numeric(gconstpointer data)
                  "--output_columns _key,score "
                  "--filter 'score == 4'"));
 }
+
+void
+data_not_tokenize_by_full_width_space(void)
+{
+#define ADD_DATA(label, error_message, command)                 \
+  gcut_add_datum(label,                                         \
+                 "error-message", G_TYPE_STRING, error_message, \
+                 "command", G_TYPE_STRING, command,             \
+                 NULL)
+
+  ADD_DATA("separator",
+           "invalid table name: <Sites　--output_columns>",
+           "select Sites"
+           "　"
+           "--output_columns _key");
+
+  ADD_DATA("prepend",
+           "invalid table name: <　Sites>",
+           "select "
+           " 　Sites"
+           " "
+           "--output_columns _key");
+
+#undef ADD_DATA
+}
+
+void
+test_not_tokenize_by_full_width_space(gconstpointer data)
+{
+  const gchar *error_message;
+  const gchar *command;
+
+  error_message = gcut_data_get_string(data, "error-message");
+  command = gcut_data_get_string(data, "command");
+
+  assert_send_command("table_create Sites TABLE_HASH_KEY ShortText");
+  cut_assert_equal_string(
+    "3",
+    send_command("load --table Sites --columns '_key' \n"
+                 "[\n"
+                 " [\"groonga.org\"],\n"
+                 " [\"ruby-lang.org\"],\n"
+                 " [\"qwik.jp/senna/\"]\n"
+                 "]"));
+
+  assert_send_command_error(GRN_INVALID_ARGUMENT,
+                            error_message,
+                            command);
+}




Groonga-commit メーリングリストの案内
Back to archive index