[Groonga-commit] groonga/groonga at 632cddb [master] Support multiple token filters again

Back to archive index
Kouhei Sutou null+****@clear*****
Tue Oct 30 15:04:04 JST 2018


Kouhei Sutou	2018-10-30 15:04:04 +0900 (Tue, 30 Oct 2018)

  Revision: 632cddbf9c65ae8d10540edc5f4accf80b287eb1
  https://github.com/groonga/groonga/commit/632cddbf9c65ae8d10540edc5f4accf80b287eb1

  Message:
    Support multiple token filters again

  Modified files:
    lib/db.c
    lib/expr.c
    lib/grn_expr.h
    lib/proc/proc_dump.c
    test/command/suite/table_create/token_filters/empty_between_comma.expected
    test/command/suite/table_create/token_filters/multiple.expected
    test/command/suite/table_create/token_filters/nonexistent.expected
    test/command/suite/table_create/token_filters/spaces_around_comma.expected
    test/command/suite/table_create/token_filters/trailing_comma.expected

  Modified: lib/db.c (+4 -7)
===================================================================
--- lib/db.c    2018-10-30 14:25:49 +0900 (f48a7a1c6)
+++ lib/db.c    2018-10-30 15:04:04 +0900 (7ee13f7fb)
@@ -9349,7 +9349,7 @@ grn_obj_set_info_table_modules(grn_ctx *ctx,
         ctx->errbuf);
     goto exit;
   }
-  if (!grn_expr_is_simple_function_calls(ctx, expression)) {
+  if (!grn_expr_is_module_list(ctx, expression)) {
     ERR(GRN_INVALID_ARGUMENT,
         "%s[%.*s] must be %s(option1, option2, ...), ... format: <%.*s>",
         context_tag,
@@ -9361,18 +9361,15 @@ grn_obj_set_info_table_modules(grn_ctx *ctx,
     goto exit;
   }
 
-  n = grn_expr_simple_function_calls_get_n_calls(ctx, expression);
+  n = grn_expr_module_list_get_n_modules(ctx, expression);
   for (i = 0; i < n; i++) {
     grn_obj *proc;
     grn_id proc_id;
     grn_bool is_valid_proc = GRN_FALSE;
 
-    proc = grn_expr_simple_function_calls_get_function(ctx, expression, i);
+    proc = grn_expr_module_list_get_function(ctx, expression, i);
     GRN_BULK_REWIND(&options);
-    grn_expr_simple_function_calls_get_arguments(ctx,
-                                                 expression,
-                                                 i,
-                                                 &options);
+    grn_expr_module_list_get_arguments(ctx, expression, i, &options);
 
     switch (type) {
     case GRN_INFO_TOKEN_FILTERS :

  Modified: lib/expr.c (+97 -20)
===================================================================
--- lib/expr.c    2018-10-30 14:25:49 +0900 (5490f471b)
+++ lib/expr.c    2018-10-30 15:04:04 +0900 (d8f0a25b3)
@@ -7601,9 +7601,8 @@ grn_expr_simple_function_call_get_arguments(grn_ctx *ctx,
   return GRN_SUCCESS;
 }
 
-/* TODO: Support multiple calls. */
 grn_bool
-grn_expr_is_simple_function_calls(grn_ctx *ctx, grn_obj *expr)
+grn_expr_is_module_list(grn_ctx *ctx, grn_obj *expr)
 {
   grn_expr *e = (grn_expr *)expr;
   grn_expr_code *codes = e->codes;
@@ -7622,6 +7621,8 @@ grn_expr_is_simple_function_calls(grn_ctx *ctx, grn_obj *expr)
         return GRN_FALSE;
       }
       break;
+    case GRN_OP_COMMA :
+      break;
     default :
       return GRN_FALSE;
     }
@@ -7630,36 +7631,112 @@ grn_expr_is_simple_function_calls(grn_ctx *ctx, grn_obj *expr)
   return GRN_TRUE;
 }
 
-/* TODO: Support multiple calls. */
 unsigned int
-grn_expr_simple_function_calls_get_n_calls(grn_ctx *ctx, grn_obj *expr)
+grn_expr_module_list_get_n_modules(grn_ctx *ctx, grn_obj *expr)
 {
-  return 1;
+  grn_expr *e = (grn_expr *)expr;
+  grn_expr_code *codes = e->codes;
+  grn_expr_code *codes_end = codes + e->codes_curr;
+  unsigned int n = 1;
+
+  for (; codes < codes_end; codes++) {
+    if (codes[0].op == GRN_OP_COMMA) {
+      n++;
+    }
+  }
+
+  return n;
 }
 
-/* TODO: Support multiple calls. */
-grn_obj *
-grn_expr_simple_function_calls_get_function(grn_ctx *ctx,
-                                            grn_obj *expr,
-                                            unsigned int i)
+static void
+grn_expr_module_list_detect_module(grn_ctx *ctx,
+                                   grn_obj *expr,
+                                   unsigned int i,
+                                   grn_expr_code **module_start,
+                                   grn_expr_code **module_end)
 {
   grn_expr *e = (grn_expr *)expr;
+  grn_expr_code *codes = e->codes;
+  grn_expr_code *codes_end = codes + e->codes_curr;
+  unsigned int j = 0;
 
-  return e->codes[0].value;
+  *module_start = codes;
+  *module_end = codes_end;
+
+  if (i == 0) {
+    for (codes = e->codes; codes < codes_end; codes++) {
+      switch (codes[0].op) {
+      case GRN_OP_CALL :
+        *module_start = codes - codes[0].nargs;
+        *module_end = codes;
+        return;
+      case GRN_OP_COMMA :
+        if (codes[-1].op == GRN_OP_CALL) {
+          *module_start = codes - codes[-1].nargs - 1;
+          *module_end = codes - codes[-1].nargs;
+        } else {
+          *module_start = codes - 2;
+          *module_end = codes - 1;
+        }
+        return;
+      default :
+        break;
+      }
+    }
+    return;
+  } else {
+    for (codes = e->codes; codes < codes_end; codes++) {
+      if (codes[0].op != GRN_OP_COMMA) {
+        continue;
+      }
+      j++;
+      if (i == j) {
+        if (codes > e->codes && codes[-1].op == GRN_OP_CALL) {
+          *module_start = codes - codes[-1].nargs;
+          *module_end = codes - 1;
+        } else {
+          *module_start = codes - 1;
+          *module_end = codes;
+        }
+        return;
+      }
+    }
+  }
+
+  *module_start = NULL;
+  *module_end = NULL;
+}
+
+grn_obj *
+grn_expr_module_list_get_function(grn_ctx *ctx,
+                                  grn_obj *expr,
+                                  unsigned int i)
+{
+  grn_expr_code *module_start;
+  grn_expr_code *module_end;
+
+  grn_expr_module_list_detect_module(ctx, expr, i, &module_start, &module_end);
+
+  if (module_start) {
+    return module_start[0].value;
+  } else {
+    return NULL;
+  }
 }
 
-/* TODO: Support multiple calls. */
 grn_rc
-grn_expr_simple_function_calls_get_arguments(grn_ctx *ctx,
-                                             grn_obj *expr,
-                                             unsigned int i,
-                                             grn_obj *arguments)
+grn_expr_module_list_get_arguments(grn_ctx *ctx,
+                                   grn_obj *expr,
+                                   unsigned int i,
+                                   grn_obj *arguments)
 {
-  grn_expr *e = (grn_expr *)expr;
-  grn_expr_code *codes = e->codes;
-  grn_expr_code *codes_end = codes + e->codes_curr;
+  grn_expr_code *codes;
+  grn_expr_code *module_start;
+  grn_expr_code *module_end;
 
-  for (codes++; codes < codes_end - 1; codes++) {
+  grn_expr_module_list_detect_module(ctx, expr, i, &module_start, &module_end);
+
+  for (codes = module_start + 1; codes < module_end; codes++) {
     grn_obj *value = codes[0].value;
     switch (codes[0].op) {
     case GRN_OP_PUSH :

  Modified: lib/grn_expr.h (+10 -10)
===================================================================
--- lib/grn_expr.h    2018-10-30 14:25:49 +0900 (95e0ac64a)
+++ lib/grn_expr.h    2018-10-30 15:04:04 +0900 (8c69a176c)
@@ -126,16 +126,16 @@ grn_rc grn_expr_simple_function_call_get_arguments(grn_ctx *ctx,
                                                    grn_obj *expr,
                                                    grn_obj *arguments);
 
-grn_bool grn_expr_is_simple_function_calls(grn_ctx *ctx, grn_obj *expr);
-unsigned int grn_expr_simple_function_calls_get_n_calls(grn_ctx *ctx,
-                                                        grn_obj *expr);
-grn_obj *grn_expr_simple_function_calls_get_function(grn_ctx *ctx,
-                                                     grn_obj *expr,
-                                                     unsigned int i);
-grn_rc grn_expr_simple_function_calls_get_arguments(grn_ctx *ctx,
-                                                    grn_obj *expr,
-                                                    unsigned int i,
-                                                    grn_obj *arguments);
+grn_bool grn_expr_is_module_list(grn_ctx *ctx, grn_obj *expr);
+unsigned int grn_expr_module_list_get_n_modules(grn_ctx *ctx,
+                                                grn_obj *expr);
+grn_obj *grn_expr_module_list_get_function(grn_ctx *ctx,
+                                           grn_obj *expr,
+                                           unsigned int i);
+grn_rc grn_expr_module_list_get_arguments(grn_ctx *ctx,
+                                          grn_obj *expr,
+                                          unsigned int i,
+                                          grn_obj *arguments);
 
 #ifdef __cplusplus
 }

  Modified: lib/proc/proc_dump.c (+20 -1)
===================================================================
--- lib/proc/proc_dump.c    2018-10-30 14:25:49 +0900 (83e6ef8f1)
+++ lib/proc/proc_dump.c    2018-10-30 15:04:04 +0900 (3799b905d)
@@ -711,7 +711,26 @@ dump_optionable_obj_string(grn_ctx *ctx,
 {
   const char *value = GRN_TEXT_VALUE(string);
   size_t length = GRN_TEXT_LEN(string);
-  if (length > 0 && value[length - 1] == ')') {
+  grn_bool need_quote = GRN_FALSE;
+  size_t i;
+
+  for (i = 0; i < length; i++) {
+    switch (value[i]) {
+    case '(' :
+    case ')' :
+    case ',' :
+    case ' ' :
+      need_quote = GRN_TRUE;
+      break;
+    default :
+      break;
+    }
+    if (need_quote) {
+      break;
+    }
+  }
+
+  if (need_quote) {
     grn_text_otoj(ctx, dumper->output, string, NULL);
   } else {
     GRN_TEXT_PUT(ctx, dumper->output, value, length);

  Modified: test/command/suite/table_create/token_filters/empty_between_comma.expected (+3 -2)
===================================================================
--- test/command/suite/table_create/token_filters/empty_between_comma.expected    2018-10-30 14:25:49 +0900 (d31f4a7ae)
+++ test/command/suite/table_create/token_filters/empty_between_comma.expected    2018-10-30 15:04:04 +0900 (538c150e8)
@@ -8,10 +8,11 @@ table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   -
       0.0,
       0.0
     ],
-    "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord>"
+    "[info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord, ,TokenFilterStopWord>: Syntax error: <To"
   ],
   false
 ]
-#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord>
+#|e| Syntax error: <TokenFilterStopWord, |,|TokenFilterStopWord>
+#|e| [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord, ,TokenFilterStopWord>: [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord, ,TokenFilterStopWord>: Syntax error: <To
 dump
 plugin_register token_filters/stop_word

  Modified: test/command/suite/table_create/token_filters/multiple.expected (+1 -1)
===================================================================
--- test/command/suite/table_create/token_filters/multiple.expected    2018-10-30 14:25:49 +0900 (b6d5682ec)
+++ test/command/suite/table_create/token_filters/multiple.expected    2018-10-30 15:04:04 +0900 (4d4f27ed1)
@@ -5,4 +5,4 @@ table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   -
 dump
 plugin_register token_filters/stop_word
 
-table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord, TokenFilterStopWord"

  Modified: test/command/suite/table_create/token_filters/nonexistent.expected (+4 -2)
===================================================================
--- test/command/suite/table_create/token_filters/nonexistent.expected    2018-10-30 14:25:49 +0900 (65e527a24)
+++ test/command/suite/table_create/token_filters/nonexistent.expected    2018-10-30 15:04:04 +0900 (903f54831)
@@ -8,10 +8,12 @@ table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   -
       0.0,
       0.0
     ],
-    "[table][create][token-filter] nonexistent token filter: <TokenFilterNonexistent>"
+    "[info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,TokenFilterNonexistent,TokenFilterStopWor"
   ],
   false
 ]
-#|e| [table][create][token-filter] nonexistent token filter: <TokenFilterNonexistent>
+#|e| [expr][parse] unknown identifier: <TokenFilterNonexistent>
+#|e| Syntax error: <TokenFilterStopWord,TokenFilterNonexistent|,|TokenFilterStopWord>: Syntax error: <TokenFilterStopWord,TokenFilterNonexistent|,|TokenFilterStopWord>: [expr][parse] unknown identifier: <TokenFilte
+#|e| [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,TokenFilterNonexistent,TokenFilterStopWord>: [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,TokenFilterNonexistent,TokenFilterStopWor
 dump
 plugin_register token_filters/stop_word

  Modified: test/command/suite/table_create/token_filters/spaces_around_comma.expected (+1 -1)
===================================================================
--- test/command/suite/table_create/token_filters/spaces_around_comma.expected    2018-10-30 14:25:49 +0900 (ac8b07dc6)
+++ test/command/suite/table_create/token_filters/spaces_around_comma.expected    2018-10-30 15:04:04 +0900 (50907409b)
@@ -5,4 +5,4 @@ table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   -
 dump
 plugin_register token_filters/stop_word
 
-table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord, TokenFilterStopWord"

  Modified: test/command/suite/table_create/token_filters/trailing_comma.expected (+3 -2)
===================================================================
--- test/command/suite/table_create/token_filters/trailing_comma.expected    2018-10-30 14:25:49 +0900 (d0fd210bb)
+++ test/command/suite/table_create/token_filters/trailing_comma.expected    2018-10-30 15:04:04 +0900 (52923dd5e)
@@ -8,10 +8,11 @@ table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   -
       0.0,
       0.0
     ],
-    "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,||>"
+    "[info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,>: Syntax error: <TokenFilterStopWord,||>"
   ],
   false
 ]
-#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,||>
+#|e| Syntax error: <TokenFilterStopWord,||>
+#|e| [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,>: [info][set][token-filters][Terms] failed to parse token filters: <TokenFilterStopWord,>: Syntax error: <TokenFilterStopWord,||>
 dump
 plugin_register token_filters/stop_word
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181030/188912fb/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index