[Groonga-commit] groonga/groonga at 288898c [master] sub_filter: support pre filter optimization with scalar accessor

Back to archive index

Kouhei Sutou null+****@clear*****
Tue May 15 16:45:56 JST 2018


Kouhei Sutou	2018-05-15 16:45:56 +0900 (Tue, 15 May 2018)

  New Revision: 288898ce25c0af2dc19b50e78307653f18218e81
  https://github.com/groonga/groonga/commit/288898ce25c0af2dc19b50e78307653f18218e81

  Message:
    sub_filter: support pre filter optimization with scalar accessor

  Copied files:
    test/command/suite/select/function/sub_filter/accessor/scalar/filtered.expected
      (from test/command/suite/select/function/sub_filter/accessor/scalar.expected)
    test/command/suite/select/function/sub_filter/accessor/scalar/filtered.test
      (from test/command/suite/select/function/sub_filter/accessor/scalar.test)
  Modified files:
    lib/grn_report.h
    lib/proc.c
    lib/report.c
  Renamed files:
    test/command/suite/select/function/sub_filter/accessor/scalar/only.expected
      (from test/command/suite/select/function/sub_filter/accessor/scalar.expected)
    test/command/suite/select/function/sub_filter/accessor/scalar/only.test
      (from test/command/suite/select/function/sub_filter/accessor/scalar.test)

  Modified: lib/grn_report.h (+5 -0)
===================================================================
--- lib/grn_report.h    2018-05-15 16:44:49 +0900 (abd5f2d33)
+++ lib/grn_report.h    2018-05-15 16:45:56 +0900 (e10058289)
@@ -47,6 +47,11 @@ void grn_report_column(grn_ctx *ctx,
                        const char *tag,
                        grn_obj *column);
 
+void grn_report_accessor(grn_ctx *ctx,
+                         const char *action,
+                         const char *tag,
+                         grn_obj *accessor);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/proc.c (+68 -0)
===================================================================
--- lib/proc.c    2018-05-15 16:44:49 +0900 (38ffd6be4)
+++ lib/proc.c    2018-05-15 16:45:56 +0900 (ffacf7989)
@@ -2105,6 +2105,60 @@ selector_query(grn_ctx *ctx, grn_obj *table, grn_obj *index,
   return run_query(ctx, table, nargs - 1, args + 1, res, op);
 }
 
+static void
+sub_filter_pre_filter_accessor(grn_ctx *ctx,
+                               grn_accessor *accessor,
+                               grn_id id,
+                               grn_obj *base_res)
+{
+  if (grn_obj_is_scalar_column(ctx, accessor->obj)) {
+    grn_obj value;
+    GRN_RECORD_INIT(&value, 0, DB_OBJ(accessor->obj)->range);
+    grn_obj_get_value(ctx, accessor->obj, id, &value);
+    if (GRN_BULK_VSIZE(&value) > 0) {
+      if (accessor->next) {
+        sub_filter_pre_filter_accessor(ctx,
+                                       accessor->next,
+                                       GRN_RECORD_VALUE(&value),
+                                       base_res);
+      } else {
+        grn_posting posting;
+        memset(&posting, 0, sizeof(grn_posting));
+        posting.rid = GRN_RECORD_VALUE(&value);
+        grn_ii_posting_add(ctx, &posting, (grn_hash *)base_res, GRN_OP_OR);
+      }
+    }
+    GRN_OBJ_FIN(ctx, &value);
+  } else if (grn_obj_is_vector_column(ctx, accessor->obj)) {
+    grn_posting posting;
+    grn_obj values;
+    unsigned int i, n;
+
+    memset(&posting, 0, sizeof(grn_posting));
+    GRN_RECORD_INIT(&values, GRN_OBJ_VECTOR, DB_OBJ(accessor->obj)->range);
+    grn_obj_get_value(ctx, accessor->obj, id, &values);
+    n = grn_vector_size(ctx, &values);
+    for (i = 0; i < n; i++) {
+      posting.rid = grn_uvector_get_element(ctx,
+                                            &values,
+                                            i,
+                                            &(posting.weight));
+      grn_ii_posting_add(ctx, &posting, (grn_hash *)base_res, GRN_OP_OR);
+    }
+    GRN_OBJ_FIN(ctx, &values);
+  } else if (grn_obj_is_index_column(ctx, accessor->obj)) {
+    if (accessor->next) {
+      /* TODO */
+    } else {
+      grn_ii_at(ctx,
+                (grn_ii *)(accessor->obj),
+                id,
+                (grn_hash *)base_res,
+                GRN_OP_OR);
+    }
+  }
+}
+
 static grn_bool
 sub_filter_pre_filter(grn_ctx *ctx,
                       grn_obj *res,
@@ -2184,6 +2238,20 @@ sub_filter_pre_filter(grn_ctx *ctx,
                       "[index]",
                       scope);
     return GRN_TRUE;
+  } else if (grn_obj_is_accessor(ctx, scope)) {
+    GRN_TABLE_EACH_BEGIN(ctx, res, cursor, id) {
+      grn_id *matched_id;
+      grn_table_cursor_get_key(ctx, cursor, (void **)&matched_id);
+      sub_filter_pre_filter_accessor(ctx,
+                                     (grn_accessor *)scope,
+                                     id,
+                                     base_res);
+    } GRN_TABLE_EACH_END(ctx, cursor);
+    grn_report_accessor(ctx,
+                        "[sub_filter][pre-filter]",
+                        "",
+                        scope);
+    return GRN_TRUE;
   } else {
     return GRN_FALSE;
   }

  Modified: lib/report.c (+38 -0)
===================================================================
--- lib/report.c    2018-05-15 16:44:49 +0900 (9bea9e239)
+++ lib/report.c    2018-05-15 16:45:56 +0900 (409bd8597)
@@ -136,3 +136,41 @@ grn_report_column(grn_ctx *ctx,
           GRN_TEXT_VALUE(&description));
   GRN_OBJ_FIN(ctx, &description);
 }
+
+void
+grn_report_accessor(grn_ctx *ctx,
+                    const char *action,
+                    const char *tag,
+                    grn_obj *accessor)
+{
+  grn_obj description;
+  grn_obj *range;
+
+  if (!grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
+    return;
+  }
+
+  GRN_TEXT_INIT(&description, 0);
+  grn_accessor_name(ctx, accessor, &description);
+  range = grn_ctx_at(ctx, grn_obj_get_range(ctx, accessor));
+  if (range) {
+    char name[GRN_TABLE_MAX_KEY_SIZE];
+    int name_size;
+
+    name_size = grn_obj_name(ctx, range, name, GRN_TABLE_MAX_KEY_SIZE);
+    GRN_TEXT_PUTS(ctx, &description, " -> ");
+    if (name_size == 0) {
+      GRN_TEXT_PUTS(ctx, &description, "(temporary)");
+    } else {
+      GRN_TEXT_PUTS(ctx, &description, "<");
+      GRN_TEXT_PUT(ctx, &description, name, name_size);
+      GRN_TEXT_PUTS(ctx, &description, ">");
+    }
+  }
+  GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
+          "%s[accessor]%s %.*s",
+          action, tag,
+          (int)GRN_TEXT_LEN(&description),
+          GRN_TEXT_VALUE(&description));
+  GRN_OBJ_FIN(ctx, &description);
+}

  Copied: test/command/suite/select/function/sub_filter/accessor/scalar/filtered.expected (+13 -13) 70%
===================================================================
--- test/command/suite/select/function/sub_filter/accessor/scalar.expected    2018-05-15 16:44:49 +0900 (c7550063b)
+++ test/command/suite/select/function/sub_filter/accessor/scalar/filtered.expected    2018-05-15 16:45:56 +0900 (74430a825)
@@ -42,7 +42,9 @@ load --table Packages
 {"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
 ]
 [[0,0.0,0.0],3]
-select Packages   --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")'   --output_columns '_key, files, files.author.birthday'
+log_level --level info
+[[0,0.0,0.0],true]
+select Packages   --filter '_key == "groonga" &&             sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")'   --output_columns '_key, files, files.author.birthday'
 [
   [
     0,
@@ -52,7 +54,7 @@ select Packages   --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04
   [
     [
       [
-        2
+        1
       ],
       [
         [
@@ -78,18 +80,16 @@ select Packages   --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04
           697561200.0,
           568220400.0
         ]
-      ],
-      [
-        "mroonga",
-        [
-          "ha_mroonga.cc",
-          "ha_mroonga.hpp"
-        ],
-        [
-          568220400.0,
-          409935600.0
-        ]
       ]
     ]
   ]
 ]
+#|i| [table][select][index][equal][accessor][key] <Packages>
+#|i| [table][select][index][selector][no-index][sub_filter] <Packages>
+#|i| [sub_filter][pre-filter][accessor] files(Packages).author(Files) -> <Users>
+#|i| [table][select][index][range] <Birthdays.users_birthday>
+#|i| [table][select][index][range] <Birthdays.users_birthday>
+#|i| [accessor][resolve][data-column][index] <Users.files_author_index>
+#|i| [accessor][resolve][data-column][index] <Files.packages_files_index>
+log_level --level notice
+[[0,0.0,0.0],true]

  Copied: test/command/suite/select/function/sub_filter/accessor/scalar/filtered.test (+7 -1) 86%
===================================================================
--- test/command/suite/select/function/sub_filter/accessor/scalar.test    2018-05-15 16:44:49 +0900 (e7f009421)
+++ test/command/suite/select/function/sub_filter/accessor/scalar/filtered.test    2018-05-15 16:45:56 +0900 (2cb81e55a)
@@ -37,6 +37,12 @@ load --table Packages
 {"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
 ]
 
+
+#@add-important-log-levels info
+log_level --level info
 select Packages \
-  --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")' \
+  --filter '_key == "groonga" && \
+            sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")' \
   --output_columns '_key, files, files.author.birthday'
+log_level --level notice
+#@remove-important-log-levels info

  Renamed: test/command/suite/select/function/sub_filter/accessor/scalar/only.expected (+0 -0) 100%
===================================================================

  Renamed: test/command/suite/select/function/sub_filter/accessor/scalar/only.test (+0 -0) 100%
===================================================================
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180515/97462d27/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index