[Groonga-commit] groonga/groonga at b1b6709 [master] select: add columns[LABEL].window.group_keys

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Feb 1 10:42:20 JST 2017


Kouhei Sutou	2017-02-01 10:42:20 +0900 (Wed, 01 Feb 2017)

  New Revision: b1b6709cecfe57c227adf7ce0ef9e30157c049c8
  https://github.com/groonga/groonga/commit/b1b6709cecfe57c227adf7ce0ef9e30157c049c8

  Message:
    select: add columns[LABEL].window.group_keys
    
    See test about how to use.

  Added files:
    test/command/suite/select/columns/window_function/window_sum/group.expected
    test/command/suite/select/columns/window_function/window_sum/group.test
  Modified files:
    include/groonga/window_function.h
    lib/grn_window_function.h
    lib/proc/proc_select.c
    lib/window_function.c

  Modified: include/groonga/window_function.h (+2 -0)
===================================================================
--- include/groonga/window_function.h    2017-01-31 19:04:36 +0900 (1473b01)
+++ include/groonga/window_function.h    2017-02-01 10:42:20 +0900 (500638f)
@@ -42,6 +42,8 @@ GRN_API grn_obj *grn_window_get_table(grn_ctx *ctx,
 typedef struct _grn_window_definition {
   grn_table_sort_key *sort_keys;
   size_t n_sort_keys;
+  grn_table_sort_key *group_keys;
+  size_t n_group_keys;
 } grn_window_definition;
 
 typedef grn_rc grn_window_function_func(grn_ctx *ctx,

  Modified: lib/grn_window_function.h (+7 -2)
===================================================================
--- lib/grn_window_function.h    2017-01-31 19:04:36 +0900 (71c71e9)
+++ lib/grn_window_function.h    2017-02-01 10:42:20 +0900 (7d53977)
@@ -1,5 +1,6 @@
+/* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2016 Brazil
+  Copyright(C) 2016-2017 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -20,6 +21,7 @@
 
 struct _grn_window {
   grn_obj *table;
+  grn_obj *grouped_table;
   grn_obj ids;
   size_t n_ids;
   ssize_t current_index;
@@ -28,7 +30,10 @@ struct _grn_window {
   size_t n_sort_keys;
 };
 
-grn_rc grn_window_init(grn_ctx *ctx, grn_window *window, grn_obj *table);
+grn_rc grn_window_init(grn_ctx *ctx,
+                       grn_window *window,
+                       grn_obj *table,
+                       grn_obj *grouped_table);
 grn_rc grn_window_fin(grn_ctx *ctx, grn_window *window);
 grn_rc grn_window_set_sort_keys(grn_ctx *ctx,
                                 grn_window *window,

  Modified: lib/proc/proc_select.c (+79 -26)
===================================================================
--- lib/proc/proc_select.c    2017-01-31 19:04:36 +0900 (8adb3fe)
+++ lib/proc/proc_select.c    2017-02-01 10:42:20 +0900 (4e056f5)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2016 Brazil
+  Copyright(C) 2009-2017 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -67,6 +67,7 @@ typedef struct {
   grn_select_string value;
   struct {
     grn_select_string sort_keys;
+    grn_select_string group_keys;
   } window;
 } grn_column_data;
 
@@ -331,6 +332,7 @@ grn_column_data_init(grn_ctx *ctx,
   column->flags = GRN_OBJ_COLUMN_SCALAR;
   GRN_SELECT_INIT_STRING(column->value);
   GRN_SELECT_INIT_STRING(column->window.sort_keys);
+  GRN_SELECT_INIT_STRING(column->window.group_keys);
 
   return GRN_TRUE;
 }
@@ -341,7 +343,8 @@ grn_column_data_fill(grn_ctx *ctx,
                      grn_obj *type_raw,
                      grn_obj *flags,
                      grn_obj *value,
-                     grn_obj *window_sort_keys)
+                     grn_obj *window_sort_keys,
+                     grn_obj *window_group_keys)
 {
   if (type_raw && GRN_TEXT_LEN(type_raw) > 0) {
     grn_obj *type;
@@ -399,6 +402,7 @@ grn_column_data_fill(grn_ctx *ctx,
 
   GRN_SELECT_FILL_STRING(column->value, value);
   GRN_SELECT_FILL_STRING(column->window.sort_keys, window_sort_keys);
+  GRN_SELECT_FILL_STRING(column->window.group_keys, window_group_keys);
 
   return GRN_TRUE;
 }
@@ -426,9 +430,11 @@ grn_column_data_collect(grn_ctx *ctx,
     grn_obj *value = NULL;
     struct {
       grn_obj *sort_keys;
+      grn_obj *group_keys;
     } window;
 
     window.sort_keys = NULL;
+    window.group_keys = NULL;
 
     grn_hash_cursor_get_value(ctx, cursor, (void **)&column);
 
@@ -456,13 +462,16 @@ grn_column_data_collect(grn_ctx *ctx,
     GET_VAR(flags);
     GET_VAR(value);
     GET_VAR(window.sort_keys);
+    GET_VAR(window.group_keys);
 
 #undef GET_VAR
 
 #undef GET_VAR_RAW
 
     grn_column_data_fill(ctx, column,
-                         type, flags, value, window.sort_keys);
+                         type, flags, value,
+                         window.sort_keys,
+                         window.group_keys);
   }
   grn_hash_cursor_close(ctx, cursor);
   return GRN_TRUE;
@@ -1301,29 +1310,66 @@ grn_select_apply_columns(grn_ctx *ctx,
                                         expression,
                                         data->filter.condition.expression);
 
-    if (column_data->window.sort_keys.length > 0) {
+    if (column_data->window.sort_keys.length > 0 ||
+        column_data->window.group_keys.length > 0) {
       grn_window_definition definition;
-      int n_sort_keys;
       grn_rc rc;
 
-      definition.sort_keys =
-        grn_table_sort_key_from_str(ctx,
-                                    column_data->window.sort_keys.value,
-                                    column_data->window.sort_keys.length,
-                                    table, &n_sort_keys);
-      definition.n_sort_keys = n_sort_keys;
-      if (!definition.sort_keys) {
-        grn_obj_close(ctx, expression);
-        grn_obj_close(ctx, column);
-        GRN_PLUGIN_ERROR(ctx,
-                         GRN_INVALID_ARGUMENT,
-                         "[select][column][%s][%.*s] "
-                         "failed to parse sort keys: %s",
-                         grn_column_stage_name(column_data->stage),
-                         (int)(column_data->label.length),
-                         column_data->label.value,
-                         ctx->errbuf);
-        break;
+      if (column_data->window.sort_keys.length > 0) {
+        int n_sort_keys;
+        definition.sort_keys =
+          grn_table_sort_key_from_str(ctx,
+                                      column_data->window.sort_keys.value,
+                                      column_data->window.sort_keys.length,
+                                      table, &n_sort_keys);
+        definition.n_sort_keys = n_sort_keys;
+        if (!definition.sort_keys) {
+          grn_obj_close(ctx, expression);
+          grn_obj_close(ctx, column);
+          GRN_PLUGIN_ERROR(ctx,
+                           GRN_INVALID_ARGUMENT,
+                           "[select][column][%s][%.*s] "
+                           "failed to parse sort keys: %s",
+                           grn_column_stage_name(column_data->stage),
+                           (int)(column_data->label.length),
+                           column_data->label.value,
+                           ctx->errbuf);
+          break;
+        }
+      } else {
+        definition.sort_keys = NULL;
+        definition.n_sort_keys = 0;
+      }
+
+      if (column_data->window.group_keys.length > 0) {
+        int n_group_keys;
+        definition.group_keys =
+          grn_table_sort_key_from_str(ctx,
+                                      column_data->window.group_keys.value,
+                                      column_data->window.group_keys.length,
+                                      table, &n_group_keys);
+        definition.n_group_keys = n_group_keys;
+        if (!definition.group_keys) {
+          grn_obj_close(ctx, expression);
+          grn_obj_close(ctx, column);
+          if (definition.sort_keys) {
+            grn_table_sort_key_close(ctx,
+                                     definition.sort_keys,
+                                     definition.n_sort_keys);
+          }
+          GRN_PLUGIN_ERROR(ctx,
+                           GRN_INVALID_ARGUMENT,
+                           "[select][column][%s][%.*s] "
+                           "failed to parse group keys: %s",
+                           grn_column_stage_name(column_data->stage),
+                           (int)(column_data->label.length),
+                           column_data->label.value,
+                           ctx->errbuf);
+          break;
+        }
+      } else {
+        definition.group_keys = NULL;
+        definition.n_group_keys = 0;
       }
 
       rc = grn_table_apply_window_function(ctx,
@@ -1331,9 +1377,16 @@ grn_select_apply_columns(grn_ctx *ctx,
                                            column,
                                            &definition,
                                            expression);
-      grn_table_sort_key_close(ctx,
-                               definition.sort_keys,
-                               definition.n_sort_keys);
+      if (definition.sort_keys) {
+        grn_table_sort_key_close(ctx,
+                                 definition.sort_keys,
+                                 definition.n_sort_keys);
+      }
+      if (definition.group_keys) {
+        grn_table_sort_key_close(ctx,
+                                 definition.group_keys,
+                                 definition.n_group_keys);
+      }
       if (rc != GRN_SUCCESS) {
         grn_obj_close(ctx, expression);
         grn_obj_close(ctx, column);

  Modified: lib/window_function.c (+160 -47)
===================================================================
--- lib/window_function.c    2017-01-31 19:04:36 +0900 (96482fb)
+++ lib/window_function.c    2017-02-01 10:42:20 +0900 (4989ce8)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2016 Brazil
+  Copyright(C) 2016-2017 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -24,11 +24,15 @@
 #include <string.h>
 
 grn_rc
-grn_window_init(grn_ctx *ctx, grn_window *window, grn_obj *table)
+grn_window_init(grn_ctx *ctx,
+                grn_window *window,
+                grn_obj *table,
+                grn_obj *grouped_table)
 {
   GRN_API_ENTER;
 
   window->table = table;
+  window->grouped_table = grouped_table;
   GRN_RECORD_INIT(&(window->ids), GRN_OBJ_VECTOR, grn_obj_id(ctx, table));
   window->n_ids = 0;
   window->current_index = 0;
@@ -154,8 +158,6 @@ grn_window_set_sort_keys(grn_ctx *ctx,
                          grn_table_sort_key *sort_keys,
                          size_t n_sort_keys)
 {
-  grn_obj *sorted;
-
   GRN_API_ENTER;
 
   if (!window) {
@@ -163,39 +165,71 @@ grn_window_set_sort_keys(grn_ctx *ctx,
     GRN_API_RETURN(ctx->rc);
   }
 
-  sorted = grn_table_create(ctx,
-                            NULL, 0, NULL,
-                            GRN_OBJ_TABLE_NO_KEY,
-                            NULL, window->table);
-  if (!sorted) {
-    ERR(ctx->rc,
-        "[window][set][sort-keys] "
-        "failed to create a table to store sorted records: %s",
-        ctx->errbuf);
-    GRN_API_RETURN(ctx->rc);
-  }
-
-  grn_table_sort(ctx, window->table, 0, -1, sorted, sort_keys, n_sort_keys);
-  if (ctx->rc != GRN_SUCCESS) {
-    ERR(ctx->rc,
-        "[window][set][sort-keys] "
-        "failed to sort: %s",
-        ctx->errbuf);
-    grn_obj_unlink(ctx, sorted);
-    GRN_API_RETURN(ctx->rc);
-  }
-
-  GRN_BULK_REWIND(&(window->ids));
-  GRN_TABLE_EACH_BEGIN(ctx, sorted, cursor, id) {
-    void *value;
-    grn_id record_id;
+  if (sort_keys) {
+    grn_obj *sorted;
+
+    sorted = grn_table_create(ctx,
+                              NULL, 0, NULL,
+                              GRN_OBJ_TABLE_NO_KEY,
+                              NULL,
+                              window->grouped_table);
+    if (!sorted) {
+      ERR(ctx->rc,
+          "[window][set][sort-keys] "
+          "failed to create a table to store sorted records: %s",
+          ctx->errbuf);
+      GRN_API_RETURN(ctx->rc);
+    }
 
-    grn_table_cursor_get_value(ctx, cursor, &value);
-    record_id = *((grn_id *)value);
-    GRN_RECORD_PUT(ctx, &(window->ids), record_id);
-  } GRN_TABLE_EACH_END(ctx, cursor);
+    grn_table_sort(ctx,
+                   window->grouped_table,
+                   0, -1,
+                   sorted,
+                   sort_keys, n_sort_keys);
+    if (ctx->rc != GRN_SUCCESS) {
+      ERR(ctx->rc,
+          "[window][set][sort-keys] "
+          "failed to sort: %s",
+          ctx->errbuf);
+      grn_obj_unlink(ctx, sorted);
+      GRN_API_RETURN(ctx->rc);
+    }
 
-  grn_obj_unlink(ctx, sorted);
+    GRN_BULK_REWIND(&(window->ids));
+    GRN_TABLE_EACH_BEGIN(ctx, sorted, cursor, id) {
+      void *value;
+      grn_id record_id;
+
+      grn_table_cursor_get_value(ctx, cursor, &value);
+      if (window->table == window->grouped_table) {
+        record_id = *((grn_id *)value);
+      } else {
+        grn_id grouped_record_id;
+        grouped_record_id = *((grn_id *)value);
+        grn_table_get_key(ctx,
+                          window->grouped_table,
+                          grouped_record_id,
+                          &record_id, sizeof(grn_id));
+      }
+      GRN_RECORD_PUT(ctx, &(window->ids), record_id);
+    } GRN_TABLE_EACH_END(ctx, cursor);
+
+    grn_obj_close(ctx, sorted);
+  } else {
+    GRN_BULK_REWIND(&(window->ids));
+    GRN_TABLE_EACH_BEGIN(ctx, window->grouped_table, cursor, id) {
+      grn_id record_id;
+      if (window->table == window->grouped_table) {
+        record_id = id;
+      } else {
+        grn_table_get_key(ctx,
+                          window->grouped_table,
+                          id,
+                          &record_id, sizeof(grn_id));
+      }
+      GRN_RECORD_PUT(ctx, &(window->ids), id);
+    } GRN_TABLE_EACH_END(ctx, cursor);
+  }
 
   window->n_ids = GRN_BULK_VSIZE(&(window->ids)) / sizeof(grn_id);
 
@@ -300,6 +334,29 @@ grn_expr_call_window_function(grn_ctx *ctx,
   return rc;
 }
 
+static grn_rc
+grn_table_apply_window_function_per_group(grn_ctx *ctx,
+                                          grn_obj *table,
+                                          grn_obj *grouped_table,
+                                          grn_window_definition *definition,
+                                          grn_obj *output_column,
+                                          grn_obj *window_function_call)
+{
+  grn_window window;
+
+  grn_window_init(ctx, &window, table, grouped_table);
+  grn_window_set_sort_keys(ctx, &window,
+                           definition->sort_keys,
+                           definition->n_sort_keys);
+  grn_expr_call_window_function(ctx,
+                                output_column,
+                                &window,
+                                window_function_call);
+  grn_window_fin(ctx, &window);
+
+  return GRN_SUCCESS;
+}
+
 grn_rc
 grn_table_apply_window_function(grn_ctx *ctx,
                                 grn_obj *table,
@@ -307,7 +364,7 @@ grn_table_apply_window_function(grn_ctx *ctx,
                                 grn_window_definition *definition,
                                 grn_obj *window_function_call)
 {
-  grn_window window;
+  grn_rc rc;
 
   GRN_API_ENTER;
 
@@ -329,16 +386,72 @@ grn_table_apply_window_function(grn_ctx *ctx,
     GRN_API_RETURN(ctx->rc);
   }
 
-  grn_window_init(ctx, &window, table);
-  grn_window_set_direction(ctx, &window, window.direction);
-  grn_window_set_sort_keys(ctx, &window,
-                           definition->sort_keys,
-                           definition->n_sort_keys);
-  grn_expr_call_window_function(ctx,
-                                output_column,
-                                &window,
-                                window_function_call);
-  grn_window_fin(ctx, &window);
+  if (definition->group_keys) {
+    grn_table_group_result grouped;
+    grn_obj subrecs;
+
+    grouped.table = NULL;
+    grouped.key_begin = 0;
+    grouped.key_end = definition->n_group_keys;
+    grouped.limit = -1;
+    grouped.flags = GRN_TABLE_GROUP_CALC_COUNT;
+    grouped.op = 0;
+    grouped.max_n_subrecs = grn_table_size(ctx, table);
+    grouped.calc_target = NULL;
+
+    /* TODO: grn_table_group() should support table output for sub records? */
+    GRN_RECORD_INIT(&subrecs, GRN_OBJ_VECTOR, grn_obj_id(ctx, table));
+    grn_bulk_reserve(ctx, &subrecs, sizeof(grn_obj *) * grouped.max_n_subrecs);
+    grn_table_group(ctx,
+                    table,
+                    definition->group_keys,
+                    definition->n_group_keys,
+                    &grouped,
+                    1);
+    GRN_TABLE_EACH_BEGIN(ctx, grouped.table, cursor, id) {
+      grn_obj *grouped_table;
+
+      grouped_table = grn_table_create(ctx,
+                                       NULL, 0,
+                                       NULL,
+                                       GRN_TABLE_HASH_KEY,
+                                       table,
+                                       NULL);
+      {
+        unsigned int i, n;
+        GRN_BULK_REWIND(&subrecs);
+        n = grn_table_get_subrecs(ctx,
+                                  grouped.table,
+                                  id,
+                                  (grn_id *)GRN_BULK_HEAD(&subrecs),
+                                  NULL,
+                                  grouped.max_n_subrecs);
+        for (i = 0; i < n; i++) {
+          grn_id subrec_id;
+          subrec_id = GRN_RECORD_VALUE_AT(&subrecs, i);
+          grn_table_add(ctx, grouped_table, &subrec_id, sizeof(grn_id), NULL);
+        }
+      }
+      /* TODO: rc handling */
+      rc = grn_table_apply_window_function_per_group(ctx,
+                                                     table,
+                                                     grouped_table,
+                                                     definition,
+                                                     output_column,
+                                                     window_function_call);
+      grn_obj_close(ctx, grouped_table);
+    } GRN_TABLE_EACH_END(ctx, cursor);
+
+    grn_obj_close(ctx, grouped.table);
+    GRN_OBJ_FIN(ctx, &subrecs);
+  } else {
+    rc = grn_table_apply_window_function_per_group(ctx,
+                                                   table,
+                                                   table,
+                                                   definition,
+                                                   output_column,
+                                                   window_function_call);
+  }
 
-  GRN_API_RETURN(GRN_SUCCESS);
+  GRN_API_RETURN(rc);
 }

  Added: test/command/suite/select/columns/window_function/window_sum/group.expected (+75 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/columns/window_function/window_sum/group.expected    2017-02-01 10:42:20 +0900 (2847c88)
@@ -0,0 +1,75 @@
+table_create Logs TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Logs item COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+column_create Logs price COLUMN_SCALAR UInt32
+[[0,0.0,0.0],true]
+load --table Logs
+[
+{"item": "item1", "price": 666},
+{"item": "item1", "price": 999},
+{"item": "item1", "price": 777},
+{"item": "item2", "price": 111},
+{"item": "item2", "price": 333},
+{"item": "item3", "price": 222}
+]
+[[0,0.0,0.0],6]
+select Logs   --columns[sum].stage initial   --columns[sum].value 'window_sum(price)'   --columns[sum].type UInt32   --columns[sum].window.group_keys item   --columns[sum].window.sort_keys price   --output_columns 'item, price, sum'   --sort_keys item,price
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        6
+      ],
+      [
+        [
+          "item",
+          "ShortText"
+        ],
+        [
+          "price",
+          "UInt32"
+        ],
+        [
+          "sum",
+          "UInt32"
+        ]
+      ],
+      [
+        "item1",
+        666,
+        666
+      ],
+      [
+        "item1",
+        777,
+        1443
+      ],
+      [
+        "item1",
+        999,
+        2442
+      ],
+      [
+        "item2",
+        111,
+        111
+      ],
+      [
+        "item2",
+        333,
+        444
+      ],
+      [
+        "item3",
+        222,
+        222
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/columns/window_function/window_sum/group.test (+22 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/columns/window_function/window_sum/group.test    2017-02-01 10:42:20 +0900 (3a6f148)
@@ -0,0 +1,22 @@
+table_create Logs TABLE_NO_KEY
+column_create Logs item COLUMN_SCALAR ShortText
+column_create Logs price COLUMN_SCALAR UInt32
+
+load --table Logs
+[
+{"item": "item1", "price": 666},
+{"item": "item1", "price": 999},
+{"item": "item1", "price": 777},
+{"item": "item2", "price": 111},
+{"item": "item2", "price": 333},
+{"item": "item3", "price": 222}
+]
+
+select Logs \
+  --columns[sum].stage initial \
+  --columns[sum].value 'window_sum(price)' \
+  --columns[sum].type UInt32 \
+  --columns[sum].window.group_keys item \
+  --columns[sum].window.sort_keys price \
+  --output_columns 'item, price, sum' \
+  --sort_keys item,price
-------------- next part --------------
An HTML attachment was scrubbed...
Download



More information about the Groonga-commit mailing list
Back to archive index