[Groonga-commit] groonga/groonga at 1972539 [master] Use the min estimated size in terms as estimated size for query

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Apr 9 14:30:30 JST 2015


Kouhei Sutou	2015-04-09 14:30:30 +0900 (Thu, 09 Apr 2015)

  New Revision: 19725397583fd61951e03a42bc7361d873127199
  https://github.com/groonga/groonga/commit/19725397583fd61951e03a42bc7361d873127199

  Message:
    Use the min estimated size in terms as estimated size for query
    
    Because all terms must exist in the query, the total number of matched
    records cannot exceed the minimum number of matched records among the terms.

  Modified files:
    lib/ii.c
    test/unit/core/test-inverted-index.c

  Modified: lib/ii.c (+6 -1)
===================================================================
--- lib/ii.c    2015-04-08 15:59:17 +0900 (488abe1)
+++ lib/ii.c    2015-04-09 14:30:30 +0900 (8835236)
@@ -19,6 +19,7 @@
 #include <fcntl.h>
 #include <string.h>
 #include <sys/stat.h>
+#include <math.h>
 
 #include "grn_ii.h"
 #include "grn_ctx_impl.h"
@@ -6505,7 +6506,11 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
     token_info *ti = tis[i];
     double term_estimated_size;
     term_estimated_size = ((double)ti->size / ti->ntoken);
-    estimated_size += (term_estimated_size - estimated_size) / (i + 1);
+    if (i == 0) {
+      estimated_size = term_estimated_size;
+    } else {
+      estimated_size = fmin(estimated_size, term_estimated_size);
+    }
   }
 
 exit :

  Modified: test/unit/core/test-inverted-index.c (+38 -0)
===================================================================
--- test/unit/core/test-inverted-index.c    2015-04-08 15:59:17 +0900 (6d0e631)
+++ test/unit/core/test-inverted-index.c    2015-04-09 14:30:30 +0900 (b19dfd1)
@@ -40,6 +40,7 @@ void test_scalar_index(void);
 void test_int_index(void);
 void test_mroonga_index(void);
 void test_mroonga_index_score(void);
+void test_estimate_size_for_query(void);
 
 #define TYPE_SIZE 1024
 
@@ -934,3 +935,40 @@ test_mroonga_index_score(void)
   grn_obj_close(context, lc);
   grn_obj_close(context, t1);
 }
+
+void
+test_estimate_size_for_query(void)
+{
+  grn_obj *index_column;
+  grn_ii *ii;
+
+  grn_obj_close(context, db);
+  db = grn_db_create(context,
+                     cut_build_path(tmp_directory, "estimate.grn", NULL),
+                     NULL);
+
+  assert_send_command("table_create Memos TABLE_NO_KEY");
+  assert_send_command("column_create Memos content COLUMN_SCALAR Text");
+  assert_send_command("table_create Terms TABLE_PAT_KEY ShortText "
+                      "--default_tokenizer TokenBigramSplitSymbolAlphaDigit "
+                      "--normalizer NormalizerAuto");
+  assert_send_command("column_create Terms index COLUMN_INDEX|WITH_POSITION "
+                      "Memos content");
+  assert_send_command("load --table Memos\n"
+                      "["
+                      "[\"content\"],"
+                      "[\"Groonga\"],"
+                      "[\"Rroonga\"],"
+                      "[\"Mroonga\"]"
+                      "]");
+
+  index_column = grn_ctx_get(context, "Terms.index", strlen("Terms.index"));
+  ii = (grn_ii *)index_column;
+
+  cut_assert_equal_double(1, DBL_EPSILON,
+                          grn_ii_estimate_size_for_query(context,
+                                                         ii,
+                                                         "Groonga",
+                                                         strlen("Groonga"),
+                                                         NULL));
+}
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Descargar 



More information about the Groonga-commit mailing list
Back to archive index