[Groonga-commit] groonga/groonga [master] added grn::dat::KeyCursor.

Back to archive index

null+****@clear***** null+****@clear*****
2011年 6月 27日 (月) 09:26:52 JST


Susumu Yata	2011-06-27 00:26:52 +0000 (Mon, 27 Jun 2011)

  New Revision: 69419b68ac100dd8629087eea943b8dc809cad6f

  Log:
    added grn::dat::KeyCursor.

  Added files:
    lib/dat/key-cursor.cpp
    lib/dat/key-cursor.hpp
  Modified files:
    lib/dat/Makefile.am
    lib/dat/common-prefix-search-cursor.cpp
    lib/dat/id-cursor.cpp
    lib/dat/predictive-search-cursor.cpp
    lib/dat/predictive-search-cursor.hpp

  Modified: lib/dat/Makefile.am (+2 -0)
===================================================================
--- lib/dat/Makefile.am    2011-06-24 15:18:34 +0000 (f85cbb1)
+++ lib/dat/Makefile.am    2011-06-27 00:26:52 +0000 (176feb0)
@@ -5,6 +5,7 @@ DEFS += -D_REENTRANT $(GRN_DEFS)
 libgrndat_la_SOURCES =				\
 	common-prefix-search-cursor.cpp		\
 	id-cursor.cpp				\
+	key-cursor.cpp				\
 	predictive-search-cursor.cpp		\
 	trie.cpp
 
@@ -17,6 +18,7 @@ noinst_HEADERS =				\
 	dat.hpp					\
 	header.hpp				\
 	id-cursor.hpp				\
+	key-cursor.hpp				\
 	key-info.hpp				\
 	key.hpp					\
 	node.hpp				\

  Modified: lib/dat/common-prefix-search-cursor.cpp (+14 -15)
===================================================================
--- lib/dat/common-prefix-search-cursor.cpp    2011-06-24 15:18:34 +0000 (6a426f0)
+++ lib/dat/common-prefix-search-cursor.cpp    2011-06-27 00:26:52 +0000 (85bf069)
@@ -96,21 +96,6 @@ void CommonPrefixSearchCursor::init(const UInt8 *ptr,
   UInt32 node_id = ROOT_NODE_ID;
   UInt32 skip_count = 0;
   for (UInt32 i = 0; i < max_length; ++i) {
-    if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) {
-      if (i >= min_length) {
-        if (skip_count < offset_) {
-          ++skip_count;
-        } else {
-          const UInt32 terminal =
-              trie_->ith_node(node_id).offset() ^ TERMINAL_LABEL;
-          buf_.push_back(trie_->ith_node(terminal).key_id());
-          if (buf_.size() >= limit_) {
-            return;
-          }
-        }
-      }
-    }
-
     const Base base = trie_->ith_node(node_id).base();
     if (base.is_terminal()) {
       Key key;
@@ -126,6 +111,20 @@ void CommonPrefixSearchCursor::init(const UInt8 *ptr,
       return;
     }
 
+    if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) {
+      if (i >= min_length) {
+        if (skip_count < offset_) {
+          ++skip_count;
+        } else {
+          const UInt32 terminal = base.offset() ^ TERMINAL_LABEL;
+          buf_.push_back(trie_->ith_node(terminal).key_id());
+          if (buf_.size() >= limit_) {
+            return;
+          }
+        }
+      }
+    }
+
     node_id = base.offset() ^ ptr[i];
     if (trie_->ith_node(node_id).label() != ptr[i]) {
       return;

  Modified: lib/dat/id-cursor.cpp (+2 -1)
===================================================================
--- lib/dat/id-cursor.cpp    2011-06-24 15:18:34 +0000 (809ac42)
+++ lib/dat/id-cursor.cpp    2011-06-27 00:26:52 +0000 (e0b7591)
@@ -98,7 +98,8 @@ UInt32 IdCursor::fix_flags(UInt32 flags) const {
   return flags;
 }
 
-IdCursor::IdCursor(const Trie &trie, UInt32 offset, UInt32 limit, UInt32 flags)
+IdCursor::IdCursor(const Trie &trie,
+                   UInt32 offset, UInt32 limit, UInt32 flags)
     : trie_(&trie),
       offset_(offset),
       limit_(limit),

  Added: lib/dat/key-cursor.cpp (+357 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key-cursor.cpp    2011-06-27 00:26:52 +0000 (90c8d72)
@@ -0,0 +1,357 @@
+#include "key-cursor.hpp"
+
+#include <algorithm>
+#include <cstring>
+
+namespace grn {
+namespace dat {
+
+KeyCursor::KeyCursor()
+    : trie_(NULL),
+      offset_(0),
+      limit_(UINT32_MAX),
+      flags_(PREDICTIVE_CURSOR),
+      buf_(),
+      count_(0),
+      max_count_(0),
+      end_(false),
+      end_ptr_(NULL),
+      end_length_(0) {}
+
+KeyCursor::~KeyCursor() {
+  close();
+}
+
+
+void KeyCursor::open(const Trie &trie,
+                     const void *min_ptr, UInt32 min_length,
+                     const void *max_ptr, UInt32 max_length,
+                     UInt32 offset,
+                     UInt32 limit,
+                     UInt32 flags) {
+  GRN_DAT_PARAM_ERROR_IF((min_ptr == NULL) && (min_length != 0));
+  GRN_DAT_PARAM_ERROR_IF((max_ptr == NULL) && (max_length != 0));
+
+  flags = fix_flags(flags);
+  KeyCursor new_cursor(trie, offset, limit, flags);
+  new_cursor.init(static_cast<const UInt8 *>(min_ptr), min_length,
+                  static_cast<const UInt8 *>(max_ptr), max_length);
+  new_cursor.swap(this);
+}
+
+void KeyCursor::close() {
+  trie_ = NULL;
+  offset_ = 0;
+  limit_ = UINT32_MAX;
+  flags_ = PREDICTIVE_CURSOR;
+  buf_.clear();
+  count_ = 0;
+  max_count_ = 0;
+  end_ = false;
+  if ((end_ptr_ != NULL) && (end_length_ != 0)) {
+    delete [] end_ptr_;
+  }
+  end_ptr_ = NULL;
+  end_length_ = 0;
+}
+
+bool KeyCursor::next(Key *key) {
+  if (end_ || (count_ >= max_count_)) {
+    return false;
+  }
+
+  if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
+    return ascending_next(key);
+  } else {
+    return descending_next(key);
+  }
+}
+
+UInt32 KeyCursor::fix_flags(UInt32 flags) const {
+  const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
+  GRN_DAT_PARAM_ERROR_IF((cursor_type != 0) &&
+                         (cursor_type != PREDICTIVE_CURSOR));
+  flags |= PREDICTIVE_CURSOR;
+
+  const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
+  GRN_DAT_PARAM_ERROR_IF((cursor_order != 0) &&
+                         (cursor_order != ASCENDING_CURSOR) &&
+                         (cursor_order != DESCENDING_CURSOR));
+  if (cursor_order == 0) {
+    flags |= ASCENDING_CURSOR;
+  }
+
+  const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
+  GRN_DAT_PARAM_ERROR_IF(
+      cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND));
+
+  return flags;
+}
+
+KeyCursor::KeyCursor(const Trie &trie,
+                     UInt32 offset, UInt32 limit, UInt32 flags)
+    : trie_(&trie),
+      offset_(offset),
+      limit_(limit),
+      flags_(flags),
+      buf_(),
+      count_(0),
+      max_count_(0),
+      end_(false),
+      end_ptr_(NULL),
+      end_length_(0) {}
+
+void KeyCursor::init(const UInt8 *min_ptr, UInt32 min_length,
+                     const UInt8 *max_ptr, UInt32 max_length) {
+  if (offset_ > (UINT32_MAX - limit_)) {
+    max_count_ = UINT32_MAX;
+  } else {
+    max_count_ = offset_ + limit_;
+  }
+
+  if (limit_ == 0) {
+    return;
+  }
+
+  if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
+    ascending_init(min_ptr, min_length, max_ptr, max_length);
+  } else {
+    descending_init(min_ptr, min_length, max_ptr, max_length);
+  }
+}
+
+void KeyCursor::ascending_init(const UInt8 *min_ptr, UInt32 min_length,
+                               const UInt8 *max_ptr, UInt32 max_length) {
+  if (max_ptr != NULL) {
+    if (max_length != 0) {
+      end_ptr_ = new UInt8[max_length];
+      std::memcpy(end_ptr_, max_ptr, max_length);
+    } else {
+      end_ptr_ = reinterpret_cast<UInt8 *>(const_cast<char *>(""));
+    }
+  }
+  end_length_ = max_length;
+
+  UInt32 node_id = ROOT_NODE_ID;
+  Node node = trie_->ith_node(node_id);
+  for (UInt32 i = 0; i < min_length; ++i) {
+    if (node.is_terminal()) {
+      Key key;
+      trie_->ith_key(node.key_id(), &key);
+      const int result = compare(key, min_ptr, min_length, i);
+      if ((result > 0) || ((result == 0) &&
+          ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) {
+        buf_.push_back(node_id);
+      }
+      return;
+    }
+
+    node_id = node.offset() ^ min_ptr[i];
+    if (trie_->ith_node(node_id).label() != min_ptr[i]) {
+      UInt16 label = node.child();
+      if (label == TERMINAL_LABEL) {
+        label = trie_->ith_node(node.offset() ^ label).sibling();
+      }
+      while (label != INVALID_LABEL) {
+        if (label > min_ptr[i]) {
+          buf_.push_back(node.offset() ^ label);
+          break;
+        }
+        label = trie_->ith_node(node.offset() ^ label).sibling();
+      }
+      return;
+    }
+
+    node = trie_->ith_node(node_id);
+    if (node.sibling() != INVALID_LABEL) {
+      buf_.push_back(node_id ^ min_ptr[i] ^ node.sibling());
+    }
+  }
+
+  const Base base = trie_->ith_node(node_id).base();
+  if (base.is_terminal()) {
+    Key key;
+    trie_->ith_key(base.key_id(), &key);
+    if ((key.length() != min_length) ||
+        ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) {
+      buf_.push_back(node_id);
+    }
+    return;
+  }
+
+  UInt16 label = trie_->ith_node(node_id).child();
+  if ((label == TERMINAL_LABEL) &&
+      ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND)) {
+    label = trie_->ith_node(base.offset() ^ label).sibling();
+  }
+  if (label != INVALID_LABEL) {
+    buf_.push_back(base.offset() ^ label);
+  }
+}
+
+void KeyCursor::descending_init(const UInt8 *min_ptr, UInt32 min_length,
+                                const UInt8 *max_ptr, UInt32 max_length) {
+  if (min_ptr != NULL) {
+    if (min_length != 0) {
+      end_ptr_ = new UInt8[min_length];
+      std::memcpy(end_ptr_, min_ptr, min_length);
+    } else {
+      end_ptr_ = reinterpret_cast<UInt8 *>(const_cast<char *>(""));
+    }
+  }
+  end_length_ = min_length;
+
+  UInt32 node_id = ROOT_NODE_ID;
+  for (UInt32 i = 0; i < max_length; ++i) {
+    const Base base = trie_->ith_node(node_id).base();
+    if (base.is_terminal()) {
+      Key key;
+      trie_->ith_key(base.key_id(), &key);
+      const int result = compare(key, max_ptr, max_length, i);
+      if ((result < 0) || ((result == 0) &&
+          ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) {
+        buf_.push_back(node_id | POST_ORDER_FLAG);
+      }
+      return;
+    }
+
+    UInt32 label = trie_->ith_node(node_id).child();
+    if (label == TERMINAL_LABEL) {
+      node_id = base.offset() ^ label;
+      buf_.push_back(node_id | POST_ORDER_FLAG);
+      label = trie_->ith_node(node_id).sibling();
+    }
+    while (label != INVALID_LABEL) {
+      node_id = base.offset() ^ label;
+      if (label < max_ptr[i]) {
+        buf_.push_back(node_id);
+      } else if (label > max_ptr[i]) {
+        return;
+      } else {
+        break;
+      }
+      label = trie_->ith_node(node_id).sibling();
+    }
+    if (label == INVALID_LABEL) {
+      return;
+    }
+  }
+
+  const Base base = trie_->ith_node(node_id).base();
+  if (base.is_terminal()) {
+    Key key;
+    trie_->ith_key(base.key_id(), &key);
+    if ((key.length() == max_length) &&
+        ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
+      buf_.push_back(node_id | POST_ORDER_FLAG);
+    }
+    return;
+  }
+
+  UInt16 label = trie_->ith_node(node_id).child();
+  if ((label == TERMINAL_LABEL) &&
+      ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
+    buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG);
+  }
+}
+
+void KeyCursor::swap(KeyCursor *cursor) {
+  std::swap(trie_, cursor->trie_);
+  std::swap(offset_, cursor->offset_);
+  std::swap(limit_, cursor->limit_);
+  std::swap(flags_, cursor->flags_);
+  buf_.swap(cursor->buf_);
+  std::swap(count_, cursor->count_);
+  std::swap(max_count_, cursor->max_count_);
+  std::swap(end_, cursor->end_);
+  std::swap(end_ptr_, cursor->end_ptr_);
+  std::swap(end_length_, cursor->end_length_);
+}
+
+bool KeyCursor::ascending_next(Key *key) {
+  while (!buf_.empty()) {
+    const UInt32 node_id = buf_.back();
+    buf_.pop_back();
+
+    const Node node = trie_->ith_node(node_id);
+    if (node.sibling() != INVALID_LABEL) {
+      buf_.push_back(node_id ^ node.label() ^ node.sibling());
+    }
+
+    if (node.child() != INVALID_LABEL) {
+      buf_.push_back(node.offset() ^ node.child());
+    }
+
+    if (node.is_terminal()) {
+      Key temp_key;
+      trie_->ith_key(node.key_id(), &temp_key);
+      if (end_ptr_ != NULL) {
+        const int result = compare(temp_key, end_ptr_, end_length_, 0);
+        if ((result > 0) || ((result == 0) &&
+            ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) {
+          end_ = true;
+          return false;
+        }
+      }
+      if (count_++ >= offset_) {
+        *key = temp_key;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool KeyCursor::descending_next(Key *key) {
+  while (!buf_.empty()) {
+    const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG;
+    const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG;
+
+    const Base base = trie_->ith_node(node_id).base();
+    if (post_order) {
+      buf_.pop_back();
+      if (base.is_terminal()) {
+        Key temp_key;
+        trie_->ith_key(base.key_id(), &temp_key);
+        if (end_ptr_ != NULL) {
+          const int result = compare(temp_key, end_ptr_, end_length_, 0);
+          if ((result < 0) || ((result == 0) &&
+              ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) {
+            end_ = true;
+            return false;
+          }
+        }
+        if (count_++ >= offset_) {
+          trie_->ith_key(base.key_id(), key);
+          return true;
+        }
+      }
+    } else {
+      buf_.back() |= POST_ORDER_FLAG;
+      UInt16 label = trie_->ith_node(node_id).child();
+      while (label != INVALID_LABEL) {
+        buf_.push_back(base.offset() ^ label);
+        label = trie_->ith_node(base.offset() ^ label).sibling();
+      }
+    }
+  }
+  return false;
+}
+
+int KeyCursor::compare(const Key &key,
+                       const UInt8 *ptr, UInt32 length,
+                       UInt32 offset) const {
+  const UInt32 min_length = (key.length() < length) ? key.length() : length;
+  for (UInt32 i = offset; i < min_length; ++i) {
+    if (static_cast<UInt8>(key[i]) != ptr[i]) {
+      return static_cast<UInt8>(key[i]) - ptr[i];
+    }
+  }
+  if (key.length() < length) {
+    return -1;
+  }
+  return (key.length() > length) ? 1 : 0;
+}
+
+}  // namespace grn
+}  // namespace dat

  Added: lib/dat/key-cursor.hpp (+79 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key-cursor.hpp    2011-06-27 00:26:52 +0000 (6315d05)
@@ -0,0 +1,79 @@
+#ifndef GRN_DAT_KEY_CURSOR_HPP_
+#define GRN_DAT_KEY_CURSOR_HPP_
+
+#include "cursor.hpp"
+#include "trie.hpp"
+
+#include <vector>
+
+namespace grn {
+namespace dat {
+
+class KeyCursor : public Cursor {
+ public:
+  KeyCursor();
+  ~KeyCursor();
+
+  void open(const Trie &trie,
+            const void *min_ptr, UInt32 min_length,
+            const void *max_ptr, UInt32 max_length,
+            UInt32 offset = 0,
+            UInt32 limit = UINT32_MAX,
+            UInt32 flags = 0);
+
+  void close();
+
+  bool next(Key *key);
+
+  UInt32 offset() const {
+    return offset_;
+  }
+  UInt32 limit() const {
+    return limit_;
+  }
+  UInt32 flags() const {
+    return flags_;
+  }
+
+ private:
+  const Trie *trie_;
+  UInt32 offset_;
+  UInt32 limit_;
+  UInt32 flags_;
+
+  std::vector<UInt32> buf_;
+  UInt32 count_;
+  UInt32 max_count_;
+  bool end_;
+  UInt8 *end_ptr_;
+  UInt32 end_length_;
+
+  UInt32 fix_flags(UInt32 flags) const;
+  KeyCursor(const Trie &trie,
+                         UInt32 offset, UInt32 limit, UInt32 flags);
+  void init(const UInt8 *min_ptr, UInt32 min_length,
+            const UInt8 *max_ptr, UInt32 max_length);
+  void ascending_init(const UInt8 *min_ptr, UInt32 min_length,
+                      const UInt8 *max_ptr, UInt32 max_length);
+  void descending_init(const UInt8 *min_ptr, UInt32 min_length,
+                       const UInt8 *max_ptr, UInt32 max_length);
+  void swap(KeyCursor *cursor);
+
+  bool ascending_next(Key *key);
+  bool descending_next(Key *key);
+
+  int compare(const Key &key,
+              const UInt8 *ptr, UInt32 length,
+              UInt32 offset) const;
+
+  static const UInt32 POST_ORDER_FLAG = 0x80000000U;
+
+  // Disallows copy and assignment.
+  KeyCursor(const KeyCursor &);
+  KeyCursor &operator=(const KeyCursor &);
+};
+
+}  // namespace grn
+}  // namespace dat
+
+#endif  // GRN_DAT_KEY_CURSOR_HPP_

  Modified: lib/dat/predictive-search-cursor.cpp (+9 -17)
===================================================================
--- lib/dat/predictive-search-cursor.cpp    2011-06-24 15:18:34 +0000 (a00db28)
+++ lib/dat/predictive-search-cursor.cpp    2011-06-27 00:26:52 +0000 (6558128)
@@ -139,18 +139,23 @@ bool PredictiveSearchCursor::ascending_next(Key *key) {
   while (!buf_.empty()) {
     const UInt32 node_id = buf_.back();
     buf_.pop_back();
-    push_next_nodes(node_id);
 
-    const Base base = trie_->ith_node(node_id).base();
-    if (base.is_terminal()) {
+    const Node node = trie_->ith_node(node_id);
+    if (node.sibling() != INVALID_LABEL) {
+      buf_.push_back(node_id ^ node.label() ^ node.sibling());
+    }
+
+    if (node.is_terminal()) {
       Key temp_key;
-      trie_->ith_key(base.key_id(), &temp_key);
+      trie_->ith_key(node.key_id(), &temp_key);
       if (temp_key.length() >= min_length_) {
         if (cur_++ >= offset_) {
           *key = temp_key;
           return true;
         }
       }
+    } else if (node.child() != INVALID_LABEL) {
+      buf_.push_back(node.offset() ^ node.child());
     }
   }
   return false;
@@ -186,18 +191,5 @@ bool PredictiveSearchCursor::descending_next(Key *key) {
   return false;
 }
 
-
-void PredictiveSearchCursor::push_next_nodes(UInt32 node_id) {
-  if (trie_->ith_node(node_id).sibling() != INVALID_LABEL) {
-    buf_.push_back(node_id ^ trie_->ith_node(node_id).label()
-        ^ trie_->ith_node(node_id).sibling());
-  }
-
-  if (trie_->ith_node(node_id).child() != INVALID_LABEL) {
-    buf_.push_back(trie_->ith_node(node_id).offset()
-        ^ trie_->ith_node(node_id).child());
-  }
-}
-
 }  // namespace grn
 }  // namespace dat

  Modified: lib/dat/predictive-search-cursor.hpp (+0 -1)
===================================================================
--- lib/dat/predictive-search-cursor.hpp    2011-06-24 15:18:34 +0000 (0522560)
+++ lib/dat/predictive-search-cursor.hpp    2011-06-27 00:26:52 +0000 (0f30357)
@@ -54,7 +54,6 @@ class PredictiveSearchCursor : public Cursor {
 
   bool ascending_next(Key *key);
   bool descending_next(Key *key);
-  void push_next_nodes(UInt32 node_id);
 
   static const UInt32 POST_ORDER_FLAG = 0x80000000U;
 




Groonga-commit メーリングリストの案内
Back to archive index