null+****@clear*****
null+****@clear*****
2011年 11月 1日 (火) 10:00:20 JST
Susumu Yata 2011-11-01 01:00:20 +0000 (Tue, 01 Nov 2011) New Revision: 2ff7f29597b066d3727425f09eee97305671aca0 Log: replace the implementation of grn_dat. Added files: lib/dat/array.hpp lib/dat/entry.hpp lib/dat/file.cpp Removed files: lib/dat/key-info.hpp lib/dat/memory-mapped-file.cpp lib/dat/timer.hpp lib/dat/usage.hpp Modified files: lib/dat.cpp lib/dat/Makefile.am lib/dat/base.hpp lib/dat/block.hpp lib/dat/check.hpp lib/dat/cursor-factory.cpp lib/dat/cursor.hpp lib/dat/dat.hpp lib/dat/header.hpp lib/dat/id-cursor.cpp lib/dat/id-cursor.hpp lib/dat/key-cursor.cpp lib/dat/key-cursor.hpp lib/dat/key.hpp lib/dat/node.hpp lib/dat/predictive-cursor.cpp lib/dat/predictive-cursor.hpp lib/dat/string.hpp lib/dat/trie.cpp lib/dat/trie.hpp lib/dat/vector.hpp Renamed files: lib/dat/file-impl.cpp (from lib/dat/memory-mapped-file-impl.cpp) lib/dat/file-impl.hpp (from lib/dat/memory-mapped-file-impl.hpp) lib/dat/file.hpp (from lib/dat/memory-mapped-file.hpp) lib/dat/prefix-cursor.cpp (from lib/dat/common-prefix-cursor.cpp) lib/dat/prefix-cursor.hpp (from lib/dat/common-prefix-cursor.hpp) Modified: lib/dat.cpp (+18 -22) =================================================================== --- lib/dat.cpp 2011-10-31 04:17:38 +0000 (ea186e9) +++ lib/dat.cpp 2011-11-01 01:00:20 +0000 (11df27f) @@ -25,8 +25,6 @@ extern "C" { -std::size_t INITIAL_FILE_SIZE = 1 << 20; - static void grn_dat_init(grn_dat *dat) { GRN_DB_OBJ_SET_TYPE(dat, GRN_TABLE_DAT_KEY); @@ -160,10 +158,11 @@ grn_dat_get(grn_ctx *ctx, grn_dat *dat, const void *key, grn_id id = GRN_ID_NIL; #ifndef WIN32 if (dat && dat->header->file_id) { + const grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); grn_dat_confirm_handle(ctx, dat); - grn::dat::Key k; - if (static_cast<grn::dat::Trie *>(dat->handle)->search(key, key_size, &k)) { - id = k.id(); + grn::dat::UInt32 key_pos; + if (trie->search(key, key_size, &key_pos)) { + id = trie->get_key(key_pos).id(); } } #endif @@ -185,11 +184,11 @@ grn_dat_add(grn_ctx *ctx, grn_dat *dat, const void *key, char buffer[PATH_MAX]; gen_pathname(path, buffer, file_id); grn::dat::Trie *new_trie = new grn::dat::Trie; - new_trie->create(buffer, INITIAL_FILE_SIZE); + new_trie->create(buffer); dat->handle = new_trie; } else { grn::dat::Trie *new_trie = new grn::dat::Trie; - new_trie->create(NULL, INITIAL_FILE_SIZE); + new_trie->create(NULL); dat->handle = new_trie; } dat->file_id = dat->header->file_id = file_id; @@ -198,9 +197,9 @@ grn_dat_add(grn_ctx *ctx, grn_dat *dat, const void *key, grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); if (trie == NULL) { goto exit; } try { - grn::dat::Key k; - bool res = trie->insert(key, key_size, &k); - id = k.id(); + grn::dat::UInt32 key_pos; + bool res = trie->insert(key, key_size, &key_pos); + id = trie->get_key(key_pos).id(); if (added) { *added = (int)res; } } catch (const grn::dat::Exception &ex) { char buffer[PATH_MAX]; @@ -214,9 +213,9 @@ grn_dat_add(grn_ctx *ctx, grn_dat *dat, const void *key, /* should be deleted after enough interval */ if (new_trie == NULL) { goto exit; } delete trie; - grn::dat::Key k; - bool res = new_trie->insert(key, key_size, &k); - id = k.id(); + grn::dat::UInt32 key_pos; + bool res = new_trie->insert(key, key_size, &key_pos); + id = new_trie->get_key(key_pos).id(); } } #endif @@ -231,10 +230,9 @@ grn_dat_get_key(grn_ctx *ctx, grn_dat *dat, grn_id id, void *keybuf, int bufsize #ifndef WIN32 if (dat && dat->header->file_id) { try { - grn::dat::Key k; grn_dat_confirm_handle(ctx, dat); grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); - trie->ith_key(id, &k); + const grn::dat::Key &k = trie->ith_key(id); len = k.length(); if (keybuf && bufsize >= len) { memcpy(keybuf, k.ptr(), len); @@ -254,10 +252,9 @@ grn_dat_get_key2(grn_ctx *ctx, grn_dat *dat, grn_id id, grn_obj *bulk) #ifndef WIN32 if (dat && dat->header->file_id) { try { - grn::dat::Key k; grn_dat_confirm_handle(ctx, dat); grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); - trie->ith_key(id, &k); + const grn::dat::Key &k = trie->ith_key(id); len = k.length(); const char *key = static_cast<const char *>(k.ptr()); if (bulk->header.impl_flags & GRN_OBJ_REFER) { @@ -313,7 +310,7 @@ grn_dat_cursor_open(grn_ctx *ctx, grn_dat *dat, grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); grn::dat::Cursor *cursor = grn::dat::CursorFactory::open(*trie, NULL, min_size, max, max_size, offset, limit, - grn::dat::COMMON_PREFIX_CURSOR | grn::dat::DESCENDING_CURSOR); + grn::dat::PREFIX_CURSOR | grn::dat::DESCENDING_CURSOR); dc->cursor = cursor; // } else { // /* todo: near */ @@ -381,9 +378,9 @@ grn_dat_cursor_next(grn_ctx *ctx, grn_dat_cursor *c) grn_id id = GRN_ID_NIL; #ifndef WIN32 if (c && c->cursor) { - grn::dat::Key k; grn::dat::Cursor *cursor = static_cast<grn::dat::Cursor *>(c->cursor); - if (cursor->next(&k)) { + const grn::dat::Key &k = cursor->next(); + if (k.is_valid()) { id = c->curr_rec = k.id(); } else { c->curr_rec = GRN_ID_NIL; @@ -434,10 +431,9 @@ _grn_dat_key(grn_ctx *ctx, grn_dat *dat, grn_id id, uint32_t *key_size) #ifndef WIN32 if (dat && dat->header->file_id) { try { - grn::dat::Key k; grn_dat_confirm_handle(ctx, dat); grn::dat::Trie *trie = static_cast<grn::dat::Trie *>(dat->handle); - trie->ith_key(id, &k); + const grn::dat::Key &k = trie->ith_key(id); *key_size = k.length(); key = static_cast<const char *>(k.ptr()); } catch (const grn::dat::Exception &ex) { Modified: lib/dat/Makefile.am (+28 -29) =================================================================== --- lib/dat/Makefile.am 2011-10-31 04:17:38 +0000 (8217637) +++ lib/dat/Makefile.am 2011-11-01 01:00:20 +0000 (8a378d6) @@ -3,37 +3,36 @@ DEFS += -D_REENTRANT $(GRN_DEFS) if !OS_WIN32 noinst_LTLIBRARIES = libgrndat.la -libgrndat_la_SOURCES = \ - common-prefix-cursor.cpp \ - cursor-factory.cpp \ - id-cursor.cpp \ - key-cursor.cpp \ - memory-mapped-file-impl.cpp \ - memory-mapped-file.cpp \ - predictive-cursor.cpp \ +libgrndat_la_SOURCES = \ + cursor-factory.cpp \ + file-impl.cpp \ + file.cpp \ + id-cursor.cpp \ + key-cursor.cpp \ + predictive-cursor.cpp \ + prefix-cursor.cpp \ trie.cpp -noinst_HEADERS = \ - base.hpp \ - block.hpp \ - check.hpp \ - common-prefix-cursor.hpp \ - cursor-factory.hpp \ - cursor.hpp \ - dat.hpp \ - header.hpp \ - id-cursor.hpp \ - key-cursor.hpp \ - key-info.hpp \ - key.hpp \ - memory-mapped-file-impl.hpp \ - memory-mapped-file.hpp \ - node.hpp \ - predictive-cursor.hpp \ - string.hpp \ - timer.hpp \ - trie.hpp \ - usage.hpp \ +noinst_HEADERS = \ + array.hpp \ + base.hpp \ + block.hpp \ + check.hpp \ + cursor-factory.hpp \ + cursor.hpp \ + dat.hpp \ + entry.hpp \ + file-impl.hpp \ + file.hpp \ + header.hpp \ + id-cursor.hpp \ + key-cursor.hpp \ + key.hpp \ + node.hpp \ + predictive-cursor.hpp \ + prefix-cursor.hpp \ + string.hpp \ + trie.hpp \ vector.hpp endif Added: lib/dat/array.hpp (+100 -0) 100644 =================================================================== --- /dev/null +++ lib/dat/array.hpp 2011-11-01 01:00:20 +0000 (1098eec) @@ -0,0 +1,100 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef GRN_DAT_ARRAY_HPP_ +#define GRN_DAT_ARRAY_HPP_ + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// This class is used to detect an out-of-range access in debug mode. +template <typename T> +class Array { + public: + Array() : ptr_(NULL), size_(0) {} + Array(void *ptr, UInt32 size) : ptr_(static_cast<T *>(ptr)), size_(size) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0)); + } + template <UInt32 U> + explicit Array(T (&array)[U]) : ptr_(array), size_(U) {} + ~Array() {} + + const T &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= size_); + return ptr_[i]; + } + T &operator[](UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= size_); + return ptr_[i]; + } + + const T *begin() const { + return ptr(); + } + T *begin() { + return ptr(); + } + + const T *end() const { + return ptr() + size(); + } + T *end() { + return ptr() + size(); + } + + void assign(void *ptr, UInt32 size) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0)); + ptr_ = static_cast<T *>(ptr); + size_ = size; + } + template <UInt32 U> + void assign(T (&array)[U]) { + assign(array, U); + } + + void swap(Array *rhs) { + T * const temp_ptr = ptr_; + ptr_ = rhs->ptr_; + rhs->ptr_ = temp_ptr; + + const UInt32 temp_size = size_; + size_ = rhs->size_; + rhs->size_ = temp_size; + } + + T *ptr() const { + return ptr_; + } + UInt32 size() const { + return size_; + } + + private: + T *ptr_; + UInt32 size_; + + // Disallows copy and assignment. + Array(const Array &); + Array &operator=(const Array &); +}; + +} // namespace dat +} // namespace grn + +#endif // GRN_DAT_ARRAY_HPP_ Modified: lib/dat/base.hpp (+22 -24) =================================================================== --- lib/dat/base.hpp 2011-10-31 04:17:38 +0000 (2bc1d56) +++ lib/dat/base.hpp 2011-11-01 01:00:20 +0000 (51fb551) @@ -23,46 +23,44 @@ namespace grn { namespace dat { -// The most significant bit represents whether or not the node is a terminal -// node. The BASE of a terminal node represents the ID of its associated key. -// On the other hand, the BASE of a non-terminal node represents the offset to -// its child nodes. +// The most significant bit represents whether or not the node is a linker. +// BASE of a linker represents the position of its associated key and BASE of +// a non-linker represents the offset to its child nodes. class Base { public: - Base() - : base_(0) {} + Base() : value_(0) {} - UInt32 base() const { - return base_; + bool operator==(const Base &rhs) const { + return value_ == rhs.value_; } - bool is_terminal() const { - return (base_ & IS_TERMINAL_FLAG) == IS_TERMINAL_FLAG; + + bool is_linker() const { + return (value_ & IS_LINKER_FLAG) == IS_LINKER_FLAG; } UInt32 offset() const { - GRN_DAT_DEBUG_THROW_IF(is_terminal()); - return base_; + GRN_DAT_DEBUG_THROW_IF(is_linker()); + return value_; } - UInt32 key_id() const { - GRN_DAT_DEBUG_THROW_IF(!is_terminal()); - return base_ & ~IS_TERMINAL_FLAG; + UInt32 key_pos() const { + GRN_DAT_DEBUG_THROW_IF(!is_linker()); + return value_ & ~IS_LINKER_FLAG; } - void set_base(UInt32 x) { - base_ = x; - } void set_offset(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0); GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET); - base_ = x; + value_ = x; } - void set_key_id(UInt32 x) { - GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_ID); - base_ = IS_TERMINAL_FLAG | x; + void set_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0); + GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET); + value_ = IS_LINKER_FLAG | x; } private: - UInt32 base_; + UInt32 value_; - static const UInt32 IS_TERMINAL_FLAG = 0x80000000U; + static const UInt32 IS_LINKER_FLAG = 0x80000000U; }; } // namespace dat Modified: lib/dat/block.hpp (+11 -11) =================================================================== --- lib/dat/block.hpp 2011-10-31 04:17:38 +0000 (42439c7) +++ lib/dat/block.hpp 2011-11-01 01:00:20 +0000 (907758f) @@ -25,12 +25,10 @@ namespace dat { class Block { public: - Block() - : next_(0), - prev_(0), - first_phantom_(0), - num_phantoms_(0) {} + Block() : next_(0), prev_(0), first_phantom_(0), num_phantoms_(0) {} + // Blocks in the same level are stored in a doubly-linked list which is + // represented by the following next() and prev(). UInt32 next() const { return next_ / BLOCK_SIZE; } @@ -38,10 +36,14 @@ class Block { return prev_ / BLOCK_SIZE; } + // A level indicates how easyily find_offset() can find a good offset in that + // block. It is easier in lower level blocks. UInt32 level() const { return next_ & BLOCK_MASK; } - UInt32 fail_count() const { + // A block level rises when find_offset() fails to find a good offset + // MAX_FAILURE_COUNT times in that block. + UInt32 failure_count() const { return prev_ & BLOCK_MASK; } @@ -66,14 +68,14 @@ class Block { GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); next_ = (next_ & ~BLOCK_MASK) | x; } - void set_fail_count(UInt32 x) { - GRN_DAT_DEBUG_THROW_IF(x > MAX_FAIL_COUNT); + void set_failure_count(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_FAILURE_COUNT); GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); prev_ = (prev_ & ~BLOCK_MASK) | x; } void set_first_phantom(UInt32 x) { - GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); + GRN_DAT_DEBUG_THROW_IF(x >= BLOCK_SIZE); first_phantom_ = (UInt16)x; } void set_num_phantoms(UInt32 x) { @@ -86,8 +88,6 @@ class Block { UInt32 prev_; UInt16 first_phantom_; UInt16 num_phantoms_; - - static const UInt32 SHIFT = 9; }; } // namespace dat Modified: lib/dat/check.hpp (+31 -33) =================================================================== --- lib/dat/check.hpp 2011-10-31 04:17:38 +0000 (ecbe7f1) +++ lib/dat/check.hpp 2011-11-01 01:00:20 +0000 (83e647b) @@ -25,84 +25,82 @@ namespace dat { class Check { public: - Check() - : check_(0) {} + Check() : value_(0) {} - UInt32 check() const { - return check_; + bool operator==(const Check &rhs) const { + return value_ == rhs.value_; } - // The most significant bit (MSB) represents whether or not the node ID is - // used as an offset. Note that the MSB is independent of the other bits. + // The most significant bit represents whether or not the node ID is used as + // an offset. Note that the MSB is independent of the other bits. bool is_offset() const { - return (check_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG; + return (value_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG; } UInt32 except_is_offset() const { GRN_DAT_DEBUG_THROW_IF(is_phantom()); - return check_ & ~IS_OFFSET_FLAG; + return value_ & ~IS_OFFSET_FLAG; } // A phantom node is a node that has never been used, and such a node is also // called an empty element. Phantom nodes form a doubly linked list in each // block, and the linked list is represented by next() and prev(). bool is_phantom() const { - return (check_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG; + return (value_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG; } UInt32 next() const { GRN_DAT_DEBUG_THROW_IF(!is_phantom()); - return (check_ >> NEXT_SHIFT) & BLOCK_MASK; + return (value_ >> NEXT_SHIFT) & BLOCK_MASK; } UInt32 prev() const { GRN_DAT_DEBUG_THROW_IF(!is_phantom()); - return (check_ >> PREV_SHIFT) & BLOCK_MASK; + return (value_ >> PREV_SHIFT) & BLOCK_MASK; } // A label is attached to each non-phantom node. A label is represented by // a byte except for a terminal label '\256'. Note that a phantom node always - // returns an invalid label with its phantom bit flag so as to reject a - // transition to the phantom node. + // returns an invalid label with its phantom bit flag so as to reject invalid + // transitions. UInt32 label() const { - return check_ & (IS_PHANTOM_FLAG | LABEL_MASK); + return value_ & (IS_PHANTOM_FLAG | LABEL_MASK); } + // A non-phantom node has the labels of the first child and the next sibling. + // Note that INVALID_LABEL is stored if the node has no child nodes or has + // no more siblings. UInt32 child() const { - return (check_ >> CHILD_SHIFT) & LABEL_MASK; + return (value_ >> CHILD_SHIFT) & LABEL_MASK; } UInt32 sibling() const { - return (check_ >> SIBLING_SHIFT) & LABEL_MASK; - } - - void set_check(UInt32 x) { - check_ = x; + return (value_ >> SIBLING_SHIFT) & LABEL_MASK; } void set_is_offset(bool x) { if (x) { GRN_DAT_DEBUG_THROW_IF(is_offset()); - check_ |= IS_OFFSET_FLAG; + value_ |= IS_OFFSET_FLAG; } else { GRN_DAT_DEBUG_THROW_IF(!is_offset()); - check_ &= ~IS_OFFSET_FLAG; + value_ &= ~IS_OFFSET_FLAG; } } void set_except_is_offset(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); GRN_DAT_DEBUG_THROW_IF((x & IS_OFFSET_FLAG) == IS_OFFSET_FLAG); - check_ = (check_ & IS_OFFSET_FLAG) | x; + value_ = (value_ & IS_OFFSET_FLAG) | x; } - // To reject a transition to an incomplete node, set_is_phantom() overwrites - // its label with an invalid label when a node becomes a non-phantom node. + // To reject a transition to an incomplete node, set_is_phantom() invalidates + // its label and links when it becomes non-phantom. void set_is_phantom(bool x) { if (x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); - check_ |= IS_PHANTOM_FLAG; + value_ |= IS_PHANTOM_FLAG; } else { GRN_DAT_DEBUG_THROW_IF(!is_phantom()); - check_ = (check_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) | + value_ = (value_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) | (INVALID_LABEL << SIBLING_SHIFT) | INVALID_LABEL; } } @@ -110,34 +108,34 @@ class Check { void set_next(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(!is_phantom()); GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); - check_ = (check_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT); + value_ = (value_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT); } void set_prev(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(!is_phantom()); GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK); - check_ = (check_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT); + value_ = (value_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT); } void set_label(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL); - check_ = (check_ & ~LABEL_MASK) | x; + value_ = (value_ & ~LABEL_MASK) | x; } void set_child(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL); - check_ = (check_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT); + value_ = (value_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT); } void set_sibling(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); GRN_DAT_DEBUG_THROW_IF(label() > MAX_LABEL); GRN_DAT_DEBUG_THROW_IF((sibling() != INVALID_LABEL) && (x == INVALID_LABEL)); - check_ = (check_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT); + value_ = (value_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT); } private: - UInt32 check_; + UInt32 value_; static const UInt32 IS_OFFSET_FLAG = 1U << 31; static const UInt32 IS_PHANTOM_FLAG = 1U << 30; Modified: lib/dat/cursor-factory.cpp (+3 -3) =================================================================== --- lib/dat/cursor-factory.cpp 2011-10-31 04:17:38 +0000 (91305b4) +++ lib/dat/cursor-factory.cpp 2011-11-01 01:00:20 +0000 (7ef3b9a) @@ -18,7 +18,7 @@ #include "cursor-factory.hpp" #include "id-cursor.hpp" #include "key-cursor.hpp" -#include "common-prefix-cursor.hpp" +#include "prefix-cursor.hpp" #include "predictive-cursor.hpp" #include <new> @@ -62,8 +62,8 @@ Cursor *CursorFactory::open(const Trie &trie, } return cursor; } - case COMMON_PREFIX_CURSOR: { - CommonPrefixCursor *cursor = new (std::nothrow) CommonPrefixCursor; + case PREFIX_CURSOR: { + PrefixCursor *cursor = new (std::nothrow) PrefixCursor; GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL); try { if (&trie != NULL) { Modified: lib/dat/cursor.hpp (+1 -1) =================================================================== --- lib/dat/cursor.hpp 2011-10-31 04:17:38 +0000 (2d26fbd) +++ lib/dat/cursor.hpp 2011-11-01 01:00:20 +0000 (d4e97e5) @@ -30,7 +30,7 @@ class Cursor { virtual void close() = 0; - virtual bool next(Key *key) = 0; + virtual const Key &next() = 0; virtual UInt32 offset() const = 0; virtual UInt32 limit() const = 0; Modified: lib/dat/dat.hpp (+37 -30) =================================================================== --- lib/dat/dat.hpp 2011-10-31 04:17:38 +0000 (bdbc2fc) +++ lib/dat/dat.hpp 2011-11-01 01:00:20 +0000 (8106a8f) @@ -68,49 +68,63 @@ const UInt16 MAX_LABEL = TERMINAL_LABEL; const UInt32 INVALID_LABEL = 0x1FF; const UInt32 LABEL_MASK = 0x1FF; -// The MSB of BASE is used to represent whether the node is a terminal node or +// The MSB of BASE is used to represent whether the node is a linker node or // not and the other 31 bits represent the offset to its child nodes. So, the -// maximum number of nodes is also limited to 2^31. +// number of nodes is limited to 2^31. const UInt32 ROOT_NODE_ID = 0; const UInt32 MAX_NODE_ID = 0x7FFFFFFF; const UInt32 MAX_NUM_NODES = MAX_NODE_ID + 1; const UInt32 INVALID_NODE_ID = MAX_NODE_ID + 1; +// 0 is reserved for non-linker leaf nodes. For example, the root node of an +// initial double-array is a non-linker leaf node. const UInt32 MAX_OFFSET = MAX_NODE_ID; const UInt32 INVALID_OFFSET = 0; +// Phantom nodes are managed in each block because siblings are always put in +// the same block. const UInt32 BLOCK_SIZE = 0x200; const UInt32 BLOCK_MASK = 0x1FF; const UInt32 MAX_BLOCK_ID = MAX_NODE_ID / BLOCK_SIZE; const UInt32 MAX_NUM_BLOCKS = MAX_BLOCK_ID + 1; -// The level of a block is incremented when find_offset() has failed to find -// a good offset MAX_FAIL_COUNT times in that block. -const UInt32 MAX_FAIL_COUNT = 4; - -// The maximum number of blocks tested in each call of find_offset(). -const UInt32 MAX_BLOCK_COUNT = 16; - -// A higher level block has less phantom nodes. -const UInt32 MAX_BLOCK_LEVEL = 5; - -const UInt32 INVALID_ENTRY = 0x7FFFFFFF; +// Blocks are divided by their levels, which indicate how easily update +// operations can find a good offset in them. The level of a block rises when +// find_offset() fails in that block many times. MAX_FAILURE_COUNT is the +// threshold. Also, in order to limit the time cost, find_offset() scans at +// most MAX_BLOCK_COUNT blocks. +// Larger parameters bring more chances of finding good offsets but it leads to +// more node renumberings, which are costly operations, and thus results in +// a degradation of space/time efficiencies. +const UInt32 MAX_FAILURE_COUNT = 4; +const UInt32 MAX_BLOCK_COUNT = 16; +const UInt32 MAX_BLOCK_LEVEL = 5; + +// Blocks in the same level compose a doubly linked list. The entry block of +// a linked list is called a leader. INVALID_LEADER means that a linked list is +// empty and there exists no leader. +const UInt32 INVALID_LEADER = 0x7FFFFFFF; const UInt32 MIN_KEY_ID = 1; const UInt32 MAX_KEY_ID = MAX_NODE_ID; const UInt32 INVALID_KEY_ID = 0; -const UInt32 MAX_KEY_LENGTH = 0x7FFF; -const UInt32 MAX_NUM_KEYS = MAX_KEY_ID + 1; +// A key length is represented as a 12-bit unsigned integer in Key. +// A key ID is represented as a 28-bit unsigned integer in Key. +const UInt32 MAX_KEY_LENGTH = (1U << 12) - 1; +const UInt32 MAX_NUM_KEYS = (1U << 28) - 1; +const UInt64 MIN_FILE_SIZE = 1 << 16; const UInt64 DEFAULT_FILE_SIZE = 1 << 20; +const UInt64 MAX_FILE_SIZE = (UInt64)1 << 40; const double DEFAULT_NUM_NODES_PER_KEY = 4.0; const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0; -const UInt32 MAX_KEY_BUF_SIZE = 0xFFFFFFFFU; +const UInt32 MAX_KEY_BUF_SIZE = 0x80000000U; +const UInt32 MAX_TOTAL_KEY_LENGTH = 0xFFFFFFFFU; const UInt32 ID_RANGE_CURSOR = 0x00001; const UInt32 KEY_RANGE_CURSOR = 0x00002; -const UInt32 COMMON_PREFIX_CURSOR = 0x00004; +const UInt32 PREFIX_CURSOR = 0x00004; const UInt32 PREDICTIVE_CURSOR = 0x00008; const UInt32 CURSOR_TYPE_MASK = 0x000FF; @@ -123,19 +137,13 @@ const UInt32 EXCEPT_UPPER_BOUND = 0x02000; const UInt32 EXCEPT_EXACT_MATCH = 0x04000; const UInt32 CURSOR_OPTIONS_MASK = 0xFF000; -// for x86_64-w64-mingw32 -#ifdef NO_ERROR -# undef NO_ERROR -#endif // NO_ERROR - -// To be determined... enum ErrorCode { - NO_ERROR = 0, PARAM_ERROR = -1, IO_ERROR = -2, - MEMORY_ERROR = -3, - SIZE_ERROR = -4, - UNEXPECTED_ERROR = -5 + FORMAT_ERROR = -3, + MEMORY_ERROR = -4, + SIZE_ERROR = -5, + UNEXPECTED_ERROR = -6 }; class Exception : public std::exception { @@ -161,9 +169,7 @@ class Exception : public std::exception { return *this; } - virtual ErrorCode code() const throw() { - return NO_ERROR; - } + virtual ErrorCode code() const throw() = 0; virtual const char *file() const throw() { return file_; } @@ -203,6 +209,7 @@ class Error : public Exception { typedef Error<PARAM_ERROR> ParamError; typedef Error<IO_ERROR> IOError; +typedef Error<FORMAT_ERROR> FormatError; typedef Error<MEMORY_ERROR> MemoryError; typedef Error<SIZE_ERROR> SizeError; typedef Error<UNEXPECTED_ERROR> UnexpectedError; Added: lib/dat/entry.hpp (+63 -0) 100644 =================================================================== --- /dev/null +++ lib/dat/entry.hpp 2011-11-01 01:00:20 +0000 (60be482) @@ -0,0 +1,63 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef GRN_DAT_ENTRY_HPP_ +#define GRN_DAT_ENTRY_HPP_ + +#include "dat.hpp" + +namespace grn { +namespace dat { + +// The most significant bit represents whether or not the entry is valid. +// A valid entry stores the position of its associated key and an invalid entry +// stores the index of the next invalid entry. +class Entry { + public: + Entry() : value_(0) {} + + bool is_valid() const { + return (value_ & IS_VALID_FLAG) == IS_VALID_FLAG; + } + UInt32 key_pos() const { + GRN_DAT_DEBUG_THROW_IF(!is_valid()); + return value_ & ~IS_VALID_FLAG; + } + UInt32 next() const { + GRN_DAT_DEBUG_THROW_IF(is_valid()); + return value_; + } + + void set_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_VALID_FLAG) != 0); + value_ = IS_VALID_FLAG | x; + } + void set_next(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x & IS_VALID_FLAG) != 0); + value_ = x; + } + + private: + UInt32 value_; + + static const UInt32 IS_VALID_FLAG = 0x80000000U; +}; + +} // namespace dat +} // namespace grn + +#endif // GRN_DAT_ENTRY_HPP_ Renamed: lib/dat/file-impl.cpp (+17 -17) 87% =================================================================== --- lib/dat/memory-mapped-file-impl.cpp 2011-10-31 04:17:38 +0000 (a506020) +++ lib/dat/file-impl.cpp 2011-11-01 01:00:20 +0000 (56c95b0) @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "memory-mapped-file-impl.hpp" +#include "file-impl.hpp" #include <sys/types.h> #include <sys/stat.h> @@ -41,14 +41,14 @@ namespace dat { #ifdef WIN32 -MemoryMappedFileImpl::MemoryMappedFileImpl() +FileImpl::FileImpl() : ptr_(NULL), size_(0), file_(INVALID_HANDLE_VALUE), map_(INVALID_HANDLE_VALUE), addr_(NULL) {} -MemoryMappedFileImpl::~MemoryMappedFileImpl() { +FileImpl::~FileImpl() { if (addr_ != NULL) { ::UnmapViewOfFile(addr_); } @@ -64,14 +64,14 @@ MemoryMappedFileImpl::~MemoryMappedFileImpl() { #else // WIN32 -MemoryMappedFileImpl::MemoryMappedFileImpl() +FileImpl::FileImpl() : ptr_(NULL), size_(0), fd_(-1), addr_(MAP_FAILED), length_(0) {} -MemoryMappedFileImpl::~MemoryMappedFileImpl() { +FileImpl::~FileImpl() { if (addr_ != MAP_FAILED) { ::munmap(addr_, length_); } @@ -83,33 +83,33 @@ MemoryMappedFileImpl::~MemoryMappedFileImpl() { #endif // WIN32 -void MemoryMappedFileImpl::create(const char *path, UInt64 size) { +void FileImpl::create(const char *path, UInt64 size) { GRN_DAT_THROW_IF(PARAM_ERROR, size == 0); GRN_DAT_THROW_IF(PARAM_ERROR, size > static_cast<UInt64>(std::numeric_limits< ::size_t>::max())); - MemoryMappedFileImpl new_impl; + FileImpl new_impl; new_impl.create_(path, size); new_impl.swap(this); } -void MemoryMappedFileImpl::open(const char *path) { +void FileImpl::open(const char *path) { GRN_DAT_THROW_IF(PARAM_ERROR, path == NULL); GRN_DAT_THROW_IF(PARAM_ERROR, path[0] == '\0'); - MemoryMappedFileImpl new_impl; + FileImpl new_impl; new_impl.open_(path); new_impl.swap(this); } -void MemoryMappedFileImpl::close() { - MemoryMappedFileImpl new_impl; +void FileImpl::close() { + FileImpl new_impl; new_impl.swap(this); } #ifdef WIN32 -void MemoryMappedFileImpl::swap(MemoryMappedFileImpl *rhs) { +void FileImpl::swap(FileImpl *rhs) { std::swap(ptr_, rhs->ptr_); std::swap(size_, rhs->size_); std::swap(file_, rhs->file_); @@ -117,7 +117,7 @@ void MemoryMappedFileImpl::swap(MemoryMappedFileImpl *rhs) { std::swap(addr_, rhs->addr_); } -void MemoryMappedFileImpl::create_(const char *path, UInt64 size) { +void FileImpl::create_(const char *path, UInt64 size) { if ((path != NULL) && (path[0] != '\0')) { file_ = ::CreateFileA(path, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, @@ -150,7 +150,7 @@ void MemoryMappedFileImpl::create_(const char *path, UInt64 size) { size_ = static_cast< ::size_t>(size); } -void MemoryMappedFileImpl::open_(const char *path) { +void FileImpl::open_(const char *path) { struct __stat64 st; GRN_DAT_THROW_IF(IO_ERROR, ::_stat64(path, &st) == -1); GRN_DAT_THROW_IF(IO_ERROR, st.st_size == 0); @@ -174,7 +174,7 @@ void MemoryMappedFileImpl::open_(const char *path) { #else // WIN32 -void MemoryMappedFileImpl::swap(MemoryMappedFileImpl *rhs) { +void FileImpl::swap(FileImpl *rhs) { std::swap(ptr_, rhs->ptr_); std::swap(size_, rhs->size_); std::swap(fd_, rhs->fd_); @@ -182,7 +182,7 @@ void MemoryMappedFileImpl::swap(MemoryMappedFileImpl *rhs) { std::swap(length_, rhs->length_); } -void MemoryMappedFileImpl::create_(const char *path, UInt64 size) { +void FileImpl::create_(const char *path, UInt64 size) { GRN_DAT_THROW_IF(PARAM_ERROR, size > static_cast<UInt64>(std::numeric_limits< ::off_t>::max())); @@ -208,7 +208,7 @@ void MemoryMappedFileImpl::create_(const char *path, UInt64 size) { size_ = length_; } -void MemoryMappedFileImpl::open_(const char *path) { +void FileImpl::open_(const char *path) { struct stat st; GRN_DAT_THROW_IF(IO_ERROR, ::stat(path, &st) == -1); GRN_DAT_THROW_IF(IO_ERROR, (st.st_mode & S_IFMT) != S_IFREG); Renamed: lib/dat/file-impl.hpp (+9 -9) 78% =================================================================== --- lib/dat/memory-mapped-file-impl.hpp 2011-10-31 04:17:38 +0000 (430c366) +++ lib/dat/file-impl.hpp 2011-11-01 01:00:20 +0000 (53efbfe) @@ -15,8 +15,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef GRN_DAT_MEMORY_MAPPED_FILE_IMPL_HPP_ -#define GRN_DAT_MEMORY_MAPPED_FILE_IMPL_HPP_ +#ifndef GRN_DAT_FILE_IMPL_HPP_ +#define GRN_DAT_FILE_IMPL_HPP_ #ifdef WIN32 #include <Windows.h> @@ -27,10 +27,10 @@ namespace grn { namespace dat { -class MemoryMappedFileImpl { +class FileImpl { public: - MemoryMappedFileImpl(); - ~MemoryMappedFileImpl(); + FileImpl(); + ~FileImpl(); void create(const char *path, UInt64 size); void open(const char *path); @@ -43,7 +43,7 @@ class MemoryMappedFileImpl { return size_; } - void swap(MemoryMappedFileImpl *rhs); + void swap(FileImpl *rhs); private: void *ptr_; @@ -63,11 +63,11 @@ class MemoryMappedFileImpl { void open_(const char *path); // Disallows copy and assignment. - MemoryMappedFileImpl(const MemoryMappedFileImpl &); - MemoryMappedFileImpl &operator=(const MemoryMappedFileImpl &); + FileImpl(const FileImpl &); + FileImpl &operator=(const FileImpl &); }; } // namespace dat } // namespace grn -#endif // GRN_DAT_MEMORY_MAPPED_FILE_IMPL_HPP_ +#endif // GRN_DAT_FILE_IMPL_HPP_ Added: lib/dat/file.cpp (+67 -0) 100644 =================================================================== --- /dev/null +++ lib/dat/file.cpp 2011-11-01 01:00:20 +0000 (68eb518) @@ -0,0 +1,67 @@ +/* -*- c-basic-offset: 2 -*- */ +/* Copyright(C) 2011 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "file.hpp" +#include "file-impl.hpp" + +#include <new> + +namespace grn { +namespace dat { + +File::File() : impl_(NULL) {} + +File::~File() { + delete impl_; +} + +void File::create(const char *path, UInt64 size) { + File new_file; + new_file.impl_ = new (std::nothrow) FileImpl; + GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL); + new_file.impl_->create(path, size); + new_file.swap(this); +} + +void File::open(const char *path) { + File new_file; + new_file.impl_ = new (std::nothrow) FileImpl; + GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL); + new_file.impl_->open(path); + new_file.swap(this); +} + +void File::close() { + File().swap(this); +} + +void *File::ptr() const { + return (impl_ != NULL) ? impl_->ptr() : NULL; +} + +UInt64 File::size() const { + return (impl_ != NULL) ? impl_->size() : 0; +} + +void File::swap(File *rhs) { + FileImpl * const temp = impl_; + impl_ = rhs->impl_; + rhs->impl_ = temp; +} + +} // namespace dat +} // namespace grn Renamed: lib/dat/file.hpp (+18 -11) 63% =================================================================== --- lib/dat/memory-mapped-file.hpp 2011-10-31 04:17:38 +0000 (c6dee8c) +++ lib/dat/file.hpp 2011-11-01 01:00:20 +0000 (91175c6) @@ -15,39 +15,46 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef GRN_DAT_MEMORY_MAPPED_FILE_HPP_ -#define GRN_DAT_MEMORY_MAPPED_FILE_HPP_ +#ifndef GRN_DAT_FILE_HPP_ +#define GRN_DAT_FILE_HPP_ #include "dat.hpp" namespace grn { namespace dat { -class MemoryMappedFileImpl; +// This implementation class hides environment dependent codes required for +// memory-mapped I/O. +class FileImpl; -class MemoryMappedFile { +class File { public: - MemoryMappedFile(); - ~MemoryMappedFile(); + File(); + ~File(); + // This function creates a file and maps the entire file to a certain range + // of the address space. Note that a file is truncated if exists. void create(const char *path, UInt64 size); + + // This function opens a file and maps the entire file to a certain range of + // the address space. void open(const char *path); void close(); void *ptr() const; UInt64 size() const; - void swap(MemoryMappedFile *rhs); + void swap(File *rhs); private: - MemoryMappedFileImpl *impl_; + FileImpl *impl_; // Disallows copy and assignment. - MemoryMappedFile(const MemoryMappedFile &); - MemoryMappedFile &operator=(const MemoryMappedFile &); + File(const File &); + File &operator=(const File &); }; } // namespace dat } // namespace grn -#endif // GRN_DAT_MEMORY_MAPPED_FILE_HPP_ +#endif // GRN_DAT_FILE_HPP_ Modified: lib/dat/header.hpp (+58 -12) =================================================================== --- lib/dat/header.hpp 2011-10-31 04:17:38 +0000 (ecf8c49) +++ lib/dat/header.hpp 2011-11-01 01:00:20 +0000 (e09fbce) @@ -15,8 +15,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef GRN_DAT_HPP_EADER_HPP_ -#define GRN_DAT_HPP_EADER_HPP_ +#ifndef GRN_DAT_HPP_HEADER_HPP_ +#define GRN_DAT_HPP_HEADER_HPP_ #include "dat.hpp" @@ -27,27 +27,41 @@ class Header { public: Header() : file_size_(0), + total_key_length_(0), + next_key_id_(grn::dat::MIN_KEY_ID), + max_key_id_(0), num_keys_(0), max_num_keys_(0), num_phantoms_(0), num_zombies_(0), num_blocks_(0), max_num_blocks_(0), + next_key_pos_(0), key_buf_size_(0), - entries_() { + leaders_(), + reserved_() { for (UInt32 i = 0; i <= MAX_BLOCK_LEVEL; ++i) { - entries_[i] = INVALID_ENTRY; + leaders_[i] = INVALID_LEADER; + } + for (UInt32 i = 0; i < (sizeof(reserved_) / sizeof(*reserved_)); ++i) { + reserved_[i] = 0; } } UInt64 file_size() const { return file_size_; } + UInt32 total_key_length() const { + return total_key_length_; + } UInt32 min_key_id() const { return MIN_KEY_ID; } + UInt32 next_key_id() const { + return next_key_id_; + } UInt32 max_key_id() const { - return num_keys(); + return max_key_id_; } UInt32 num_keys() const { return num_keys_; @@ -73,57 +87,89 @@ class Header { UInt32 max_num_blocks() const { return max_num_blocks_; } + UInt32 next_key_pos() const { + return next_key_pos_; + } UInt32 key_buf_size() const { return key_buf_size_; } - UInt32 ith_entry(UInt32 i) const { + UInt32 ith_leader(UInt32 i) const { GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); - return entries_[i]; + return leaders_[i]; } void set_file_size(UInt64 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_FILE_SIZE); file_size_ = x; } + void set_total_key_length(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_TOTAL_KEY_LENGTH); + total_key_length_ = x; + } + void set_next_key_id(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF((x - 1) > MAX_KEY_ID); + next_key_id_ = x; + } + void set_max_key_id(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_ID); + max_key_id_ = x; + } void set_num_keys(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS); num_keys_ = x; } void set_max_num_keys(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS); max_num_keys_ = x; } void set_num_phantoms(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes()); num_phantoms_ = x; } void set_num_zombies(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes()); num_zombies_ = x; } void set_num_blocks(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > max_num_blocks()); num_blocks_ = x; } void set_max_num_blocks(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_BLOCKS); max_num_blocks_ = x; } + void set_next_key_pos(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > key_buf_size()); + next_key_pos_ = x; + } void set_key_buf_size(UInt32 x) { + GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_BUF_SIZE); key_buf_size_ = x; } - void set_ith_entry(UInt32 i, UInt32 x) { + void set_ith_leader(UInt32 i, UInt32 x) { GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); - GRN_DAT_DEBUG_THROW_IF((x != INVALID_ENTRY) && (x >= num_blocks())); - entries_[i] = x; + GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks())); + leaders_[i] = x; } private: UInt64 file_size_; + UInt32 total_key_length_; + UInt32 next_key_id_; + UInt32 max_key_id_; UInt32 num_keys_; UInt32 max_num_keys_; UInt32 num_phantoms_; UInt32 num_zombies_; UInt32 num_blocks_; UInt32 max_num_blocks_; + UInt32 next_key_pos_; UInt32 key_buf_size_; - UInt32 entries_[MAX_BLOCK_LEVEL + 1]; + UInt32 leaders_[MAX_BLOCK_LEVEL + 1]; + UInt32 reserved_[13]; }; } // namespace dat } // namespace grn -#endif // GRN_DAT_HPP_EADER_HPP_ +#endif // GRN_DAT_HPP_HEADER_HPP_ Modified: lib/dat/id-cursor.cpp (+18 -16) =================================================================== --- lib/dat/id-cursor.cpp 2011-10-31 04:17:38 +0000 (3a5591a) +++ lib/dat/id-cursor.cpp 2011-11-01 01:00:20 +0000 (6218d79) @@ -42,18 +42,18 @@ void IdCursor::open(const Trie &trie, UInt32 flags) { UInt32 min_id = INVALID_KEY_ID; if (min_str.ptr() != NULL) { - Key key; + UInt32 key_pos; GRN_DAT_THROW_IF(PARAM_ERROR, - !trie.search(min_str.ptr(), min_str.length(), &key)); - min_id = key.id(); + !trie.search(min_str.ptr(), min_str.length(), &key_pos)); + min_id = trie.get_key(key_pos).id(); } UInt32 max_id = INVALID_KEY_ID; if (max_str.ptr() != NULL) { - Key key; + UInt32 key_pos; GRN_DAT_THROW_IF(PARAM_ERROR, - !trie.search(max_str.ptr(), max_str.length(), &key)); - max_id = key.id(); + !trie.search(max_str.ptr(), max_str.length(), &key_pos)); + max_id = trie.get_key(key_pos).id(); } open(trie, min_id, max_id, offset, limit, flags); @@ -77,17 +77,19 @@ void IdCursor::close() { new_cursor.swap(this); } -bool IdCursor::next(Key *key) { - if (cur_ == end_) { - return false; - } - trie_->ith_key(cur_, key); - if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { - ++cur_; - } else { - --cur_; +const Key &IdCursor::next() { + while (cur_ != end_) { + const Key &key = trie_->ith_key(cur_); + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + ++cur_; + } else { + --cur_; + } + if (key.is_valid()) { + return key; + } } - return true; + return Key::invalid_key(); } IdCursor::IdCursor(const Trie &trie, Modified: lib/dat/id-cursor.hpp (+1 -1) =================================================================== --- lib/dat/id-cursor.hpp 2011-10-31 04:17:38 +0000 (b43a961) +++ lib/dat/id-cursor.hpp 2011-11-01 01:00:20 +0000 (7e0a423) @@ -46,7 +46,7 @@ class IdCursor : public Cursor { void close(); - bool next(Key *key); + const Key &next(); UInt32 offset() const { return offset_; Modified: lib/dat/key-cursor.cpp (+44 -64) =================================================================== --- lib/dat/key-cursor.cpp 2011-10-31 04:17:38 +0000 (514497f) +++ lib/dat/key-cursor.cpp 2011-11-01 01:00:20 +0000 (9cbde85) @@ -65,15 +65,15 @@ void KeyCursor::close() { new_cursor.swap(this); } -bool KeyCursor::next(Key *key) { +const Key &KeyCursor::next() { if (finished_ || (count_ >= max_count_)) { - return false; + return Key::invalid_key(); } if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { - return ascending_next(key); + return ascending_next(); } else { - return descending_next(key); + return descending_next(); } } @@ -139,7 +139,7 @@ void KeyCursor::ascending_init(const String &min_str, const String &max_str) { } if ((min_str.ptr() == NULL) || (min_str.length() == 0)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(ROOT_NODE_ID)); + buf_.push_back(ROOT_NODE_ID); return; } @@ -147,21 +147,18 @@ void KeyCursor::ascending_init(const String &min_str, const String &max_str) { Node node; for (UInt32 i = 0; i < min_str.length(); ++i) { node = trie_->ith_node(node_id); - if (node.is_terminal()) { - Key key; - trie_->ith_key(node.key_id(), &key); + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); const int result = key.str().compare(min_str, i); if ((result > 0) || ((result == 0) && ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node_id)); + buf_.push_back(node_id); } else if (node.sibling() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } return; } else if (node.sibling() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } node_id = node.offset() ^ min_str[i]; @@ -172,8 +169,7 @@ void KeyCursor::ascending_init(const String &min_str, const String &max_str) { } while (label != INVALID_LABEL) { if (label > min_str[i]) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node.offset() ^ label)); + buf_.push_back(node.offset() ^ label); break; } label = trie_->ith_node(node.offset() ^ label).sibling(); @@ -183,20 +179,17 @@ void KeyCursor::ascending_init(const String &min_str, const String &max_str) { } node = trie_->ith_node(node_id); - if (node.is_terminal()) { - Key key; - trie_->ith_key(node.key_id(), &key); + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); if ((key.length() != min_str.length()) || ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node_id)); + buf_.push_back(node_id); } else if (node.sibling() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } return; } else if (node.sibling() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } UInt16 label = node.child(); @@ -205,7 +198,7 @@ void KeyCursor::ascending_init(const String &min_str, const String &max_str) { label = trie_->ith_node(node.offset() ^ label).sibling(); } if (label != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node.offset() ^ label)); + buf_.push_back(node.offset() ^ label); } } @@ -219,21 +212,19 @@ void KeyCursor::descending_init(const String &min_str, const String &max_str) { } if ((max_str.ptr() == NULL) || (max_str.length() == 0)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(ROOT_NODE_ID)); + buf_.push_back(ROOT_NODE_ID); return; } UInt32 node_id = ROOT_NODE_ID; for (UInt32 i = 0; i < max_str.length(); ++i) { const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { - Key key; - trie_->ith_key(base.key_id(), &key); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); const int result = key.str().compare(max_str, i); if ((result < 0) || ((result == 0) && ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id | POST_ORDER_FLAG)); + buf_.push_back(node_id | POST_ORDER_FLAG); } return; } @@ -241,14 +232,13 @@ void KeyCursor::descending_init(const String &min_str, const String &max_str) { UInt32 label = trie_->ith_node(node_id).child(); if (label == TERMINAL_LABEL) { node_id = base.offset() ^ label; - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id | POST_ORDER_FLAG)); + buf_.push_back(node_id | POST_ORDER_FLAG); label = trie_->ith_node(node_id).sibling(); } while (label != INVALID_LABEL) { node_id = base.offset() ^ label; if (label < max_str[i]) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node_id)); + buf_.push_back(node_id); } else if (label > max_str[i]) { return; } else { @@ -262,13 +252,11 @@ void KeyCursor::descending_init(const String &min_str, const String &max_str) { } const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { - Key key; - trie_->ith_key(base.key_id(), &key); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); if ((key.length() == max_str.length()) && ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id | POST_ORDER_FLAG)); + buf_.push_back(node_id | POST_ORDER_FLAG); } return; } @@ -276,8 +264,7 @@ void KeyCursor::descending_init(const String &min_str, const String &max_str) { UInt16 label = trie_->ith_node(node_id).child(); if ((label == TERMINAL_LABEL) && ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG)); + buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG); } } @@ -294,41 +281,37 @@ void KeyCursor::swap(KeyCursor *cursor) { end_str_.swap(&cursor->end_str_); } -bool KeyCursor::ascending_next(Key *key) { +const Key &KeyCursor::ascending_next() { while (!buf_.empty()) { const UInt32 node_id = buf_.back(); buf_.pop_back(); const Node node = trie_->ith_node(node_id); if (node.sibling() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } - if (node.is_terminal()) { - Key temp_key; - trie_->ith_key(node.key_id(), &temp_key); + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); if (end_buf_ != NULL) { - const int result = temp_key.str().compare(end_str_); + const int result = key.str().compare(end_str_); if ((result > 0) || ((result == 0) && ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) { finished_ = true; - return false; + return Key::invalid_key(); } } if (count_++ >= offset_) { - *key = temp_key; - return true; + return key; } } else if (node.child() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node.offset() ^ node.child())); + buf_.push_back(node.offset() ^ node.child()); } } - return false; + return Key::invalid_key(); } -bool KeyCursor::descending_next(Key *key) { +const Key &KeyCursor::descending_next() { while (!buf_.empty()) { const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; @@ -336,33 +319,30 @@ bool KeyCursor::descending_next(Key *key) { const Base base = trie_->ith_node(node_id).base(); if (post_order) { buf_.pop_back(); - if (base.is_terminal()) { - Key temp_key; - trie_->ith_key(base.key_id(), &temp_key); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); if (end_buf_ != NULL) { - const int result = - temp_key.str().compare(end_str_); + const int result = key.str().compare(end_str_); if ((result < 0) || ((result == 0) && ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) { finished_ = true; - return false; + return Key::invalid_key(); } } if (count_++ >= offset_) { - trie_->ith_key(base.key_id(), key); - return true; + return key; } } } else { buf_.back() |= POST_ORDER_FLAG; UInt16 label = trie_->ith_node(node_id).child(); while (label != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(base.offset() ^ label)); + buf_.push_back(base.offset() ^ label); label = trie_->ith_node(base.offset() ^ label).sibling(); } } } - return false; + return Key::invalid_key(); } } // namespace dat Modified: lib/dat/key-cursor.hpp (+3 -3) =================================================================== --- lib/dat/key-cursor.hpp 2011-10-31 04:17:38 +0000 (1a3a15b) +++ lib/dat/key-cursor.hpp 2011-11-01 01:00:20 +0000 (81ed6ef) @@ -40,7 +40,7 @@ class KeyCursor : public Cursor { void close(); - bool next(Key *key); + const Key &next(); UInt32 offset() const { return offset_; @@ -74,8 +74,8 @@ class KeyCursor : public Cursor { void descending_init(const String &min_str, const String &max_str); void swap(KeyCursor *cursor); - bool ascending_next(Key *key); - bool descending_next(Key *key); + const Key &ascending_next(); + const Key &descending_next(); static const UInt32 POST_ORDER_FLAG = 0x80000000U; Deleted: lib/dat/key-info.hpp (+0 -46) 100644 =================================================================== --- lib/dat/key-info.hpp 2011-10-31 04:17:38 +0000 (6a643aa) +++ /dev/null @@ -1,46 +0,0 @@ -/* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#ifndef GRN_DAT_KEY_INFO_HPP_ -#define GRN_DAT_KEY_INFO_HPP_ - -#include "dat.hpp" - -namespace grn { -namespace dat { - -class KeyInfo { - public: - KeyInfo() - : offset_(0) {} - - UInt32 offset() const { - return offset_; - } - - void set_offset(UInt32 x) { - offset_ = x; - } - - private: - UInt32 offset_; -}; - -} // namespace dat -} // namespace grn - -#endif // GRN_DAT_KEY_INFO_HPP_ Modified: lib/dat/key.hpp (+60 -21) =================================================================== --- lib/dat/key.hpp 2011-10-31 04:17:38 +0000 (485f0ac) +++ lib/dat/key.hpp 2011-11-01 01:00:20 +0000 (f1c1aa0) @@ -25,44 +25,83 @@ namespace dat { class Key { public: - Key() - : str_(), - id_(INVALID_KEY_ID) {} - const UInt8 &operator[](UInt32 i) const { - GRN_DAT_DEBUG_THROW_IF(i >= str_.length()); - return str_[i]; + GRN_DAT_DEBUG_THROW_IF(i >= length()); + return buf_[i]; + } + + bool is_valid() const { + return id() != INVALID_KEY_ID; } - const String &str() const { - return str_; + String str() const { + return String(ptr(), length()); } + const void *ptr() const { - return str_.ptr(); + return buf_; } UInt32 length() const { - return str_.length(); + return (length_high_ << 4) | (id_and_length_low_ & 0x0F); } UInt32 id() const { - return id_; + return id_and_length_low_ >> 4; } - void set_str(const void *ptr, UInt32 length) { - str_.assign(ptr, length); + bool equals_to(const void *ptr, UInt32 length, UInt32 offset = 0) const { + if (length != this->length()) { + return false; + } + for ( ; offset < length; ++offset) { + if ((*this)[offset] != static_cast<const UInt8 *>(ptr)[offset]) { + return false; + } + } + return true; } - void set_ptr(const void *x) { - str_.set_ptr(x); + + // Creates an object of Key from given parameters. Then, the created object + // is embedded into a specified buffer. + static const Key &create(UInt32 *buf, UInt32 key_id, + const void *key_ptr, UInt32 key_length) { + GRN_DAT_DEBUG_THROW_IF(buf == NULL); + GRN_DAT_DEBUG_THROW_IF(key_id > MAX_KEY_ID); + GRN_DAT_DEBUG_THROW_IF((key_ptr == NULL) && (key_length != 0)); + GRN_DAT_DEBUG_THROW_IF(key_length > MAX_KEY_LENGTH); + + *buf = (key_id << 4) | (key_length & 0x0F); + UInt8 *ptr = reinterpret_cast<UInt8 *>(buf + 1); + *ptr++ = key_length >> 4; + for (UInt32 i = 0; i < key_length; ++i) { + ptr[i] = static_cast<const UInt8 *>(key_ptr)[i]; + } + return *reinterpret_cast<const Key *>(buf); } - void set_length(UInt32 x) { - str_.set_length(x); + + // Calculates how many UInt32s are required for a string. It is guaranteed + // that the estimated size is not less than the actual size. + static UInt32 estimate_size(UInt32 length) { + return 2 + (length / sizeof(UInt32)); } - void set_id(UInt32 x) { - id_ = x; + + // Returns a reference to an invalid key. + static const Key &invalid_key() { + static const UInt32 invalid_key_buf[2] = { INVALID_KEY_ID << 4, 0 }; + return *reinterpret_cast<const Key *>(invalid_key_buf); } private: - String str_; - UInt32 id_; + const UInt32 id_and_length_low_; + const UInt8 length_high_; + const UInt8 buf_[3]; + + // Disallows instantiation. + Key(); + ~Key(); + + // Disallows copy and assignment. + Key(const Key &); + Key &operator=(const Key &); }; } // namespace dat Deleted: lib/dat/memory-mapped-file.cpp (+0 -79) 100644 =================================================================== --- lib/dat/memory-mapped-file.cpp 2011-10-31 04:17:38 +0000 (ed5dc2a) +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include "memory-mapped-file.hpp" - -#include <new> - -#include "memory-mapped-file-impl.hpp" - -namespace grn { -namespace dat { - -MemoryMappedFile::MemoryMappedFile() : impl_(NULL) {} - -MemoryMappedFile::~MemoryMappedFile() { - delete impl_; -} - -void MemoryMappedFile::create(const char *path, UInt64 size) { - MemoryMappedFileImpl *new_impl = new (std::nothrow) MemoryMappedFileImpl; - GRN_DAT_THROW_IF(MEMORY_ERROR, new_impl == NULL); - try { - new_impl->create(path, size); - } catch (...) { - delete new_impl; - throw; - } - delete impl_; - impl_ = new_impl; -} - -void MemoryMappedFile::open(const char *path) { - MemoryMappedFileImpl *new_impl = new (std::nothrow) MemoryMappedFileImpl; - GRN_DAT_THROW_IF(MEMORY_ERROR, new_impl == NULL); - try { - new_impl->open(path); - } catch (...) { - delete new_impl; - throw; - } - delete impl_; - impl_ = new_impl; -} - -void MemoryMappedFile::close() { - delete impl_; - impl_ = NULL; -} - -void *MemoryMappedFile::ptr() const { - return (impl_ != NULL) ? impl_->ptr() : NULL; -} - -UInt64 MemoryMappedFile::size() const { - return (impl_ != NULL) ? impl_->size() : 0; -} - -void MemoryMappedFile::swap(MemoryMappedFile *rhs) { - MemoryMappedFileImpl * const temp = impl_; - impl_ = rhs->impl_; - rhs->impl_ = temp; -} - -} // namespace dat -} // namespace grn Modified: lib/dat/node.hpp (+6 -6) =================================================================== --- lib/dat/node.hpp 2011-10-31 04:17:38 +0000 (a23221d) +++ lib/dat/node.hpp 2011-11-01 01:00:20 +0000 (035f503) @@ -32,17 +32,17 @@ class Node { Base base() const { return base_; } - bool is_terminal() const { + bool is_linker() const { GRN_DAT_DEBUG_THROW_IF(is_phantom()); - return base_.is_terminal(); + return base_.is_linker(); } UInt32 offset() const { GRN_DAT_DEBUG_THROW_IF(is_phantom()); return base_.offset(); } - UInt32 key_id() const { + UInt32 key_pos() const { GRN_DAT_DEBUG_THROW_IF(is_phantom()); - return base_.key_id(); + return base_.key_pos(); } Check check() const { @@ -81,9 +81,9 @@ class Node { GRN_DAT_DEBUG_THROW_IF(is_phantom()); base_.set_offset(x); } - void set_key_id(UInt32 x) { + void set_key_pos(UInt32 x) { GRN_DAT_DEBUG_THROW_IF(is_phantom()); - base_.set_key_id(x); + base_.set_key_pos(x); } void set_check(Check x) { Modified: lib/dat/predictive-cursor.cpp (+23 -30) =================================================================== --- lib/dat/predictive-cursor.cpp 2011-10-31 04:17:38 +0000 (1a9a66d) +++ lib/dat/predictive-cursor.cpp 2011-11-01 01:00:20 +0000 (c2bb08b) @@ -55,15 +55,15 @@ void PredictiveCursor::close() { new_cursor.swap(this); } -bool PredictiveCursor::next(Key *key) { +const Key &PredictiveCursor::next() { if (cur_ == end_) { - return false; + return Key::invalid_key(); } if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { - return ascending_next(key); + return ascending_next(); } else { - return descending_next(key); + return descending_next(); } } @@ -112,16 +112,15 @@ void PredictiveCursor::init(const String &str) { UInt32 node_id = ROOT_NODE_ID; for (UInt32 i = 0; i < str.length(); ++i) { const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { + if (base.is_linker()) { if (offset_ == 0) { - Key key; - trie_->ith_key(base.key_id(), &key); + const Key &key = trie_->get_key(base.key_pos()); if ((key.length() >= str.length()) && (key.str().substr(0, str.length()).compare(str, i) == 0)) { if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { node_id |= IS_ROOT_FLAG; } - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node_id)); + buf_.push_back(node_id); } } return; @@ -136,7 +135,7 @@ void PredictiveCursor::init(const String &str) { if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { node_id |= IS_ROOT_FLAG; } - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(node_id)); + buf_.push_back(node_id); } void PredictiveCursor::swap(PredictiveCursor *cursor) { @@ -150,7 +149,7 @@ void PredictiveCursor::swap(PredictiveCursor *cursor) { std::swap(min_length_, cursor->min_length_); } -bool PredictiveCursor::ascending_next(Key *key) { +const Key &PredictiveCursor::ascending_next() { while (!buf_.empty()) { const bool is_root = (buf_.back() & IS_ROOT_FLAG) == IS_ROOT_FLAG; const UInt32 node_id = buf_.back() & ~IS_ROOT_FLAG; @@ -158,28 +157,24 @@ bool PredictiveCursor::ascending_next(Key *key) { const Node node = trie_->ith_node(node_id); if (!is_root && (node.sibling() != INVALID_LABEL)) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node_id ^ node.label() ^ node.sibling())); + buf_.push_back(node_id ^ node.label() ^ node.sibling()); } - if (node.is_terminal()) { - Key temp_key; - trie_->ith_key(node.key_id(), &temp_key); - if (temp_key.length() >= min_length_) { + if (node.is_linker()) { + const Key &key = trie_->get_key(node.key_pos()); + if (key.length() >= min_length_) { if (cur_++ >= offset_) { - *key = temp_key; - return true; + return key; } } } else if (node.child() != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(node.offset() ^ node.child())); + buf_.push_back(node.offset() ^ node.child()); } } - return false; + return Key::invalid_key(); } -bool PredictiveCursor::descending_next(Key *key) { +const Key &PredictiveCursor::descending_next() { while (!buf_.empty()) { const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; @@ -187,13 +182,11 @@ bool PredictiveCursor::descending_next(Key *key) { const Base base = trie_->ith_node(node_id).base(); if (post_order) { buf_.pop_back(); - if (base.is_terminal()) { - Key temp_key; - trie_->ith_key(base.key_id(), &temp_key); - if (temp_key.length() >= min_length_) { + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); + if (key.length() >= min_length_) { if (cur_++ >= offset_) { - *key = temp_key; - return true; + return key; } } } @@ -201,12 +194,12 @@ bool PredictiveCursor::descending_next(Key *key) { buf_.back() |= POST_ORDER_FLAG; UInt16 label = trie_->ith_node(node_id).child(); while (label != INVALID_LABEL) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(base.offset() ^ label)); + buf_.push_back(base.offset() ^ label); label = trie_->ith_node(base.offset() ^ label).sibling(); } } } - return false; + return Key::invalid_key(); } } // namespace dat Modified: lib/dat/predictive-cursor.hpp (+3 -3) =================================================================== --- lib/dat/predictive-cursor.hpp 2011-10-31 04:17:38 +0000 (0e36848) +++ lib/dat/predictive-cursor.hpp 2011-11-01 01:00:20 +0000 (7110404) @@ -39,7 +39,7 @@ class PredictiveCursor : public Cursor { void close(); - bool next(Key *key); + const Key &next(); UInt32 offset() const { return offset_; @@ -69,8 +69,8 @@ class PredictiveCursor : public Cursor { void init(const String &str); void swap(PredictiveCursor *cursor); - bool ascending_next(Key *key); - bool descending_next(Key *key); + const Key &ascending_next(); + const Key &descending_next(); static const UInt32 IS_ROOT_FLAG = 0x80000000U; static const UInt32 POST_ORDER_FLAG = 0x80000000U; Renamed: lib/dat/prefix-cursor.cpp (+40 -45) 62% =================================================================== --- lib/dat/common-prefix-cursor.cpp 2011-10-31 04:17:38 +0000 (8ed7993) +++ lib/dat/prefix-cursor.cpp 2011-11-01 01:00:20 +0000 (ab17d0c) @@ -15,7 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "common-prefix-cursor.hpp" +#include "prefix-cursor.hpp" #include <algorithm> @@ -24,53 +24,50 @@ namespace grn { namespace dat { -CommonPrefixCursor::CommonPrefixCursor() +PrefixCursor::PrefixCursor() : trie_(NULL), offset_(0), limit_(UINT32_MAX), - flags_(COMMON_PREFIX_CURSOR), + flags_(PREFIX_CURSOR), buf_(), cur_(0), end_(0) {} -CommonPrefixCursor::~CommonPrefixCursor() {} +PrefixCursor::~PrefixCursor() {} -void CommonPrefixCursor::open(const Trie &trie, - const String &str, - UInt32 min_length, - UInt32 offset, - UInt32 limit, - UInt32 flags) { +void PrefixCursor::open(const Trie &trie, + const String &str, + UInt32 min_length, + UInt32 offset, + UInt32 limit, + UInt32 flags) { GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0)); GRN_DAT_THROW_IF(PARAM_ERROR, min_length > str.length()); flags = fix_flags(flags); - CommonPrefixCursor new_cursor(trie, offset, limit, flags); + PrefixCursor new_cursor(trie, offset, limit, flags); new_cursor.init(str, min_length); new_cursor.swap(this); } -void CommonPrefixCursor::close() { - CommonPrefixCursor new_cursor; +void PrefixCursor::close() { + PrefixCursor new_cursor; new_cursor.swap(this); } -bool CommonPrefixCursor::next(Key *key) { +const Key &PrefixCursor::next() { if (cur_ == end_) { - return false; + return Key::invalid_key(); } if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { - trie_->ith_key(buf_[cur_++], key); + return trie_->get_key(buf_[cur_++]); } else { - trie_->ith_key(buf_[--cur_], key); + return trie_->get_key(buf_[--cur_]); } - return true; } -CommonPrefixCursor::CommonPrefixCursor(const Trie &trie, - UInt32 offset, - UInt32 limit, - UInt32 flags) +PrefixCursor::PrefixCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) : trie_(&trie), offset_(offset), limit_(limit), @@ -79,11 +76,11 @@ CommonPrefixCursor::CommonPrefixCursor(const Trie &trie, cur_(0), end_(0) {} -UInt32 CommonPrefixCursor::fix_flags(UInt32 flags) const { +UInt32 PrefixCursor::fix_flags(UInt32 flags) const { const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && - (cursor_type != COMMON_PREFIX_CURSOR)); - flags |= COMMON_PREFIX_CURSOR; + (cursor_type != PREFIX_CURSOR)); + flags |= PREFIX_CURSOR; const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && @@ -99,8 +96,7 @@ UInt32 CommonPrefixCursor::fix_flags(UInt32 flags) const { return flags; } -void CommonPrefixCursor::init(const String &str, - UInt32 min_length) { +void PrefixCursor::init(const String &str, UInt32 min_length) { if ((limit_ == 0) || (offset_ > (str.length() - min_length))) { return; } @@ -109,23 +105,24 @@ void CommonPrefixCursor::init(const String &str, UInt32 i; for (i = 0; i < str.length(); ++i) { const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { - Key key; - trie_->ith_key(base.key_id(), &key); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); if ((key.length() >= min_length) && (key.length() <= str.length()) && (str.substr(0, key.length()).compare(key.str(), i) == 0) && ((key.length() < str.length()) || ((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH))) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(key.id())); + buf_.push_back(base.key_pos()); } break; } if ((i >= min_length) && (trie_->ith_node(node_id).child() == TERMINAL_LABEL)) { - const UInt32 terminal = base.offset() ^ TERMINAL_LABEL; - GRN_DAT_THROW_IF(MEMORY_ERROR, - !buf_.push_back(trie_->ith_node(terminal).key_id())); + const Base linker_base = + trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + buf_.push_back(linker_base.key_pos()); + } } node_id = base.offset() ^ str[i]; @@ -135,20 +132,18 @@ void CommonPrefixCursor::init(const String &str, } if ((i == str.length()) && - (flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH) { + ((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH)) { const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { - Key key; - trie_->ith_key(base.key_id(), &key); + if (base.is_linker()) { + const Key &key = trie_->get_key(base.key_pos()); if ((key.length() >= min_length) && (key.length() <= str.length())) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(key.id())); + buf_.push_back(base.key_pos()); } } else if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) { - const UInt32 terminal = base.offset() ^ TERMINAL_LABEL; - Key key; - trie_->ith_key(trie_->ith_node(terminal).key_id(), &key); - if ((key.length() >= min_length) && (key.length() <= str.length())) { - GRN_DAT_THROW_IF(MEMORY_ERROR, !buf_.push_back(key.id())); + const Base linker_base = + trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base(); + if (linker_base.is_linker()) { + buf_.push_back(linker_base.key_pos()); } } } @@ -166,7 +161,7 @@ void CommonPrefixCursor::init(const String &str, } } -void CommonPrefixCursor::swap(CommonPrefixCursor *cursor) { +void PrefixCursor::swap(PrefixCursor *cursor) { std::swap(trie_, cursor->trie_); std::swap(offset_, cursor->offset_); std::swap(limit_, cursor->limit_); Renamed: lib/dat/prefix-cursor.hpp (+13 -13) 73% =================================================================== --- lib/dat/common-prefix-cursor.hpp 2011-10-31 04:17:38 +0000 (89872b6) +++ lib/dat/prefix-cursor.hpp 2011-11-01 01:00:20 +0000 (630e5b0) @@ -15,8 +15,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef GRN_DAT_COMMON_PREFIX_CURSOR_HPP_ -#define GRN_DAT_COMMON_PREFIX_CURSOR_HPP_ +#ifndef GRN_DAT_PREFIX_CURSOR_HPP_ +#define GRN_DAT_PREFIX_CURSOR_HPP_ #include "cursor.hpp" #include "vector.hpp" @@ -26,21 +26,21 @@ namespace dat { class Trie; -class CommonPrefixCursor : public Cursor { +class PrefixCursor : public Cursor { public: - CommonPrefixCursor(); - ~CommonPrefixCursor(); + PrefixCursor(); + ~PrefixCursor(); void open(const Trie &trie, const String &str, - UInt32 min_length, + UInt32 min_length = 0, UInt32 offset = 0, UInt32 limit = UINT32_MAX, UInt32 flags = 0); void close(); - bool next(Key *key); + const Key &next(); UInt32 offset() const { return offset_; @@ -62,19 +62,19 @@ class CommonPrefixCursor : public Cursor { UInt32 cur_; UInt32 end_; - CommonPrefixCursor(const Trie &trie, - UInt32 offset, UInt32 limit, UInt32 flags); + PrefixCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags); UInt32 fix_flags(UInt32 flags) const; void init(const String &str, UInt32 min_length); - void swap(CommonPrefixCursor *cursor); + void swap(PrefixCursor *cursor); // Disallows copy and assignment. - CommonPrefixCursor(const CommonPrefixCursor &); - CommonPrefixCursor &operator=(const CommonPrefixCursor &); + PrefixCursor(const PrefixCursor &); + PrefixCursor &operator=(const PrefixCursor &); }; } // namespace dat } // namespace grn -#endif // GRN_DAT_COMMON_PREFIX_CURSOR_HPP_ +#endif // GRN_DAT_PREFIX_CURSOR_HPP_ Modified: lib/dat/string.hpp (+4 -0) =================================================================== --- lib/dat/string.hpp 2011-10-31 04:17:38 +0000 (83500fb) +++ lib/dat/string.hpp 2011-11-01 01:00:20 +0000 (222f90b) @@ -31,6 +31,10 @@ class String { String(const void *ptr, UInt32 length) : ptr_(static_cast<const UInt8 *>(ptr)), length_(length) {} + template <UInt32 T> + explicit String(const char (&str)[T]) + : ptr_(reinterpret_cast<const UInt8 *>(str)), + length_(T - 1) {} String(const String &rhs) : ptr_(rhs.ptr_), length_(rhs.length_) {} Deleted: lib/dat/timer.hpp (+0 -58) 100644 =================================================================== --- lib/dat/timer.hpp 2011-10-31 04:17:38 +0000 (f244806) +++ /dev/null @@ -1,58 +0,0 @@ -/* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#ifndef GRN_DAT_TIMER_HPP_ -#define GRN_DAT_TIMER_HPP_ - -#include "dat.hpp" - -#include <sys/time.h> - -namespace grn { -namespace dat { - -class Timer { - public: - Timer() - : base_(get_time()) {} - - double elapsed() const { - return get_time() - base_; - } - - void reset() { - base_ = get_time(); - } - - private: - double base_; - - static double get_time() { - struct timeval tv; - ::gettimeofday(&tv, NULL); - return tv.tv_sec + (tv.tv_usec * 0.000001); - } - - // Disallows copy and assignment. - Timer(const Timer &); - Timer &operator=(const Timer &); -}; - -} // namespace dat -} // namespace grn - -#endif // GRN_DAT_TIMER_HPP_ Modified: lib/dat/trie.cpp (+291 -263) =================================================================== --- lib/dat/trie.cpp 2011-10-31 04:17:38 +0000 (46f5ce5) +++ lib/dat/trie.cpp 2011-11-01 01:00:20 +0000 (b592346) @@ -24,12 +24,12 @@ namespace grn { namespace dat { Trie::Trie() - : memory_mapped_file_(), + : file_(), header_(NULL), - nodes_(NULL), - blocks_(NULL), - key_infos_(NULL), - key_buf_(NULL) {} + nodes_(), + blocks_(), + entries_(), + key_buf_() {} Trie::~Trie() {} @@ -54,6 +54,9 @@ void Trie::create(const char *file_name, if (max_num_keys == 0) { if (file_size == 0) { file_size = DEFAULT_FILE_SIZE; + } else { + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); + GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); } } else { GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); @@ -96,18 +99,19 @@ void Trie::create(const Trie &trie, if (file_size == 0) { file_size = trie.file_size(); } + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); + GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); GRN_DAT_THROW_IF(PARAM_ERROR, file_size < trie.virtual_size()); } else { GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.num_keys()); - GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_KEY_ID); + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.max_key_id()); + GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); } Trie new_trie; new_trie.create_file(file_name, file_size, max_num_keys, num_nodes_per_key, average_key_length); - if (trie.num_keys() != 0) { - new_trie.build_from_trie(trie); - } + new_trie.build_from_trie(trie); new_trie.swap(this); } @@ -120,17 +124,16 @@ void Trie::open(const char *file_name) { } void Trie::close() { - Trie new_trie; - new_trie.swap(this); + Trie().swap(this); } void Trie::swap(Trie *trie) { - memory_mapped_file_.swap(&trie->memory_mapped_file_); + file_.swap(&trie->file_); std::swap(header_, trie->header_); - std::swap(nodes_, trie->nodes_); - std::swap(blocks_, trie->blocks_); - std::swap(key_infos_, trie->key_infos_); - std::swap(key_buf_, trie->key_buf_); + nodes_.swap(&trie->nodes_); + blocks_.swap(&trie->blocks_); + entries_.swap(&trie->entries_); + key_buf_.swap(&trie->key_buf_); } void Trie::create_file(const char *file_name, @@ -140,45 +143,56 @@ void Trie::create_file(const char *file_name, double average_key_length) { GRN_DAT_THROW_IF(PARAM_ERROR, (file_size == 0) && (max_num_keys == 0)); GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); - - UInt32 max_num_blocks; - UInt32 key_buf_size; if (max_num_keys == 0) { const UInt64 avail = file_size - sizeof(Header); const double num_bytes_per_key = (sizeof(Node) * num_nodes_per_key) + (1.0 * sizeof(Block) / BLOCK_SIZE * num_nodes_per_key) - + sizeof(KeyInfo) + average_key_length; - GRN_DAT_THROW_IF(PARAM_ERROR, (avail / num_bytes_per_key) > MAX_NUM_KEYS); - max_num_keys = (UInt32)(avail / num_bytes_per_key); + + sizeof(Entry) + + sizeof(UInt32) + sizeof(UInt8) + average_key_length + 1.5; + if ((avail / num_bytes_per_key) > MAX_NUM_KEYS) { + max_num_keys = MAX_NUM_KEYS; + } else { + max_num_keys = (UInt32)(avail / num_bytes_per_key); + } GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys == 0); } + UInt32 max_num_blocks; { const double max_num_nodes = num_nodes_per_key * max_num_keys; - GRN_DAT_THROW_IF(PARAM_ERROR, max_num_nodes > MAX_NUM_NODES); + GRN_DAT_THROW_IF(PARAM_ERROR, (max_num_nodes - 1.0) >= MAX_NUM_NODES); max_num_blocks = ((UInt32)max_num_nodes + BLOCK_SIZE - 1) / BLOCK_SIZE; GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks == 0); GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks > MAX_NUM_BLOCKS); } + UInt32 key_buf_size; if (file_size == 0) { const double total_key_length = average_key_length * max_num_keys; - GRN_DAT_THROW_IF(PARAM_ERROR, total_key_length > MAX_KEY_BUF_SIZE); - key_buf_size = (UInt32)total_key_length; + GRN_DAT_THROW_IF(PARAM_ERROR, + (total_key_length - 1.0) >= MAX_TOTAL_KEY_LENGTH); + + // The last term is the estimated number of bytes that will be used for + // 32-bit alignment. + const UInt64 total_num_bytes = total_key_length + + (UInt64)(sizeof(UInt32) + sizeof(UInt8)) * max_num_keys + + (UInt32)(max_num_keys * 1.5); + GRN_DAT_THROW_IF(PARAM_ERROR, + (total_num_bytes / sizeof(UInt32)) >= MAX_KEY_BUF_SIZE); + key_buf_size = (UInt32)(total_num_bytes / sizeof(UInt32)); file_size = sizeof(Header) + (sizeof(Block) * max_num_blocks) + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) - + (sizeof(KeyInfo) * (max_num_keys + 1)) - + key_buf_size; + + (sizeof(Entry) * max_num_keys) + + (sizeof(UInt32) * key_buf_size); } else { const UInt64 avail = file_size - sizeof(Header) - (sizeof(Block) * max_num_blocks) - (sizeof(Node) * BLOCK_SIZE * max_num_blocks) - - (sizeof(KeyInfo) * (max_num_keys + 1)); - GRN_DAT_THROW_IF(PARAM_ERROR, avail == 0); - GRN_DAT_THROW_IF(PARAM_ERROR, avail > MAX_KEY_BUF_SIZE); - key_buf_size = (UInt32)avail; + - (sizeof(Entry) * max_num_keys); + GRN_DAT_THROW_IF(PARAM_ERROR, (avail / sizeof(UInt32)) > MAX_KEY_BUF_SIZE); + key_buf_size = (UInt32)(avail / sizeof(UInt32)); } create_file(file_name, file_size, max_num_keys, @@ -190,65 +204,73 @@ void Trie::create_file(const char *file_name, UInt32 max_num_keys, UInt32 max_num_blocks, UInt32 key_buf_size) { - GRN_DAT_THROW_IF(PARAM_ERROR, file_size != (sizeof(Header) + GRN_DAT_THROW_IF(PARAM_ERROR, file_size < (sizeof(Header) + (sizeof(Block) * max_num_blocks) + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) - + (sizeof(KeyInfo) * (max_num_keys + 1)) - + key_buf_size)); + + (sizeof(Entry) * max_num_keys) + + (sizeof(UInt32) * key_buf_size))); - memory_mapped_file_.create(file_name, file_size); + file_.create(file_name, file_size); - Header * const header = static_cast<Header *>(memory_mapped_file_.ptr()); + Header * const header = static_cast<Header *>(file_.ptr()); *header = Header(); header->set_file_size(file_size); header->set_max_num_keys(max_num_keys); header->set_max_num_blocks(max_num_blocks); header->set_key_buf_size(key_buf_size); - map_address(memory_mapped_file_.ptr()); + map_address(file_.ptr()); reserve_node(ROOT_NODE_ID); ith_node(INVALID_OFFSET).set_is_offset(true); - ith_key_info(MIN_KEY_ID).set_offset(0); } void Trie::open_file(const char *file_name) { GRN_DAT_THROW_IF(PARAM_ERROR, file_name == NULL); - memory_mapped_file_.open(file_name); - map_address(memory_mapped_file_.ptr()); + file_.open(file_name); + map_address(file_.ptr()); + GRN_DAT_THROW_IF(FORMAT_ERROR, file_size() != file_.size()); } void Trie::map_address(void *address) { GRN_DAT_THROW_IF(PARAM_ERROR, address == NULL); header_ = static_cast<Header *>(address); - nodes_ = static_cast<Node *>(static_cast<void *>(header_ + 1)); - blocks_ = static_cast<Block *>(static_cast<void *>( - nodes_ + max_num_nodes())); - key_infos_ = static_cast<KeyInfo *>(static_cast<void *>( - blocks_ + max_num_blocks())); - key_buf_ = static_cast<char *>(static_cast<void *>( - key_infos_ + (max_num_keys() + 1))); + nodes_.assign(header_ + 1, max_num_nodes()); + blocks_.assign(nodes_.end(), max_num_blocks()); + entries_.assign(reinterpret_cast<Entry *>(blocks_.end()) - 1, + max_num_keys() + 1); + key_buf_.assign(entries_.end(), key_buf_size()); + + GRN_DAT_THROW_IF(UNEXPECTED_ERROR, static_cast<void *>(key_buf_.end()) + > static_cast<void *>(static_cast<char *>(address) + file_size())); } void Trie::build_from_trie(const Trie &trie) { - GRN_DAT_DEBUG_THROW_IF(trie.num_keys() == 0); + GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.num_keys()); + GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.max_key_id()); + header_->set_total_key_length(trie.total_key_length()); header_->set_num_keys(trie.num_keys()); - std::memcpy(key_infos_, trie.key_infos_, - sizeof(KeyInfo) * (num_keys() + 1)); - std::memcpy(key_buf_, trie.key_buf_, - ith_key_info(max_key_id() + 1).offset()); + header_->set_max_key_id(trie.max_key_id()); + header_->set_next_key_id(trie.next_key_id()); + for (UInt32 i = min_key_id(); i <= max_key_id(); ++i) { + ith_entry(i) = trie.ith_entry(i); + } build_from_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID); } -void Trie::build_from_trie(const Trie &trie, - UInt32 src, - UInt32 dest) { - ith_node(dest).set_except_is_offset(trie.ith_node(src).except_is_offset()); - if (trie.ith_node(src).is_terminal()) { - ith_node(dest).set_key_id(trie.ith_node(src).key_id()); +void Trie::build_from_trie(const Trie &trie, UInt32 src, UInt32 dest) { + // Keys are sorted in lexicographic order. + if (trie.ith_node(src).is_linker()) { + const Key &key = trie.get_key(trie.ith_node(src).key_pos()); + Key::create(key_buf_.ptr() + next_key_pos(), + key.id(), key.ptr(), key.length()); + ith_node(dest).set_key_pos(next_key_pos()); + ith_entry(key.id()).set_key_pos(next_key_pos()); + header_->set_next_key_pos( + next_key_pos() + Key::estimate_size(key.length())); return; } @@ -259,233 +281,237 @@ void Trie::build_from_trie(const Trie &trie, UInt32 num_labels = 0; UInt32 label = trie.ith_node(src).child(); - GRN_DAT_DEBUG_THROW_IF(label == INVALID_LABEL); while (label != INVALID_LABEL) { GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); - labels[num_labels++] = label; - label = trie.ith_node(src_offset ^ label).sibling(); + const UInt32 child = src_offset ^ label; + if (trie.ith_node(child).is_linker() || + (trie.ith_node(child).child() != INVALID_LABEL)) { + labels[num_labels++] = label; + } + label = trie.ith_node(child).sibling(); + } + if (num_labels == 0) { + return; } - GRN_DAT_DEBUG_THROW_IF(num_labels == 0); dest_offset = find_offset(labels, num_labels); for (UInt32 i = 0; i < num_labels; ++i) { - reserve_node(dest_offset ^ labels[i]); + const UInt32 child = dest_offset ^ labels[i]; + reserve_node(child); + ith_node(child).set_label(labels[i]); + if ((i + 1) < num_labels) { + ith_node(child).set_sibling(labels[i + 1]); + } } GRN_DAT_DEBUG_THROW_IF(ith_node(dest_offset).is_offset()); ith_node(dest_offset).set_is_offset(true); ith_node(dest).set_offset(dest_offset); + ith_node(dest).set_child(labels[0]); } - UInt32 label = trie.ith_node(src).child(); + UInt32 label = ith_node(dest).child(); while (label != INVALID_LABEL) { build_from_trie(trie, src_offset ^ label, dest_offset ^ label); - label = trie.ith_node(src_offset ^ label).sibling(); + label = ith_node(dest_offset ^ label).sibling(); } } -bool Trie::search_from_root(const UInt8 *ptr, - UInt32 length, - Key *key) const { +bool Trie::search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const { GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); UInt32 node_id = ROOT_NODE_ID; - for (UInt32 i = 0; i < length; ++i) { - const Base base = ith_node(node_id).base(); - if (base.is_terminal()) { - return search_from_terminal(ptr, length, key, base.key_id(), i); - } - - node_id = base.offset() ^ ptr[i]; - if (ith_node(node_id).label() != ptr[i]) { - return false; - } - } - - const Base base = ith_node(node_id).base(); - if (base.is_terminal()) { - return search_from_terminal(ptr, length, key, base.key_id(), length); - } - - node_id = base.offset() ^ TERMINAL_LABEL; - if (ith_node(node_id).label() != TERMINAL_LABEL) { + UInt32 query_pos = 0; + if (!search_linker(ptr, length, node_id, query_pos)) { return false; } - GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_terminal()); - - if (key != NULL) { - ith_key(ith_node(node_id).key_id(), key); - } - return true; -} - -bool Trie::search_from_terminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 key_id, - UInt32 i) const { - GRN_DAT_DEBUG_THROW_IF(i > length); - GRN_DAT_DEBUG_THROW_IF(key_id < min_key_id()); - GRN_DAT_DEBUG_THROW_IF(key_id > max_key_id()); - const UInt32 key_length = ith_key_info(key_id + 1).offset() - - ith_key_info(key_id).offset(); - if (key_length != length) { + const Base base = ith_node(node_id).base(); + if (!base.is_linker()) { return false; } - const char * const key_ptr = key_buf_ + ith_key_info(key_id).offset(); - for ( ; i < length; ++i) { - if (ptr[i] != (UInt8)key_ptr[i]) { - return false; + if (get_key(base.key_pos()).equals_to(ptr, length, query_pos)) { + if (key_pos != NULL) { + *key_pos = base.key_pos(); } + return true; } - - if (key != NULL) { - key->set_ptr(key_ptr); - key->set_length(key_length); - key->set_id(key_id); - } - return true; + return false; } -bool Trie::insert_from_root(const UInt8 *ptr, - UInt32 length, - Key *key) { +bool Trie::search_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 &query_pos) const { GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - UInt32 node_id = ROOT_NODE_ID; - for (UInt32 i = 0; i < length; ++i) { + for ( ; query_pos < length; ++query_pos) { const Base base = ith_node(node_id).base(); - if (base.is_terminal()) { - return insert_from_terminal(ptr, length, key, node_id, i); + if (base.is_linker()) { + return true; } - const UInt32 next = base.offset() ^ ptr[i]; - if (ith_node(next).label() != ptr[i]) { - return insert_from_nonterminal(ptr, length, key, node_id, i); + const UInt32 next = base.offset() ^ ptr[query_pos]; + if (ith_node(next).label() != ptr[query_pos]) { + return false; } node_id = next; } const Base base = ith_node(node_id).base(); - if (base.is_terminal()) { - return insert_from_terminal(ptr, length, key, node_id, length); + if (base.is_linker()) { + return true; } const UInt32 next = base.offset() ^ TERMINAL_LABEL; if (ith_node(next).label() != TERMINAL_LABEL) { - return insert_from_nonterminal(ptr, length, key, node_id, length); - } - GRN_DAT_DEBUG_THROW_IF(!ith_node(next).is_terminal()); - - if (key != NULL) { - ith_key(ith_node(next).key_id(), key); + return false; } - return false; + node_id = next; + return ith_node(next).is_linker(); } -bool Trie::insert_from_terminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 node_id, - UInt32 i) { - GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); - GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_terminal()); - GRN_DAT_DEBUG_THROW_IF(i > length); - - UInt32 key_id = ith_node(node_id).key_id(); - const char *key_ptr = key_buf_ + ith_key_info(key_id).offset(); - UInt32 key_length = ith_key_info(key_id + 1).offset() - - ith_key_info(key_id).offset(); +bool Trie::remove_key(const UInt8 *ptr, UInt32 length) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - UInt32 j = i; - while ((j < length) && (j < key_length)) { - if (ptr[j] != (UInt8)key_ptr[j]) { - break; - } - ++j; + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; + if (!search_linker(ptr, length, node_id, query_pos)) { + return false; } - if ((j == length) && (j == key_length)) { - if (key != NULL) { - key->set_ptr(key_ptr); - key->set_length(key_length); - key->set_id(key_id); - } + const UInt32 key_pos = ith_node(node_id).key_pos(); + if (!get_key(key_pos).equals_to(ptr, length, query_pos)) { return false; } - GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); - key_id = max_key_id() + 1; + const UInt32 key_id = get_key(key_pos).id(); + ith_node(node_id).set_offset(INVALID_OFFSET); + ith_entry(key_id).set_next(next_key_id()); + + header_->set_next_key_id(key_id); + header_->set_total_key_length(total_key_length() - length); + header_->set_num_keys(num_keys() - 1); + return true; +} + +bool Trie::insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - const UInt32 key_offset = ith_key_info(key_id).offset(); - GRN_DAT_THROW_IF(SIZE_ERROR, length > (key_buf_size() - key_offset)); + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; - for (UInt32 k = i; k < j; ++k) { - node_id = insert_node(node_id, ptr[k]); + search_linker(ptr, length, node_id, query_pos); + if (!insert_linker(ptr, length, node_id, query_pos)) { + if (key_pos != NULL) { + *key_pos = ith_node(node_id).key_pos(); + } + return false; } - node_id = separate(ptr, length, node_id, j); - ith_key_info(key_id + 1).set_offset(key_offset + length); - std::memcpy(key_buf_ + key_offset, ptr, length); - header_->set_num_keys(num_keys() + 1); + const UInt32 new_key_id = next_key_id(); + const UInt32 new_key_pos = append_key(ptr, length, new_key_id); - ith_node(node_id).set_key_id(key_id); + header_->set_total_key_length(total_key_length() + length); + header_->set_num_keys(num_keys() + 1); + if (new_key_id > max_key_id()) { + header_->set_max_key_id(new_key_id); + header_->set_next_key_id(new_key_id + 1); + } else { + header_->set_next_key_id(ith_entry(new_key_id).next()); + } - if (key != NULL) { - key->set_ptr(key_buf_ + key_offset); - key->set_length(length); - key->set_id(key_id); + ith_entry(new_key_id).set_key_pos(new_key_pos); + ith_node(node_id).set_key_pos(new_key_pos); + if (key_pos != NULL) { + *key_pos = new_key_pos; } return true; } -bool Trie::insert_from_nonterminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 node_id, - UInt32 i) { - GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); - GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_terminal()); - GRN_DAT_DEBUG_THROW_IF(i > length); +bool Trie::insert_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 query_pos) { + if (ith_node(node_id).is_linker()) { + const Key &key = get_key(ith_node(node_id).key_pos()); + UInt32 i = query_pos; + while ((i < length) && (i < key.length())) { + if (ptr[i] != key[i]) { + break; + } + ++i; + } + if ((i == length) && (i == key.length())) { + return false; + } + GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); + GRN_DAT_DEBUG_THROW_IF(next_key_id() > max_num_keys()); - GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); - const UInt32 key_id = max_key_id() + 1; + for (UInt32 j = query_pos; j < i; ++j) { + node_id = insert_node(node_id, ptr[j]); + } + node_id = separate(ptr, length, node_id, i); + return true; + } else if (ith_node(node_id).label() == TERMINAL_LABEL) { + return true; + } else { + GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); + const UInt16 label = (query_pos < length) ? + (UInt16)ptr[query_pos] : (UInt16)TERMINAL_LABEL; + const Base base = ith_node(node_id).base(); + if ((base.offset() == INVALID_OFFSET) || + !ith_node(base.offset() ^ label).is_phantom()) { + resolve(node_id, label); + } + node_id = insert_node(node_id, label); + return true; + } +} - const UInt32 key_offset = ith_key_info(key_id).offset(); - GRN_DAT_THROW_IF(SIZE_ERROR, length > (key_buf_size() - key_offset)); +bool Trie::update_key(UInt32 key_id, const UInt8 *ptr, UInt32 length, + UInt32 *key_pos) { + GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - const UInt16 label = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL; - const Base base = ith_node(node_id).base(); - if ((base.offset() == INVALID_OFFSET) || - !ith_node(base.offset() ^ label).is_phantom()) { - resolve(node_id, label); + if (!ith_entry(key_id).is_valid()) { + return false; } - node_id = insert_node(node_id, label); - ith_key_info(key_id + 1).set_offset(key_offset + length); - std::memcpy(key_buf_ + key_offset, ptr, length); - header_->set_num_keys(num_keys() + 1); + UInt32 node_id = ROOT_NODE_ID; + UInt32 query_pos = 0; - ith_node(node_id).set_key_id(key_id); + search_linker(ptr, length, node_id, query_pos); + if (!insert_linker(ptr, length, node_id, query_pos)) { + if (key_pos != NULL) { + *key_pos = ith_node(node_id).key_pos(); + } + return false; + } + + const Key &key = ith_key(key_id); + const UInt32 new_key_pos = append_key(ptr, length, key_id); + + header_->set_total_key_length(total_key_length() + length - key.length()); - if (key != NULL) { - key->set_ptr(key_buf_ + key_offset); - key->set_length(length); - key->set_id(key_id); + ith_entry(key_id).set_key_pos(new_key_pos); + ith_node(node_id).set_key_pos(new_key_pos); + if (key_pos != NULL) { + *key_pos = new_key_pos; } + + node_id = ROOT_NODE_ID; + query_pos = 0; + GRN_DAT_THROW_IF(UNEXPECTED_ERROR, + !search_linker(static_cast<const UInt8 *>(key.ptr()), key.length(), + node_id, query_pos)); + ith_node(node_id).set_offset(INVALID_OFFSET); return true; } -UInt32 Trie::insert_node(UInt32 node_id, - UInt16 label) { +UInt32 Trie::insert_node(UInt32 node_id, UInt16 label) { GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); const Base base = ith_node(node_id).base(); UInt32 offset; - if (base.is_terminal() || (base.offset() == INVALID_OFFSET)) { + if (base.is_linker() || (base.offset() == INVALID_OFFSET)) { offset = find_offset(&label, 1); } else { offset = base.offset(); @@ -495,10 +521,10 @@ UInt32 Trie::insert_node(UInt32 node_id, reserve_node(next); ith_node(next).set_label(label); - if (base.is_terminal()) { + if (base.is_linker()) { GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); ith_node(offset).set_is_offset(true); - ith_node(next).set_key_id(base.key_id()); + ith_node(next).set_key_pos(base.key_pos()); } else if (base.offset() == INVALID_OFFSET) { GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); ith_node(offset).set_is_offset(true); @@ -533,33 +559,41 @@ UInt32 Trie::insert_node(UInt32 node_id, return next; } -UInt32 Trie::separate(const UInt8 *ptr, - UInt32 length, - UInt32 node_id, - UInt32 i) { +UInt32 Trie::append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id) { + GRN_DAT_THROW_IF(SIZE_ERROR, key_id > max_num_keys()); + + const UInt32 key_pos = next_key_pos(); + const UInt32 key_size = Key::estimate_size(length); + + GRN_DAT_THROW_IF(SIZE_ERROR, key_size > (key_buf_size() - key_pos)); + Key::create(key_buf_.ptr() + key_pos, key_id, ptr, length); + + header_->set_next_key_pos(key_pos + key_size); + return key_pos; +} + +UInt32 Trie::separate(const UInt8 *ptr, UInt32 length, + UInt32 node_id, UInt32 i) { GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); - GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_terminal()); + GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_linker()); GRN_DAT_DEBUG_THROW_IF(i > length); - const UInt32 key_id = ith_node(node_id).key_id(); - const UInt32 key_offset = ith_key_info(key_id).offset(); - const char *key_ptr = key_buf_ + key_offset; - const UInt32 key_length = ith_key_info(key_id + 1).offset() - key_offset; + const UInt32 key_pos = ith_node(node_id).key_pos(); + const Key &key = get_key(key_pos); UInt16 labels[2]; - labels[0] = (i < key_length) ? - (UInt16)(UInt8)key_ptr[i] : (UInt16)TERMINAL_LABEL; + labels[0] = (i < key.length()) ? (UInt16)key[i] : (UInt16)TERMINAL_LABEL; labels[1] = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL; GRN_DAT_DEBUG_THROW_IF(labels[0] == labels[1]); const UInt32 offset = find_offset(labels, 2); - GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); UInt32 next = offset ^ labels[0]; reserve_node(next); + GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); ith_node(next).set_label(labels[0]); - ith_node(next).set_key_id(key_id); + ith_node(next).set_key_pos(key_pos); next = offset ^ labels[1]; reserve_node(next); @@ -580,10 +614,9 @@ UInt32 Trie::separate(const UInt8 *ptr, return next; } -void Trie::resolve(UInt32 node_id, - UInt16 label) { +void Trie::resolve(UInt32 node_id, UInt16 label) { GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); - GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_terminal()); + GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); UInt32 offset = ith_node(node_id).offset(); @@ -602,7 +635,7 @@ void Trie::resolve(UInt32 node_id, labels[num_labels] = label; offset = find_offset(labels, num_labels + 1); - move_nodes(node_id, offset, labels, num_labels); + migrate_nodes(node_id, offset, labels, num_labels); } else { offset = find_offset(&label, 1); if (offset >= num_nodes()) { @@ -614,12 +647,10 @@ void Trie::resolve(UInt32 node_id, } } -void Trie::move_nodes(UInt32 node_id, - UInt32 dest_offset, - const UInt16 *labels, - UInt32 num_labels) { +void Trie::migrate_nodes(UInt32 node_id, UInt32 dest_offset, + const UInt16 *labels, UInt32 num_labels) { GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); - GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_terminal()); + GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); GRN_DAT_DEBUG_THROW_IF(labels == NULL); GRN_DAT_DEBUG_THROW_IF(num_labels == 0); GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); @@ -646,14 +677,13 @@ void Trie::move_nodes(UInt32 node_id, ith_node(node_id).set_offset(dest_offset); } -UInt32 Trie::find_offset(const UInt16 *labels, - UInt32 num_labels) { +UInt32 Trie::find_offset(const UInt16 *labels, UInt32 num_labels) { GRN_DAT_DEBUG_THROW_IF(labels == NULL); GRN_DAT_DEBUG_THROW_IF(num_labels == 0); GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); - // Blocks are tested in descending order of level, a lower level block - // contains more phantom nodes. + // Blocks are tested in descending order of level. Basically, a lower level + // block contains more phantom nodes. UInt32 level = 1; while (num_labels >= (1U << level)) { ++level; @@ -662,13 +692,13 @@ UInt32 Trie::find_offset(const UInt16 *labels, UInt32 block_count = 0; do { - UInt32 entry = header_->ith_entry(level); - if (entry == INVALID_ENTRY) { + UInt32 leader = header_->ith_leader(level); + if (leader == INVALID_LEADER) { // This level has no blocks and it is thus skipped. continue; } - UInt32 block_id = entry; + UInt32 block_id = leader; do { const Block &block = ith_block(block_id); GRN_DAT_DEBUG_THROW_IF(block.level() != level); @@ -695,21 +725,21 @@ UInt32 Trie::find_offset(const UInt16 *labels, const UInt32 prev = block_id; const UInt32 next = block.next(); block_id = next; - ith_block(prev).set_fail_count(ith_block(prev).fail_count() + 1); + ith_block(prev).set_failure_count(ith_block(prev).failure_count() + 1); // The level of a block is updated when this function fails many times, - // actually MAX_FAIL_COUNT times, in that block. - if (ith_block(prev).fail_count() == MAX_FAIL_COUNT) { + // actually MAX_FAILURE_COUNT times, in that block. + if (ith_block(prev).failure_count() == MAX_FAILURE_COUNT) { update_block_level(prev, level + 1); - if (next == entry) { + if (next == leader) { break; } else { - // Note that the entry might be updated in the level update. - entry = header_->ith_entry(level); + // Note that the leader might be updated in the level update. + leader = header_->ith_leader(level); continue; } } - } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != entry)); + } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != leader)); } while ((block_count < MAX_BLOCK_COUNT) && (level-- != 0)); return num_nodes() ^ labels[0]; @@ -763,7 +793,7 @@ void Trie::reserve_block(UInt32 block_id) { GRN_DAT_THROW_IF(SIZE_ERROR, block_id >= max_num_blocks()); header_->set_num_blocks(block_id + 1); - ith_block(block_id).set_fail_count(0); + ith_block(block_id).set_failure_count(0); ith_block(block_id).set_first_phantom(0); ith_block(block_id).set_num_phantoms(BLOCK_SIZE); @@ -784,35 +814,33 @@ void Trie::reserve_block(UInt32 block_id) { ith_node(i).set_check(check); } - // The leve of the new block is 0. + // The level of the new block is 0. set_block_level(block_id, 0); header_->set_num_phantoms(num_phantoms() + BLOCK_SIZE); } -void Trie::update_block_level(UInt32 block_id, - UInt32 level) { +void Trie::update_block_level(UInt32 block_id, UInt32 level) { GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); - clear_block_level(block_id); + unset_block_level(block_id); set_block_level(block_id, level); } -void Trie::set_block_level(UInt32 block_id, - UInt32 level) { +void Trie::set_block_level(UInt32 block_id, UInt32 level) { GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); - const UInt32 entry = header_->ith_entry(level); - if (entry == INVALID_ENTRY) { + const UInt32 leader = header_->ith_leader(level); + if (leader == INVALID_LEADER) { // The new block becomes the only one block of the linked list. ith_block(block_id).set_next(block_id); ith_block(block_id).set_prev(block_id); - header_->set_ith_entry(level, block_id); + header_->set_ith_leader(level, block_id); } else { // The new block is added to the end of the list. - const UInt32 next = entry; - const UInt32 prev = ith_block(entry).prev(); + const UInt32 next = leader; + const UInt32 prev = ith_block(leader).prev(); GRN_DAT_DEBUG_THROW_IF(next >= num_blocks()); GRN_DAT_DEBUG_THROW_IF(prev >= num_blocks()); ith_block(block_id).set_next(next); @@ -821,17 +849,17 @@ void Trie::set_block_level(UInt32 block_id, ith_block(prev).set_next(block_id); } ith_block(block_id).set_level(level); - ith_block(block_id).set_fail_count(0); + ith_block(block_id).set_failure_count(0); } -void Trie::clear_block_level(UInt32 block_id) { +void Trie::unset_block_level(UInt32 block_id) { GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); const UInt32 level = ith_block(block_id).level(); GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); - const UInt32 entry = header_->ith_entry(level); - GRN_DAT_DEBUG_THROW_IF(entry == INVALID_ENTRY); + const UInt32 leader = header_->ith_leader(level); + GRN_DAT_DEBUG_THROW_IF(leader == INVALID_LEADER); const UInt32 next = ith_block(block_id).next(); const UInt32 prev = ith_block(block_id).prev(); @@ -840,13 +868,13 @@ void Trie::clear_block_level(UInt32 block_id) { if (next == block_id) { // The linked list becomes empty. - header_->set_ith_entry(level, INVALID_ENTRY); + header_->set_ith_leader(level, INVALID_LEADER); } else { ith_block(next).set_prev(prev); ith_block(prev).set_next(next); - if (block_id == entry) { - // The second block becomes the entry to the linked list. - header_->set_ith_entry(level, next); + if (block_id == leader) { + // The second block becomes the leader of the linked list. + header_->set_ith_leader(level, next); } } } Modified: lib/dat/trie.hpp (+83 -76) =================================================================== --- lib/dat/trie.hpp 2011-10-31 04:17:38 +0000 (2c27e7a) +++ lib/dat/trie.hpp 2011-11-01 01:00:20 +0000 (c1b1962) @@ -18,12 +18,13 @@ #ifndef GRN_DAT_TRIE_HPP_ #define GRN_DAT_TRIE_HPP_ +#include "array.hpp" #include "header.hpp" #include "node.hpp" #include "block.hpp" -#include "key-info.hpp" +#include "entry.hpp" #include "key.hpp" -#include "memory-mapped-file.hpp" +#include "file.hpp" namespace grn { namespace dat { @@ -51,24 +52,45 @@ class Trie { void swap(Trie *trie); - void ith_key(UInt32 key_id, Key *key) const { + // Users can access a key by its position or ID. + const Key &get_key(UInt32 key_pos) const { + GRN_DAT_DEBUG_THROW_IF(key_pos >= key_buf_size()); + return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos); + } + // If a specified ID is invalid, e.g. the key is already deleted, this + // function returns a reference to an invalid key object whose id() returns + // INVALID_KEY_ID. + const Key &ith_key(UInt32 key_id) const { GRN_DAT_DEBUG_THROW_IF(key_id < min_key_id()); GRN_DAT_DEBUG_THROW_IF(key_id > max_key_id()); - GRN_DAT_DEBUG_THROW_IF(key == NULL); + if (ith_entry(key_id).is_valid()) { + return get_key(ith_entry(key_id).key_pos()); + } + return Key::invalid_key(); + } - key->set_ptr(key_buf_ + ith_key_info(key_id).offset()); - key->set_length(ith_key_info(key_id + 1).offset() - - ith_key_info(key_id).offset()); - key->set_id(key_id); + bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const { + return search_key(static_cast<const UInt8 *>(ptr), length, key_pos); } - bool search(const void *ptr, UInt32 length, Key *key = NULL) const { - GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - return search_from_root(static_cast<const UInt8 *>(ptr), length, key); + bool remove(const void *ptr, UInt32 length) { + return remove_key(static_cast<const UInt8 *>(ptr), length); } - bool insert(const void *ptr, UInt32 length, Key *key = NULL) { - GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); - return insert_from_root(static_cast<const UInt8 *>(ptr), length, key); + bool remove(UInt32 key_id) { + const Key &key = ith_key(key_id); + if (key.id() == INVALID_KEY_ID) { + return false; + } + return remove(key.ptr(), key.length()); + } + + bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) { + return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos); + } + + bool update(UInt32 key_id, const void *ptr, UInt32 length, + UInt32 *key_pos = NULL) { + return update_key(key_id, static_cast<const UInt8 *>(ptr), length, key_pos); } const Node &ith_node(UInt32 i) const { @@ -79,10 +101,10 @@ class Trie { GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); return blocks_[i]; } - const KeyInfo &ith_key_info(UInt32 i) const { + const Entry &ith_entry(UInt32 i) const { GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); - GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1)); - return key_infos_[i - MIN_KEY_ID]; + GRN_DAT_DEBUG_THROW_IF(i > max_key_id()); + return entries_[i]; } const Header &header() const { @@ -94,13 +116,13 @@ class Trie { } UInt64 virtual_size() const { return sizeof(Header) - + (sizeof(KeyInfo) * (num_keys() + 1)) + + (sizeof(Entry) * num_keys()) + (sizeof(Block) * num_blocks()) + (sizeof(Node) * num_nodes()) + total_key_length(); } UInt32 total_key_length() const { - return ith_key_info(num_keys() + 1).offset(); + return header_->total_key_length(); } UInt32 num_keys() const { return header_->num_keys(); @@ -108,6 +130,9 @@ class Trie { UInt32 min_key_id() const { return header_->min_key_id(); } + UInt32 next_key_id() const { + return header_->next_key_id(); + } UInt32 max_key_id() const { return header_->max_key_id(); } @@ -132,17 +157,20 @@ class Trie { UInt32 max_num_blocks() const { return header_->max_num_blocks(); } + UInt32 next_key_pos() const { + return header_->next_key_pos(); + } UInt32 key_buf_size() const { return header_->key_buf_size(); } private: - MemoryMappedFile memory_mapped_file_; + File file_; Header *header_; - Node *nodes_; - Block *blocks_; - KeyInfo *key_infos_; - char *key_buf_; + Array<Node> nodes_; + Array<Block> blocks_; + Array<Entry> entries_; + Array<UInt32> key_buf_; void create_file(const char *file_name, UInt64 file_size, @@ -154,65 +182,44 @@ class Trie { UInt32 max_num_keys, UInt32 max_num_blocks, UInt32 key_buf_size); - void create_root(); void open_file(const char *file_name); void map_address(void *address); void build_from_trie(const Trie &trie); - void build_from_trie(const Trie &trie, - UInt32 src, - UInt32 dest); - - bool search_from_root(const UInt8 *ptr, - UInt32 length, - Key *key) const; - bool search_from_terminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 key_id, - UInt32 i) const; - - bool insert_from_root(const UInt8 *ptr, - UInt32 length, - Key *key); - bool insert_from_terminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 node_id, - UInt32 i); - bool insert_from_nonterminal(const UInt8 *ptr, - UInt32 length, - Key *key, - UInt32 node_id, - UInt32 i); - - UInt32 insert_node(UInt32 node_id, - UInt16 label); - - UInt32 separate(const UInt8 *ptr, - UInt32 length, - UInt32 node_id, - UInt32 i); - void resolve(UInt32 node_id, - UInt16 label); - void move_nodes(UInt32 node_id, - UInt32 dest_offset, - const UInt16 *labels, - UInt32 num_labels); - - UInt32 find_offset(const UInt16 *labels, - UInt32 num_labels); + void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest); + + bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; + bool search_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 &query_pos) const; + + bool remove_key(const UInt8 *ptr, UInt32 length); + + bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos); + bool insert_linker(const UInt8 *ptr, UInt32 length, + UInt32 &node_id, UInt32 query_pos); + + bool update_key(UInt32 key_id, const UInt8 *ptr, UInt32 length, + UInt32 *key_pos); + + UInt32 insert_node(UInt32 node_id, UInt16 label); + UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id); + + UInt32 separate(const UInt8 *ptr, UInt32 length, + UInt32 node_id, UInt32 i); + void resolve(UInt32 node_id, UInt16 label); + void migrate_nodes(UInt32 node_id, UInt32 dest_offset, + const UInt16 *labels, UInt32 num_labels); + + UInt32 find_offset(const UInt16 *labels, UInt32 num_labels); void reserve_node(UInt32 node_id); void reserve_block(UInt32 block_id); - void update_block_level(UInt32 block_id, - UInt32 level); - void set_block_level(UInt32 block_id, - UInt32 level); - void clear_block_level(UInt32 block_id); + void update_block_level(UInt32 block_id, UInt32 level); + void set_block_level(UInt32 block_id, UInt32 level); + void unset_block_level(UInt32 block_id); Node &ith_node(UInt32 i) { GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); @@ -222,10 +229,10 @@ class Trie { GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); return blocks_[i]; } - KeyInfo &ith_key_info(UInt32 i) { + Entry &ith_entry(UInt32 i) { GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); - GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 2)); - return key_infos_[i - MIN_KEY_ID]; + GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1)); + return entries_[i]; } // Disallows copy and assignment. Deleted: lib/dat/usage.hpp (+0 -65) 100644 =================================================================== --- lib/dat/usage.hpp 2011-10-31 04:17:38 +0000 (2d48393) +++ /dev/null @@ -1,65 +0,0 @@ -/* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#ifndef GRN_DAT_USAGE_HPP_ -#define GRN_DAT_USAGE_HPP_ - -#include "dat.hpp" - -#include <sys/time.h> -#include <sys/resource.h> - -namespace grn { -namespace dat { - -class Usage { - public: - Usage() - : usage_() { - ::getrusage(RUSAGE_SELF, &usage_); - } - - double user_time() const { - return usage_.ru_utime.tv_sec + (usage_.ru_utime.tv_usec * 0.000001); - } - double system_time() const { - return usage_.ru_stime.tv_sec + (usage_.ru_stime.tv_usec * 0.000001); - } - long max_resident_set_size() const { - return usage_.ru_maxrss; - } - long minor_page_faults() const { - return usage_.ru_minflt; - } - long major_page_faults() const { - return usage_.ru_majflt; - } - long file_system_inputs() const { - return usage_.ru_inblock; - } - long file_system_outputs() const { - return usage_.ru_oublock; - } - - private: - struct rusage usage_; -}; - -} // namespace dat -} // namespace grn - -#endif // GRN_DAT_USAGE_HPP_ Modified: lib/dat/vector.hpp (+127 -17) =================================================================== --- lib/dat/vector.hpp 2011-10-31 04:17:38 +0000 (eca4d7a) +++ lib/dat/vector.hpp 2011-11-01 01:00:20 +0000 (fd84d9a) @@ -20,7 +20,7 @@ #include "dat.hpp" -#include <vector> +#include <new> namespace grn { namespace dat { @@ -28,49 +28,159 @@ namespace dat { template <typename T> class Vector { public: - Vector() - : buf_() {} - ~Vector() {} + Vector() : buf_(NULL), size_(0), capacity_(0) {} + ~Vector() { + for (UInt32 i = 0; i < size(); ++i) { + buf_[i].~T(); + } + delete [] reinterpret_cast<char *>(buf_); + } const T &operator[](UInt32 i) const { + GRN_DAT_DEBUG_THROW_IF(i >= size()); return buf_[i]; } T &operator[](UInt32 i) { + GRN_DAT_DEBUG_THROW_IF(i >= size()); return buf_[i]; } + const T &front() const { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[0]; + } + T &front() { + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[0]; + } + const T &back() const { - return buf_.back(); + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[size() - 1]; } T &back() { - return buf_.back(); + GRN_DAT_DEBUG_THROW_IF(empty()); + return buf_[size() - 1]; } - bool push_back(const T &x) try { - buf_.push_back(x); - return true; - } catch (...) { - return false; + const T *begin() const { + return buf_; + } + T *begin() { + return buf_; + } + + const T *end() const { + return buf_ + size_; + } + T *end() { + return buf_ + size_; + } + + void push_back() { + reserve(size() + 1); + new (&buf_[size()]) T; + ++size_; + } + void push_back(const T &x) { + reserve(size() + 1); + new (&buf_[size()]) T(x); + ++size_; } void pop_back() { - buf_.pop_back(); + GRN_DAT_DEBUG_THROW_IF(empty()); + back().~T(); + --size_; + } + + void clear() { + resize(0); + } + + void resize(UInt32 new_size) { + if (new_size > capacity()) { + reserve(new_size); + } + for (UInt32 i = size(); i < new_size; ++i) { + new (&buf_[i]) T; + } + for (UInt32 i = new_size; i < size(); ++i) { + buf_[i].~T(); + } + size_ = new_size; + } + template <typename U> + void resize(UInt32 new_size, const U &value) { + if (new_size > capacity()) { + reserve(new_size); + } + for (UInt32 i = size(); i < new_size; ++i) { + new (&buf_[i]) T(value); + } + for (UInt32 i = new_size; i < size(); ++i) { + buf_[i].~T(); + } + size_ = new_size; + } + + void reserve(UInt32 new_capacity) { + if (new_capacity <= capacity()) { + return; + } else if ((new_capacity / 2) < capacity()) { + if (capacity() < (UINT32_MAX / 2)) { + new_capacity = capacity() * 2; + } else { + new_capacity = UINT32_MAX; + } + } + + T *new_buf = reinterpret_cast<T *>( + new (std::nothrow) char[sizeof(new_capacity) * new_capacity]); + GRN_DAT_THROW_IF(MEMORY_ERROR, new_buf == NULL); + + for (UInt32 i = 0; i < size(); ++i) { + new (&new_buf[i]) T(buf_[i]); + } + for (UInt32 i = 0; i < size(); ++i) { + buf_[i].~T(); + } + + T *old_buf = buf_; + buf_ = new_buf; + delete [] reinterpret_cast<char *>(old_buf); + + capacity_ = new_capacity; } void swap(Vector *rhs) { - buf_.swap(rhs->buf_); + T * const temp_buf = buf_; + buf_ = rhs->buf_; + rhs->buf_ = temp_buf; + + const UInt32 temp_size = size_; + size_ = rhs->size_; + rhs->size_ = temp_size; + + const UInt32 temp_capacity = capacity_; + capacity_ = rhs->capacity_; + rhs->capacity_ = temp_capacity; } bool empty() const { - return buf_.empty(); + return size_ == 0; } - UInt32 size() const { - return static_cast<UInt32>(buf_.size()); + return size_; + } + UInt32 capacity() const { + return capacity_; } private: - std::vector<T> buf_; + T *buf_; + UInt32 size_; + UInt32 capacity_; // Disallows copy and assignment. Vector(const Vector &);