groonga/grnxx at 2864000 [master] Add a draft of Column<Text>. (Groonga-commit) - Groonga - fulltext search engine.

susumu.yata	2014-11-10 17:05:05 +0900 (Mon, 10 Nov 2014)

  New Revision: 286400083efc103f43d6a034394d55b535eca3b8
  https://github.com/groonga/grnxx/commit/286400083efc103f43d6a034394d55b535eca3b8

  Message:
    Add a draft of Column<Text>.

  Modified files:
    lib/grnxx/impl/column/base.cpp
    lib/grnxx/impl/column/scalar.hpp
    lib/grnxx/impl/column/scalar/Makefile.am
    lib/grnxx/impl/column/scalar/int.cpp
    lib/grnxx/impl/column/scalar/text.cpp
    lib/grnxx/impl/column/scalar/text.hpp

  Modified: lib/grnxx/impl/column/base.cpp (+4 -4)
===================================================================

--- lib/grnxx/impl/column/base.cpp    2014-11-10 15:36:00 +0900 (b5b1634)
+++ lib/grnxx/impl/column/base.cpp    2014-11-10 17:05:05 +0900 (797c86f)
@@ -195,10 +195,10 @@ std::unique_ptr<ColumnBase> ColumnBase::create(
       column.reset(new impl::Column<GeoPoint>(table, name, options));
       break;
     }
-//    case TEXT_DATA: {
-//      column.reset(new impl::Column<Text>(table, name, options));
-//      break;
-//    }
+    case TEXT_DATA: {
+      column.reset(new impl::Column<Text>(table, name, options));
+      break;
+    }
 //    case BOOL_VECTOR_DATA: {
 //      column.reset(new impl::Column<Vector<Bool>>(table, name, options));
 //      break;

  Modified: lib/grnxx/impl/column/scalar.hpp (+1 -1)
===================================================================
--- lib/grnxx/impl/column/scalar.hpp    2014-11-10 15:36:00 +0900 (d6d9c3a)
+++ lib/grnxx/impl/column/scalar.hpp    2014-11-10 17:05:05 +0900 (f443aff)
@@ -5,6 +5,6 @@
 #include "grnxx/impl/column/scalar/float.hpp"
 #include "grnxx/impl/column/scalar/geo_point.hpp"
 #include "grnxx/impl/column/scalar/int.hpp"
-//#include "grnxx/impl/column/scalar/text.hpp"
+#include "grnxx/impl/column/scalar/text.hpp"
 
 #endif  // GRNXX_IMPL_COLUMN_SCALAR_HPP

  Modified: lib/grnxx/impl/column/scalar/Makefile.am (+2 -3)
===================================================================
--- lib/grnxx/impl/column/scalar/Makefile.am    2014-11-10 15:36:00 +0900 (1e31021)
+++ lib/grnxx/impl/column/scalar/Makefile.am    2014-11-10 17:05:05 +0900 (e381afc)
@@ -12,9 +12,8 @@ libgrnxx_impl_column_scalar_la_SOURCES =	\
 	bool.cpp				\
 	float.cpp				\
 	geo_point.cpp				\
-	int.cpp
-
-#	text.cpp
+	int.cpp					\
+	text.cpp
 
 libgrnxx_impl_column_scalar_includedir = ${includedir}/grnxx/impl/column/scalar
 libgrnxx_impl_column_scalar_include_HEADERS =	\

  Modified: lib/grnxx/impl/column/scalar/int.cpp (+1 -0)
===================================================================
--- lib/grnxx/impl/column/scalar/int.cpp    2014-11-10 15:36:00 +0900 (784892b)
+++ lib/grnxx/impl/column/scalar/int.cpp    2014-11-10 17:05:05 +0900 (4528629)
@@ -209,6 +209,7 @@ void Column<Int>::set_key(Int row_id, const Datum &key) {
   if (value_id >= values_.size()) {
     values_.resize(value_id + 1, Int::na());
   }
+  // TODO: N/A is not available.
   Int value = parse_datum(key);
   // TODO: Update indexes if exist.
 //  for (size_t i = 0; i < num_indexes(); ++i) try {

  Modified: lib/grnxx/impl/column/scalar/text.cpp (+354 -214)
===================================================================
--- lib/grnxx/impl/column/scalar/text.cpp    2014-11-10 15:36:00 +0900 (2db612a)
+++ lib/grnxx/impl/column/scalar/text.cpp    2014-11-10 17:05:05 +0900 (b1b6b66)
@@ -1,268 +1,408 @@
-#include "grnxx/impl/column/column_text.hpp"
+#include "grnxx/impl/column/scalar/text.hpp"
+
+#include <cstring>
+//#include <set>
 
-#include "grnxx/cursor.hpp"
 #include "grnxx/impl/db.hpp"
 #include "grnxx/impl/table.hpp"
-#include "grnxx/index.hpp"
-
-#include <set>
+//#include "grnxx/impl/index.hpp"
 
 namespace grnxx {
 namespace impl {
 
-bool Column<Text>::set(Error *error, Int row_id, const Datum &datum) {
-  if (datum.type() != TEXT_DATA) {
-    GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Wrong data type");
-    return false;
+Column<Text>::Column(Table *table,
+                     const String &name,
+                     const ColumnOptions &)
+    : ColumnBase(table, name, TEXT_DATA),
+      headers_(),
+      bodies_() {}
+
+Column<Text>::~Column() {}
+
+void Column<Text>::set(Int row_id, const Datum &datum) {
+  Text new_value = parse_datum(datum);
+  if (!table_->test_row(row_id)) {
+    throw "Invalid row ID";  // TODO
+  }
+  if (is_key_) {
+    if (new_value.is_na()) {
+      throw "N/A key";  // TODO
+    }
   }
-  if (!table_->test_row(error, row_id)) {
-    return false;
+  if (new_value.is_na()) {
+    unset(row_id);
+    return;
   }
   Text old_value = get(row_id);
-  Text new_value = datum.force_text();
-  if (new_value != old_value) {
-    if (has_key_attribute_ && contains(datum)) {
-      GRNXX_ERROR_SET(error, ALREADY_EXISTS, "Key duplicate");
-      return false;
-    }
-    for (Int i = 0; i < num_indexes(); ++i) {
-      if (!indexes_[i]->insert(error, row_id, datum)) {
-        for (Int j = 0; j < i; ++i) {
-          indexes_[j]->remove(nullptr, row_id, datum);
-        }
-        return false;
-      }
-    }
-    Int offset = bodies_.size();
-    UInt new_header;
-    if (new_value.size() < 0xFFFF) {
-      if (!bodies_.resize(error, offset + new_value.size())) {
-        return false;
-      }
-      std::memcpy(&bodies_[offset], new_value.data(), new_value.size());
-      new_header = (offset << 16) | new_value.size();
-    } else {
-      // The size of a long text is stored in front of the body.
-      if ((offset % sizeof(Int)) != 0) {
-        offset += sizeof(Int) - (offset % sizeof(Int));
-      }
-      if (!bodies_.resize(error, offset + sizeof(Int) + new_value.size())) {
-        return false;
-      }
-      *reinterpret_cast<Int *>(&bodies_[offset]) = new_value.size();
-      std::memcpy(&bodies_[offset + sizeof(Int)],
-                  new_value.data(), new_value.size());
-      new_header = (offset << 16) | 0xFFFF;
-    }
-    for (Int i = 0; i < num_indexes(); ++i) {
-      indexes_[i]->remove(nullptr, row_id, old_value);
-    }
-    headers_[row_id] = new_header;
+  if (old_value == new_value) {
+    return;
   }
-  return true;
-}
-
-bool Column<Text>::get(Error *error, Int row_id, Datum *datum) const {
-  if (!table_->test_row(error, row_id)) {
-    return false;
+  if (is_key_ && contains(datum)) {
+    throw "Key already exists";  // TODO
   }
-  *datum = get(row_id);
-  return true;
-}
-
-unique_ptr<Column<Text>> Column<Text>::create(
-    Error *error,
-    Table *table,
-    const StringCRef &name,
-    const ColumnOptions &options) {
-  unique_ptr<Column> column(new (nothrow) Column);
-  if (!column) {
-    GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
-    return nullptr;
+  if (!old_value.is_na()) {
+    // TODO: Remove the old value from indexes.
+//    for (size_t i = 0; i < num_indexes(); ++i) {
+//      indexes_[i]->remove(row_id, old_value);
+//    }
   }
-  if (!column->initialize_base(error, table, name, TEXT_DATA, options)) {
-    return nullptr;
+  size_t value_id = row_id.value();
+  if (value_id >= headers_.size()) {
+    headers_.resize(value_id + 1, na_header());
   }
-  if (!column->headers_.resize(error, table->max_row_id() + 1, 0)) {
-    return nullptr;
+  // TODO: Insert the new value into indexes.
+//  for (size_t i = 0; i < num_indexes(); ++i) try {
+//    indexes_[i]->insert(row_id, datum);
+//  } catch (...) {
+//    for (size_t j = 0; j < i; ++j) {
+//      indexes_[j]->remove(row_id, datum);
+//    }
+//    throw;
+//  }
+  // TODO: Error handling.
+  size_t offset = bodies_.size();
+  size_t size = new_value.size().value();
+  uint64_t header;
+  if (size < 0xFFFF) {
+    bodies_.resize(offset + size);
+    std::memcpy(&bodies_[offset], new_value.data(), size);
+    header = (offset << 16) | size;
+  } else {
+    // The size of a long text is stored in front of the body.
+    if ((offset % sizeof(uint64_t)) != 0) {
+      offset += sizeof(uint64_t) - (offset % sizeof(uint64_t));
+    }
+    bodies_.resize(offset + sizeof(uint64_t) + size);
+    *reinterpret_cast<uint64_t *>(&bodies_[offset]) = size;
+    std::memcpy(&bodies_[offset + sizeof(uint64_t)], new_value.data(), size);
+    header = (offset << 16) | 0xFFFF;
   }
-  return column;
+  headers_[value_id] = header;
 }
 
-Column<Text>::~Column() {}
+//bool Column<Text>::set(Error *error, Int row_id, const Datum &datum) {
+//  if (datum.type() != TEXT_DATA) {
+//    GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Wrong data type");
+//    return false;
+//  }
+//  if (!table_->test_row(error, row_id)) {
+//    return false;
+//  }
+//  Text old_value = get(row_id);
+//  Text new_value = datum.force_text();
+//  if (new_value != old_value) {
+//    if (has_key_attribute_ && contains(datum)) {
+//      GRNXX_ERROR_SET(error, ALREADY_EXISTS, "Key duplicate");
+//      return false;
+//    }
+//    for (Int i = 0; i < num_indexes(); ++i) {
+//      if (!indexes_[i]->insert(error, row_id, datum)) {
+//        for (Int j = 0; j < i; ++i) {
+//          indexes_[j]->remove(nullptr, row_id, datum);
+//        }
+//        return false;
+//      }
+//    }
+//    Int offset = bodies_.size();
+//    UInt new_header;
+//    if (new_value.size() < 0xFFFF) {
+//      if (!bodies_.resize(error, offset + new_value.size())) {
+//        return false;
+//      }
+//      std::memcpy(&bodies_[offset], new_value.data(), new_value.size());
+//      new_header = (offset << 16) | new_value.size();
+//    } else {
+//      // The size of a long text is stored in front of the body.
+//      if ((offset % sizeof(Int)) != 0) {
+//        offset += sizeof(Int) - (offset % sizeof(Int));
+//      }
+//      if (!bodies_.resize(error, offset + sizeof(Int) + new_value.size())) {
+//        return false;
+//      }
+//      *reinterpret_cast<Int *>(&bodies_[offset]) = new_value.size();
+//      std::memcpy(&bodies_[offset + sizeof(Int)],
+//                  new_value.data(), new_value.size());
+//      new_header = (offset << 16) | 0xFFFF;
+//    }
+//    for (Int i = 0; i < num_indexes(); ++i) {
+//      indexes_[i]->remove(nullptr, row_id, old_value);
+//    }
+//    headers_[row_id] = new_header;
+//  }
+//  return true;
+//}
 
-bool Column<Text>::set_key_attribute(Error *error) {
-  if (has_key_attribute_) {
-    GRNXX_ERROR_SET(error, INVALID_OPERATION,
-                    "This column is a key column");
-    return false;
+void Column<Text>::get(Int row_id, Datum *datum) const {
+  size_t value_id = row_id.value();
+  if (value_id >= headers_.size()) {
+    *datum = Text::na();
+  } else {
+    // TODO
+    *datum = get(row_id);
   }
-  // TODO: An index should be used if possible.
-  try {
-    std::set<Text> set;
-    // TODO: Functor-based inline callback may be better in this case,
-    //       because it does not require memory allocation.
-    auto cursor = table_->create_cursor(nullptr);
-    if (!cursor) {
-      return false;
+}
+
+bool Column<Text>::contains(const Datum &datum) const {
+  // TODO: Use an index if exists.
+  Text value = parse_datum(datum);
+  if (value.is_na()) {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      if (headers_[i] == na_header()) {
+        return true;
+      }
     }
-    Array<Record> records;
-    for ( ; ; ) {
-      auto result = cursor->read(nullptr, 1024, &records);
-      if (!result.is_ok) {
-        return false;
-      } else {
-        break;
+  } else {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      // TODO: Improve this.
+      if (get(Int(i)) == value) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+Int Column<Text>::find_one(const Datum &datum) const {
+  // TODO: Use an index if exists.
+  Text value = parse_datum(datum);
+  if (value.is_na()) {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      if (headers_[i] == na_header()) {
+        return Int(i);
       }
-      for (Int i = 0; i < result.count; ++i) {
-        if (!set.insert(get(records.get_row_id(i))).second) {
-          GRNXX_ERROR_SET(error, INVALID_OPERATION, "Key duplicate");
-          return false;
-        }
+    }
+  } else {
+    for (size_t i = 0; i < headers_.size(); ++i) {
+      // TODO: Improve this.
+      if (get(Int(i)) == value) {
+        return Int(i);
       }
-      records.clear();
     }
-  } catch (...) {
-    GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
-    return false;
   }
-  has_key_attribute_ = true;
-  return true;
+  return Int::na();
 }
 
-bool Column<Text>::unset_key_attribute(Error *error) {
-  if (!has_key_attribute_) {
-    GRNXX_ERROR_SET(error, INVALID_OPERATION,
-                    "This column is not a key column");
-    return false;
+//Int Column<Text>::find_one(const Datum &datum) const {
+//  // TODO: Cursor should not be used because it takes time.
+//  // Also, cursor operations can fail due to memory allocation.
+//  Text value = datum.force_text();
+//  if (indexes_.size() != 0) {
+//    auto cursor = indexes_[0]->find(nullptr, value);
+//    Array<Record> records;
+//    auto result = cursor->read(nullptr, 1, &records);
+//    if (!result.is_ok || (result.count == 0)) {
+//      return NULL_ROW_ID;
+//    }
+//    return true;
+//  } else {
+//    // TODO: A full scan takes time.
+//    // An index should be required for a key column.
+
+//    // TODO: Functor-based inline callback may be better in this case,
+//    // because it does not require memory allocation.
+
+//    // Scan the column to find "value".
+//    auto cursor = table_->create_cursor(nullptr);
+//    if (!cursor) {
+//      return NULL_ROW_ID;
+//    }
+//    Array<Record> records;
+//    for ( ; ; ) {
+//      auto result = cursor->read(nullptr, 1024, &records);
+//      if (!result.is_ok || result.count == 0) {
+//        return NULL_ROW_ID;
+//      }
+//      for (Int i = 0; i < result.count; ++i) {
+//        if (get(records.get_row_id(i)) == value) {
+//          return records.get_row_id(i);
+//        }
+//      }
+//      records.clear();
+//    }
+//  }
+//  return NULL_ROW_ID;
+//}
+
+void Column<Text>::set_key_attribute() {
+  if (is_key_) {
+    throw "Key column";  // TODO
   }
-  has_key_attribute_ = false;
-  return true;
+  throw "Not supported yet";  // TODO
 }
 
-bool Column<Text>::set_initial_key(Error *error,
-    Int row_id,
-    const Datum &key) {
-  if (!has_key_attribute_) {
-    GRNXX_ERROR_SET(error, INVALID_OPERATION,
-                    "This column is not a key column");
-    return false;
+//bool Column<Text>::set_key_attribute(Error *error) {
+//  if (has_key_attribute_) {
+//    GRNXX_ERROR_SET(error, INVALID_OPERATION,
+//                    "This column is a key column");
+//    return false;
+//  }
+//  // TODO: An index should be used if possible.
+//  try {
+//    std::set<Text> set;
+//    // TODO: Functor-based inline callback may be better in this case,
+//    //       because it does not require memory allocation.
+//    auto cursor = table_->create_cursor(nullptr);
+//    if (!cursor) {
+//      return false;
+//    }
+//    Array<Record> records;
+//    for ( ; ; ) {
+//      auto result = cursor->read(nullptr, 1024, &records);
+//      if (!result.is_ok) {
+//        return false;
+//      } else {
+//        break;
+//      }
+//      for (Int i = 0; i < result.count; ++i) {
+//        if (!set.insert(get(records.get_row_id(i))).second) {
+//          GRNXX_ERROR_SET(error, INVALID_OPERATION, "Key duplicate");
+//          return false;
+//        }
+//      }
+//      records.clear();
+//    }
+//  } catch (...) {
+//    GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
+//    return false;
+//  }
+//  has_key_attribute_ = true;
+//  return true;
+//}
+
+void Column<Text>::unset_key_attribute() {
+  if (!is_key_) {
+    throw "Not key column";  // TODO
   }
-  if (has_key_attribute_ && contains(key)) {
-    GRNXX_ERROR_SET(error, ALREADY_EXISTS, "Key duplicate");
-    return false;
+  is_key_ = false;  // This column is no longer a key column.
+}
+
+void Column<Text>::set_key(Int row_id, const Datum &key) {
+  if (!is_key_) {
+    throw "Not key column";  // TODO
   }
-  if (row_id >= headers_.size()) {
-    if (!headers_.resize(error, row_id + 1, 0)) {
-      return false;
-    }
+  if (contains(key)) {
+    throw "Key already exists";  // TODO
   }
-  Text value = key.force_text();
-  for (Int i = 0; i < num_indexes(); ++i) {
-    if (!indexes_[i]->insert(error, row_id, value)) {
-      for (Int j = 0; j < i; ++j) {
-        indexes_[j]->remove(nullptr, row_id, value);
-      }
-      return false;
-    }
+  size_t value_id = row_id.value();
+  if (value_id >= headers_.size()) {
+    headers_.resize(value_id + 1, na_header());
   }
-  Int offset = bodies_.size();
-  UInt header;
-  if (value.size() < 0xFFFF) {
-    if (!bodies_.resize(error, offset + value.size())) {
-      return false;
-    }
-    std::memcpy(&bodies_[offset], value.data(), value.size());
-    header = (offset << 16) | value.size();
+  // TODO: N/A is not available.
+  Text value = parse_datum(key);
+  // TODO: Update indexes if exist.
+//  for (size_t i = 0; i < num_indexes(); ++i) try {
+//    indexes_[i]->insert(row_id, value);
+//  } catch (...) {
+//    for (size_t j = 0; j < i; ++j) {
+//      indexes_[j]->remove(row_id, value);
+//    }
+//    throw;
+//  }
+  // TODO: Error handling.
+  size_t offset = bodies_.size();
+  size_t size = value.size().value();
+  uint64_t header;
+  if (size < 0xFFFF) {
+    bodies_.resize(offset + size);
+    std::memcpy(&bodies_[offset], value.data(), size);
+    header = (offset << 16) | size;
   } else {
     // The size of a long text is stored in front of the body.
-    if ((offset % sizeof(Int)) != 0) {
-      offset += sizeof(Int) - (offset % sizeof(Int));
-    }
-    if (!bodies_.resize(error, offset + sizeof(Int) + value.size())) {
-      return false;
+    if ((offset % sizeof(uint64_t)) != 0) {
+      offset += sizeof(uint64_t) - (offset % sizeof(uint64_t));
     }
-    *reinterpret_cast<Int *>(&bodies_[offset]) = value.size();
-    std::memcpy(&bodies_[offset + sizeof(Int)], value.data(), value.size());
+    bodies_.resize(offset + sizeof(uint64_t) + size);
+    *reinterpret_cast<uint64_t *>(&bodies_[offset]) = size;
+    std::memcpy(&bodies_[offset + sizeof(uint64_t)], value.data(), size);
     header = (offset << 16) | 0xFFFF;
   }
-  headers_[row_id] = header;
-  return true;
+  headers_[value_id] = header;
 }
 
-bool Column<Text>::set_default_value(Error *error, Int row_id) {
-  if (has_key_attribute_) {
-    GRNXX_ERROR_SET(error, INVALID_OPERATION,
-                    "This column is a key column");
-    return false;
-  }
-  if (row_id >= headers_.size()) {
-    if (!headers_.resize(error, row_id + 1)) {
-      return false;
-    }
-  }
-  Text value = TypeTraits<Text>::default_value();
-  for (Int i = 0; i < num_indexes(); ++i) {
-    if (!indexes_[i]->insert(error, row_id, value)) {
-      for (Int j = 0; j < i; ++j) {
-        indexes_[j]->remove(nullptr, row_id, value);
-      }
-      return false;
-    }
+//bool Column<Text>::set_initial_key(Error *error,
+//    Int row_id,
+//    const Datum &key) {
+//  if (!has_key_attribute_) {
+//    GRNXX_ERROR_SET(error, INVALID_OPERATION,
+//                    "This column is not a key column");
+//    return false;
+//  }
+//  if (has_key_attribute_ && contains(key)) {
+//    GRNXX_ERROR_SET(error, ALREADY_EXISTS, "Key duplicate");
+//    return false;
+//  }
+//  if (row_id >= headers_.size()) {
+//    if (!headers_.resize(error, row_id + 1, 0)) {
+//      return false;
+//    }
+//  }
+//  Text value = key.force_text();
+//  for (Int i = 0; i < num_indexes(); ++i) {
+//    if (!indexes_[i]->insert(error, row_id, value)) {
+//      for (Int j = 0; j < i; ++j) {
+//        indexes_[j]->remove(nullptr, row_id, value);
+//      }
+//      return false;
+//    }
+//  }
+//  Int offset = bodies_.size();
+//  UInt header;
+//  if (value.size() < 0xFFFF) {
+//    if (!bodies_.resize(error, offset + value.size())) {
+//      return false;
+//    }
+//    std::memcpy(&bodies_[offset], value.data(), value.size());
+//    header = (offset << 16) | value.size();
+//  } else {
+//    // The size of a long text is stored in front of the body.
+//    if ((offset % sizeof(Int)) != 0) {
+//      offset += sizeof(Int) - (offset % sizeof(Int));
+//    }
+//    if (!bodies_.resize(error, offset + sizeof(Int) + value.size())) {
+//      return false;
+//    }
+//    *reinterpret_cast<Int *>(&bodies_[offset]) = value.size();
+//    std::memcpy(&bodies_[offset + sizeof(Int)], value.data(), value.size());
+//    header = (offset << 16) | 0xFFFF;
+//  }
+//  headers_[row_id] = header;
+//  return true;
+//}
+
+void Column<Text>::unset(Int row_id) {
+  Text value = get(row_id);
+  if (!value.is_na()) {
+    // TODO: Update indexes if exist.
+//    for (size_t i = 0; i < num_indexes(); ++i) {
+//      indexes_[i]->remove(row_id, value);
+//    }
+    headers_[row_id.value()] = na_header();
   }
-  headers_[row_id] = 0;
-  return true;
 }
 
-void Column<Text>::unset(Int row_id) {
-  for (Int i = 0; i < num_indexes(); ++i) {
-    indexes_[i]->remove(nullptr, row_id, get(row_id));
+void Column<Text>::read(ArrayCRef<Record> records,
+                        ArrayRef<Text> values) const {
+  if (records.size() != values.size()) {
+    throw "Data size conflict";  // TODO
+  }
+  for (size_t i = 0; i < records.size(); ++i) {
+    values.set(i, get(records[i].row_id));
   }
-  headers_[row_id] = 0;
 }
 
-Int Column<Text>::find_one(const Datum &datum) const {
-  // TODO: Cursor should not be used because it takes time.
-  // Also, cursor operations can fail due to memory allocation.
-  Text value = datum.force_text();
-  if (indexes_.size() != 0) {
-    auto cursor = indexes_[0]->find(nullptr, value);
-    Array<Record> records;
-    auto result = cursor->read(nullptr, 1, &records);
-    if (!result.is_ok || (result.count == 0)) {
-      return NULL_ROW_ID;
+Text Column<Text>::parse_datum(const Datum &datum) {
+  switch (datum.type()) {
+    case NA_DATA: {
+      return Text::na();
     }
-    return true;
-  } else {
-    // TODO: A full scan takes time.
-    // An index should be required for a key column.
-
-    // TODO: Functor-based inline callback may be better in this case,
-    // because it does not require memory allocation.
-
-    // Scan the column to find "value".
-    auto cursor = table_->create_cursor(nullptr);
-    if (!cursor) {
-      return NULL_ROW_ID;
+    case TEXT_DATA: {
+      return datum.as_text();
     }
-    Array<Record> records;
-    for ( ; ; ) {
-      auto result = cursor->read(nullptr, 1024, &records);
-      if (!result.is_ok || result.count == 0) {
-        return NULL_ROW_ID;
-      }
-      for (Int i = 0; i < result.count; ++i) {
-        if (get(records.get_row_id(i)) == value) {
-          return records.get_row_id(i);
-        }
-      }
-      records.clear();
+    default: {
+      throw "Wrong data type";  // TODO
     }
   }
-  return NULL_ROW_ID;
 }
 
-Column<Text>::Column() : ColumnBase(), headers_(), bodies_() {}
-
 }  // namespace impl
 }  // namespace grnxx

  Modified: lib/grnxx/impl/column/scalar/text.hpp (+53 -36)
===================================================================
--- lib/grnxx/impl/column/scalar/text.hpp    2014-11-10 15:36:00 +0900 (fb60e55)
+++ lib/grnxx/impl/column/scalar/text.hpp    2014-11-10 17:05:05 +0900 (52559ff)
@@ -1,71 +1,88 @@
 #ifndef GRNXX_IMPL_COLUMN_SCALAR_TEXT_HPP
 #define GRNXX_IMPL_COLUMN_SCALAR_TEXT_HPP
 
-#include "grnxx/impl/column/column.hpp"
+#include <limits>
+#include <cstdint>
+
+#include "grnxx/impl/column/base.hpp"
 
 namespace grnxx {
 namespace impl {
 
+template <typename T> class Column;
+
 template <>
-class Column<Text> : public impl::ColumnBase {
+class Column<Text> : public ColumnBase {
  public:
-  // -- Public API --
+  // -- Public API (grnxx/column.hpp) --
 
-  bool set(Error *error, Int row_id, const Datum &datum);
-  bool get(Error *error, Int row_id, Datum *datum) const;
+  Column(Table *table, const String &name, const ColumnOptions &options);
+  ~Column();
 
-  // -- Internal API --
+  void set(Int row_id, const Datum &datum);
+  void get(Int row_id, Datum *datum) const;
 
-  // Create a new column.
-  //
-  // Returns a pointer to the column on success.
-  // On failure, returns nullptr and stores error information into "*error" if
-  // "error" != nullptr.
-  static unique_ptr<Column> create(Error *error,
-                                   Table *table,
-                                   const StringCRef &name,
-                                   const ColumnOptions &options);
+  bool contains(const Datum &datum) const;
+  Int find_one(const Datum &datum) const;
 
-  ~Column();
+  // -- Internal API (grnxx/impl/column/base.hpp) --
 
-  bool set_key_attribute(Error *error);
-  bool unset_key_attribute(Error *error);
+  void set_key_attribute();
+  void unset_key_attribute();
 
-  bool set_initial_key(Error *error, Int row_id, const Datum &key);
-  bool set_default_value(Error *error, Int row_id);
+  void set_key(Int row_id, const Datum &key);
   void unset(Int row_id);
-  Int find_one(const Datum &datum) const;
 
-  // Return a value identified by "row_id".
+  // -- Internal API --
+
+  // Return a value.
   //
-  // Assumes that "row_id" is valid. Otherwise, the result is undefined.
+  // If "row_id" is valid, returns the stored value.
+  // If "row_id" is invalid, returns N/A.
+  //
+  // TODO: Text cannot reuse allocated memory because of this interface.
   Text get(Int row_id) const {
-    Int size = static_cast<Int>(headers_[row_id] & 0xFFFF);
+    size_t value_id = row_id.value();
+    if (value_id >= headers_.size()) {
+      return Text::na();
+    }
+    if (headers_[value_id] == na_header()) {
+      return Text::na();
+    }
+    size_t size = headers_[value_id] & 0xFFFF;
     if (size == 0) {
-      return Text("", 0);
+      return Text(nullptr, 0);
     }
-    Int offset = static_cast<Int>(headers_[row_id] >> 16);
+    size_t offset = headers_[value_id] >> 16;
     if (size < 0xFFFF) {
       return Text(&bodies_[offset], size);
     } else {
       // The size of a long text is stored in front of the body.
-      size = *reinterpret_cast<const Int *>(&bodies_[offset]);
-      return StringCRef(&bodies_[offset + sizeof(Int)], size);
+      size = *reinterpret_cast<const uint64_t *>(&bodies_[offset]);
+      return Text(&bodies_[offset + sizeof(uint64_t)], size);
     }
   }
-
+//  Text get(Int row_id) const {
+//    size_t value_id = row_id.value();
+//    if (value_id >= values_.size()) {
+//      return Text::na();
+//    }
+//    return values_[value_id];
+//  }
   // Read values.
-  void read(ArrayCRef<Record> records, ArrayRef<Text> values) const {
-    for (Int i = 0; i < records.size(); ++i) {
-      values.set(i, get(records.get_row_id(i)));
-    }
-  }
+  //
+  // On failure, throws an exception.
+  void read(ArrayCRef<Record> records, ArrayRef<Text> values) const;
 
  private:
-  Array<UInt> headers_;
+  Array<uint64_t> headers_;
   Array<char> bodies_;
 
-  Column();
+  static constexpr uint64_t na_header() {
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  static Text parse_datum(const Datum &datum);
 };
 
 }  // namespace impl
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Descargar 


Groonga - fulltext search engine.

[Groonga-commit] groonga/grnxx at 2864000 [master] Add a draft of Column<Text>.