From ca63ce553c95dbc79412e036c90b2cb1b709a6b1 Mon Sep 17 00:00:00 2001 From: Stefan Schimanski Date: Mon, 25 Feb 2008 01:56:03 +0000 Subject: [PATCH] * Timo Bingmann's btree v8.1, extended with weight counting. See http://idlebox.net/2007/stx-btree/ git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@23211 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/support/weighted_btree.h | 3283 ++++++++++++++++++++++++++++++++++ 1 file changed, 3283 insertions(+) create mode 100644 src/support/weighted_btree.h diff --git a/src/support/weighted_btree.h b/src/support/weighted_btree.h new file mode 100644 index 0000000000..4418a83136 --- /dev/null +++ b/src/support/weighted_btree.h @@ -0,0 +1,3283 @@ +// $Id: btree.h 72 2008-01-25 12:47:26Z tb $ +/** \file btree.h + * Contains the main B+ tree implementation template class btree. + */ + +/* + * STX B+ Tree Template Classes v0.8.1 + * Copyright (C) 2008 Timo Bingmann + * 2008 Stefan Schimanski (weighted variant) + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _STX_RA_BTREE_H_ +#define _STX_RA_BTREE_H_ + +// *** Required Headers from the STL + +#include +#include +#include +#include +#include + +// *** Debugging Macros + +#ifdef BTREE_DEBUG + +#include + +/// Print out debug information to std::cout if BTREE_DEBUG is defined. +#define BTREE_PRINT(x) do { if (debug) (std::cout << x); } while(0) + +/// Assertion only if BTREE_DEBUG is defined. This is not used in verify(). +#define BTREE_ASSERT(x) do { assert(x); } while(0) + +#else + +/// Print out debug information to std::cout if BTREE_DEBUG is defined. +#define BTREE_PRINT(x) do { } while(0) + +/// Assertion only if BTREE_DEBUG is defined. This is not used in verify(). +#define BTREE_ASSERT(x) do { } while(0) + +#endif + +/// The maximum of a and b. Used in some compile-time formulas. +#define BTREE_MAX(a,b) ((a) < (b) ? (b) : (a)) + +#ifndef BTREE_FRIENDS +/// The macro BTREE_FRIENDS can be used by outside class to access the B+ +/// tree internals. This was added for wxBTreeDemo to be able to draw the +/// tree. +#define BTREE_FRIENDS friend class btree_friend; +#endif + +/// STX - Some Template Extensions namespace +namespace stx { + +/** Generates default traits for a B+ tree used as a set. It estimates leaf and + * inner node sizes by assuming a cache line size of 256 bytes. */ +template +struct btree_default_set_traits +{ + /// If true, the tree will self verify it's invariants after each insert() + /// or erase(). The header must have been compiled with BTREE_DEBUG defined. + static const bool selfverify = false; + + /// If true, the tree will print out debug information and a tree dump + /// during insert() or erase() operation. The header must have been + /// compiled with BTREE_DEBUG defined and key_type must be std::ostream + /// printable. + static const bool debug = false; + + /// Number of slots in each leaf of the tree. Estimated so that each node + /// has a size of about 256 bytes. + static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key)) ); + + /// Number of slots in each inner node of the tree. Estimated so that each node + /// has a size of about 256 bytes. + static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) ); +}; + +/** Generates default traits for a B+ tree used as a map. It estimates leaf and + * inner node sizes by assuming a cache line size of 256 bytes. */ +template +struct btree_default_map_traits +{ + /// If true, the tree will self verify it's invariants after each insert() + /// or erase(). The header must have been compiled with BTREE_DEBUG defined. + static const bool selfverify = false; + + /// If true, the tree will print out debug information and a tree dump + /// during insert() or erase() operation. The header must have been + /// compiled with BTREE_DEBUG defined and key_type must be std::ostream + /// printable. + static const bool debug = false; + + /// Number of slots in each leaf of the tree. Estimated so that each node + /// has a size of about 256 bytes. + static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(_Data)) ); + + /// Number of slots in each inner node of the tree. Estimated so that each node + /// has a size of about 256 bytes. + static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) ); +}; + +struct Void {}; + +/** @brief Basic class implementing a base B+ tree data structure in memory. + * + * The base implementation of a memory B+ tree. It is based on the + * implementation in Cormen's Introduction into Algorithms, Jan Jannink's paper + * and other algorithm resources. Almost all STL-required function calls are + * implemented. The asymptotic time requirements of the STL are not always + * fulfilled in theory, however in practice this B+ tree performs better than a + * red-black tree by using more memory. The insertion function splits the nodes + * on the recursion unroll. Erase is largely based on Jannink's ideas. + * + * This class is specialized into btree_set, btree_multiset, btree_map and + * btree_multimap using default template parameters and facade functions. + */ +template , + typename _Compare = std::less<_Key>, + typename _Traits = btree_default_map_traits<_Key, _Data>, + bool _Duplicates = false> +class weighted_btree +{ +public: + // *** Template Parameter Types + + /// First template parameter: The key type of the B+ tree. This is stored + /// in inner nodes and leaves + typedef _Key key_type; + + /// + typedef _Weight weight_type; + + /// Second template parameter: The data type associated with each + /// key. Stored in the B+ tree's leaves + typedef _Data data_type; + + /// Third template parameter: Composition pair of key and data types, this + /// is required by the STL standard. The B+ tree does not store key and + /// data together. If value_type == key_type then the B+ tree implements a + /// set. + typedef _Value value_type; + + /// Fourth template parameter: Key comparison function object + typedef _Compare key_compare; + + /// Fifth template parameter: Traits object used to define more parameters + /// of the B+ tree + typedef _Traits traits; + + /// Sixth template parameter: Allow duplicate keys in the B+ tree. Used to + /// implement multiset and multimap. + static const bool allow_duplicates = _Duplicates; + + // The macro BTREE_FRIENDS can be used by outside class to access the B+ + // tree internals. This was added for wxBTreeDemo to be able to draw the + // tree. + BTREE_FRIENDS + +public: + // *** Constructed Types + + /// Typedef of our own type + typedef weighted_btree btree_self; + + /// Size type used to count keys + typedef size_t size_type; + + /// The pair of key_type and data_type, this may be different from value_type. + typedef std::pair pair_type; + +public: + // *** Static Constant Options and Values of the B+ Tree + + /// Base B+ tree parameter: The number of key/data slots in each leaf + static const unsigned short leafslotmax = traits::leafslots; + + /// Base B+ tree parameter: The number of key slots in each inner node, + /// this can differ from slots in each leaf. + static const unsigned short innerslotmax = traits::innerslots; + + /// Computed B+ tree parameter: The minimum number of key/data slots used + /// in a leaf. If fewer slots are used, the leaf will be merged or slots + /// shifted from it's siblings. + static const unsigned short minleafslots = (leafslotmax / 2); + + /// Computed B+ tree parameter: The minimum number of key slots used + /// in an inner node. If fewer slots are used, the inner node will be + /// merged or slots shifted from it's siblings. + static const unsigned short mininnerslots = (innerslotmax / 2); + + /// Debug parameter: Enables expensive and thorough checking of the B+ tree + /// invariants after each insert/erase operation. + static const bool selfverify = traits::selfverify; + + /// Debug parameter: Prints out lots of debug information about how the + /// algorithms change the tree. Requires the header file to be compiled + /// with BTREE_DEBUG and the key type must be std::ostream printable. + static const bool debug = traits::debug; + +private: + // *** Node Classes for In-Memory Nodes + + /// The header structure of each node in-memory. This structure is extended + /// by inner_node or leaf_node. + struct node + { + /// Level in the b-tree, if level == 0 -> leaf node + unsigned short level; + + /// Number of key slotuse use, so number of valid children or data + /// pointers + unsigned short slotuse; + + /// + weight_type weight; + + /// Delayed initialisation of constructed node + inline void initialize(const unsigned short l) + { + level = l; + slotuse = 0; + weight = 0; + } + + /// True if this is a leaf node + inline bool isleafnode() const + { + return (level == 0); + } + }; + + /// Extended structure of a inner node in-memory. Contains only keys and no + /// data items. + struct inner_node : public node + { + /// Keys of children or data pointers + key_type slotkey[innerslotmax]; + + /// Pointers to children + node* childid[innerslotmax+1]; + + /// Set variables to initial values + inline void initialize(const unsigned short l) + { + node::initialize(l); + } + + /// True if the node's slots are full + inline bool isfull() const + { + return (node::slotuse == innerslotmax); + } + + /// True if few used entries, less than half full + inline bool isfew() const + { + return (node::slotuse <= mininnerslots); + } + + /// True if node has too few entries + inline bool isunderflow() const + { + return (node::slotuse < mininnerslots); + } + }; + + /// Extended structure of a leaf node in memory. Contains pairs of keys and + /// data items. Key and data slots are kept in separate arrays, because the + /// key array is traversed very often compared to accessing the data items. + struct leaf_node : public node + { + /// Double linked list pointers to traverse the leaves + leaf_node *prevleaf; + + /// Double linked list pointers to traverse the leaves + leaf_node *nextleaf; + + /// Keys of children or data pointers + key_type slotkey[leafslotmax]; + + /// Array of data + data_type slotdata[leafslotmax]; + + /// Array of weights + weight_type weights[leafslotmax]; + + /// Set variables to initial values + inline void initialize() + { + node::initialize(0); + prevleaf = nextleaf = NULL; + } + + /// True if the node's slots are full + inline bool isfull() const + { + return (node::slotuse == leafslotmax); + } + + /// True if few used entries, less than half full + inline bool isfew() const + { + return (node::slotuse <= minleafslots); + } + + /// True if node has too few entries + inline bool isunderflow() const + { + return (node::slotuse < minleafslots); + } + }; + +private: + // *** Template Magic to Convert a pair or key/data types to a value_type + + /// \internal For sets the second pair_type is an empty struct, so the + /// value_type should only be the first. + template + struct btree_pair_to_value + { + /// Convert a fake pair type to just the first component + inline value_type operator()(pair_type& p) const { + return p.first; + } + /// Convert a fake pair type to just the first component + inline value_type operator()(const pair_type& p) const { + return p.first; + } + }; + + /// \internal For maps value_type is the same as the pair_type + template + struct btree_pair_to_value + { + /// Identity "convert" a real pair type to just the first component + inline value_type operator()(pair_type& p) const { + return p; + } + /// Identity "convert" a real pair type to just the first component + inline value_type operator()(const pair_type& p) const { + return p; + } + }; + + /// Using template specialization select the correct converter used by the + /// iterators + typedef btree_pair_to_value pair_to_value_type; + +public: + // *** Iterators and Reverse Iterators + + class iterator; + class const_iterator; + + /// STL-like iterator object for B+ tree items. The iterator points to a + /// specific slot number in a leaf. + class iterator + { + public: + // *** Types + + /// The key type of the btree. Returned by key(). + typedef typename weighted_btree::key_type key_type; + + /// + typedef typename weighted_btree::weight_type weight_type; + + /// The data type of the btree. Returned by data(). + typedef typename weighted_btree::data_type data_type; + + /// The value type of the btree. Returned by operator*(). + typedef typename weighted_btree::value_type value_type; + + /// The pair type of the btree. + typedef typename weighted_btree::pair_type pair_type; + + /// Reference to the value_type. Required by the reverse_iterator. + typedef value_type& reference; + + /// Pointer to the value_type. Required by the reverse_iterator. + typedef value_type* pointer; + + /// STL-magic iterator category + typedef std::bidirectional_iterator_tag iterator_category; + + /// STL-magic + typedef ptrdiff_t difference_type; + + /// Our own type + typedef iterator self; + + private: + // *** Members + + /// The currently referenced leaf node of the tree + typename weighted_btree::leaf_node* currnode; + + /// Current key/data slot referenced + unsigned short currslot; + + /// Friendly to the const_iterator, so it may access the two data items directly + friend class weighted_btree::const_iterator; + + /// Evil! A temporary value_type to STL-correctly deliver operator* and + /// operator-> + mutable value_type temp_value; + + // The macro BTREE_FRIENDS can be used by outside class to access the B+ + // tree internals. This was added for wxBTreeDemo to be able to draw the + // tree. + BTREE_FRIENDS + + public: + // *** Methods + + /// Constructor of a mutable iterator + inline iterator(typename weighted_btree::leaf_node *l, unsigned short s) + : currnode(l), currslot(s) + { } + + /// Dereference the iterator, this is not a value_type& because key and + /// value are not stored together + inline reference operator*() const + { + temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot], + currnode->slotdata[currslot]) ); + return temp_value; + } + + /// Dereference the iterator. Do not use this if possible, use key() + /// and data() instead. The B+ tree does not stored key and data + /// together. + inline pointer operator->() const + { + temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot], + currnode->slotdata[currslot]) ); + return &temp_value; + } + + /// Key of the current slot + inline const key_type& key() const + { + return currnode->slotkey[currslot]; + } + + /// Weight of the current slot + inline weight_type weight() const + { + return currnode->weights[currslot]; + } + + /// Writable reference to the current data object + inline data_type& data() const + { + return currnode->slotdata[currslot]; + } + + /// Prefix++ advance the iterator to the next slot + inline self& operator++() + { + if (currslot + 1 < currnode->slotuse) { + ++currslot; + } + else if (currnode->nextleaf != NULL) { + currnode = currnode->nextleaf; + currslot = 0; + } + else { + // this is end() + currslot = currnode->slotuse; + } + + return *this; + } + + /// Postfix++ advance the iterator to the next slot + inline self operator++(int) + { + self tmp = *this; // copy ourselves + + if (currslot + 1 < currnode->slotuse) { + ++currslot; + } + else if (currnode->nextleaf != NULL) { + currnode = currnode->nextleaf; + currslot = 0; + } + else { + // this is end() + currslot = currnode->slotuse; + } + + return tmp; + } + + /// Prefix-- backstep the iterator to the last slot + inline self& operator--() + { + if (currslot > 0) { + --currslot; + } + else if (currnode->prevleaf != NULL) { + currnode = currnode->prevleaf; + currslot = currnode->slotuse - 1; + } + else { + // this is begin() + currslot = 0; + } + + return *this; + } + + /// Postfix-- backstep the iterator to the last slot + inline self operator--(int) + { + self tmp = *this; // copy ourselves + + if (currslot > 0) { + --currslot; + } + else if (currnode->prevleaf != NULL) { + currnode = currnode->prevleaf; + currslot = currnode->slotuse - 1; + } + else { + // this is begin() + currslot = 0; + } + + return tmp; + } + + /// Equality of iterators + inline bool operator==(const self& x) const + { + return (x.currnode == currnode) && (x.currslot == currslot); + } + + /// Inequality of iterators + inline bool operator!=(const self& x) const + { + return (x.currnode != currnode) || (x.currslot != currslot); + } + }; + + /// STL-like read-only iterator object for B+ tree items. The iterator + /// points to a specific slot number in a leaf. + class const_iterator + { + public: + // *** Types + + /// The key type of the btree. Returned by key(). + typedef typename weighted_btree::key_type key_type; + + /// The data type of the btree. Returned by data(). + typedef typename weighted_btree::data_type data_type; + + /// The value type of the btree. Returned by operator*(). + typedef typename weighted_btree::value_type value_type; + + /// The pair type of the btree. + typedef typename weighted_btree::pair_type pair_type; + + /// Reference to the value_type. Required by the reverse_iterator. + typedef const value_type& reference; + + /// Pointer to the value_type. Required by the reverse_iterator. + typedef const value_type* pointer; + + /// STL-magic iterator category + typedef std::bidirectional_iterator_tag iterator_category; + + /// STL-magic + typedef ptrdiff_t difference_type; + + /// Our own type + typedef const_iterator self; + + private: + // *** Members + + /// The currently referenced leaf node of the tree + const typename weighted_btree::leaf_node* currnode; + + /// Current key/data slot referenced + unsigned short currslot; + + /// Evil! A temporary value_type to STL-correctly deliver operator* and + /// operator-> + mutable value_type temp_value; + + // The macro BTREE_FRIENDS can be used by outside class to access the B+ + // tree internals. This was added for wxBTreeDemo to be able to draw the + // tree. + BTREE_FRIENDS + + public: + // *** Methods + + /// Constructor of a const iterator + inline const_iterator(const typename weighted_btree::leaf_node *l, unsigned short s) + : currnode(l), currslot(s) + { } + + /// Copy-constructor from a mutable const iterator + inline const_iterator(const iterator &it) + : currnode(it.currnode), currslot(it.currslot) + { } + + /// Dereference the iterator. Do not use this if possible, use key() + /// and data() instead. The B+ tree does not stored key and data + /// together. + inline reference operator*() const + { + temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot], + currnode->slotdata[currslot]) ); + return temp_value; + } + + /// Dereference the iterator. Do not use this if possible, use key() + /// and data() instead. The B+ tree does not stored key and data + /// together. + inline pointer operator->() const + { + temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot], + currnode->slotdata[currslot]) ); + return &temp_value; + } + + /// Key of the current slot + inline const key_type& key() const + { + return currnode->slotkey[currslot]; + } + + /// Read-only reference to the current data object + inline const data_type& data() const + { + return currnode->slotdata[currslot]; + } + + /// Prefix++ advance the iterator to the next slot + inline self& operator++() + { + if (currslot + 1 < currnode->slotuse) { + ++currslot; + } + else if (currnode->nextleaf != NULL) { + currnode = currnode->nextleaf; + currslot = 0; + } + else { + // this is end() + currslot = currnode->slotuse; + } + + return *this; + } + + /// Postfix++ advance the iterator to the next slot + inline self operator++(int) + { + self tmp = *this; // copy ourselves + + if (currslot + 1 < currnode->slotuse) { + ++currslot; + } + else if (currnode->nextleaf != NULL) { + currnode = currnode->nextleaf; + currslot = 0; + } + else { + // this is end() + currslot = currnode->slotuse; + } + + return tmp; + } + + /// Prefix-- backstep the iterator to the last slot + inline self& operator--() + { + if (currslot > 0) { + --currslot; + } + else if (currnode->prevleaf != NULL) { + currnode = currnode->prevleaf; + currslot = currnode->slotuse - 1; + } + else { + // this is begin() + currslot = 0; + } + + return *this; + } + + /// Postfix-- backstep the iterator to the last slot + inline self operator--(int) + { + self tmp = *this; // copy ourselves + + if (currslot > 0) { + --currslot; + } + else if (currnode->prevleaf != NULL) { + currnode = currnode->prevleaf; + currslot = currnode->slotuse - 1; + } + else { + // this is begin() + currslot = 0; + } + + return tmp; + } + + /// Equality of iterators + inline bool operator==(const self& x) const + { + return (x.currnode == currnode) && (x.currslot == currslot); + } + + /// Inequality of iterators + inline bool operator!=(const self& x) const + { + return (x.currnode != currnode) || (x.currslot != currslot); + } + }; + + /// create mutable reverse iterator by using STL magic + typedef std::reverse_iterator reverse_iterator; + + /// create constant reverse iterator by using STL magic + typedef std::reverse_iterator const_reverse_iterator; + +public: + // *** Small Statistics Structure + + /** A small struct containing basic statistics about the B+ tree. It can be + * fetched using get_stats(). */ + struct tree_stats + { + /// Number of items in the B+ tree + size_type itemcount; + + /// Number of leaves in the B+ tree + size_type leaves; + + /// Number of inner nodes in the B+ tree + size_type innernodes; + + /// Base B+ tree parameter: The number of key/data slots in each leaf + static const unsigned short leafslots = btree_self::leafslotmax; + + /// Base B+ tree parameter: The number of key slots in each inner node. + static const unsigned short innerslots = btree_self::innerslotmax; + + /// Zero initialized + inline tree_stats() + : itemcount(0), + leaves(0), innernodes(0) + { + } + + /// Return the total number of nodes + inline size_type nodes() const + { + return innernodes + leaves; + } + + /// Return the average fill of leaves + inline double avgfill_leaves() const + { + return static_cast(itemcount) / (leaves * leafslots); + } + }; + +private: + // *** Tree Object Data Members + + /// Pointer to the B+ tree's root node, either leaf or inner node + node* root; + + /// Pointer to first leaf in the double linked leaf chain + leaf_node *headleaf; + + /// Pointer to last leaf in the double linked leaf chain + leaf_node *tailleaf; + + /// Other small statistics about the B+ tree + tree_stats stats; + + /// Key comparison object. More comparison functions are generated from + /// this < relation. + key_compare key_less; + +public: + // *** Constructors and Destructor + + /// Default constructor initializing an empty B+ tree with the standard key + /// comparison function + inline weighted_btree() + : root(NULL), headleaf(NULL), tailleaf(NULL) + { + } + + /// Constructor initializing an empty B+ tree with a special key + /// comparison object + inline weighted_btree(const key_compare &kcf) + : root(NULL), headleaf(NULL), tailleaf(NULL), + key_less(kcf) + { + } + + /// Constructor initializing a B+ tree with the range [first,last) + template + inline weighted_btree(InputIterator first, InputIterator last) + : root(NULL), headleaf(NULL), tailleaf(NULL) + { + insert(first, last); + } + + /// Constructor initializing a B+ tree with the range [first,last) and a + /// special key comparison object + template + inline weighted_btree(InputIterator first, InputIterator last, const key_compare &kcf) + : root(NULL), headleaf(NULL), tailleaf(NULL), + key_less(kcf) + { + insert(first, last); + } + + /// Frees up all used B+ tree memory pages + inline ~weighted_btree() + { + clear(); + } + + /// Fast swapping of two identical B+ tree objects. + void swap(btree_self& from) + { + std::swap(root, from.root); + std::swap(headleaf, from.headleaf); + std::swap(tailleaf, from.tailleaf); + std::swap(stats, from.stats); + std::swap(key_less, from.key_less); + } + +public: + // *** Key and Value Comparison Function Objects + + /// Function class to compare value_type objects. Required by the STL + class value_compare + { + protected: + /// Key comparison function from the template parameter + key_compare key_comp; + + /// Constructor called from btree::value_comp() + inline value_compare(key_compare kc) + : key_comp(kc) + { } + + /// Friendly to the btree class so it may call the constructor + friend class weighted_btree; + + public: + /// Function call "less"-operator resulting in true if x < y. + inline bool operator()(const value_type& x, const value_type& y) const + { + return key_comp(x.first, y.first); + } + }; + + /// Constant access to the key comparison object sorting the B+ tree + inline key_compare key_comp() const + { + return key_less; + } + + /// Constant access to a constructed value_type comparison object. Required + /// by the STL + inline value_compare value_comp() const + { + return value_compare(key_less); + } + +private: + // *** Convenient Key Comparison Functions Generated From key_less + + /// True if a <= b ? constructed from key_less() + inline bool key_lessequal(const key_type &a, const key_type b) const + { + return !key_less(b, a); + } + + /// True if a > b ? constructed from key_less() + inline bool key_greater(const key_type &a, const key_type &b) const + { + return key_less(b, a); + } + + /// True if a >= b ? constructed from key_less() + inline bool key_greaterequal(const key_type &a, const key_type b) const + { + return !key_less(a, b); + } + + /// True if a == b ? constructed from key_less(). This requires the < + /// relation to be a total order, otherwise the B+ tree cannot be sorted. + inline bool key_equal(const key_type &a, const key_type &b) const + { + return !key_less(a, b) && !key_less(b, a); + } + +private: + // *** Node Object Allocation and Deallocation Functions + + /// Allocate and initialize a leaf node + inline leaf_node* allocate_leaf() + { + leaf_node* n = new leaf_node; + n->initialize(); + stats.leaves++; + return n; + } + + /// Allocate and initialize an inner node + inline inner_node* allocate_inner(unsigned short l) + { + inner_node* n = new inner_node; + n->initialize(l); + stats.innernodes++; + return n; + } + + /// Correctly free either inner or leaf node, destructs all contained key + /// and value objects + inline void free_node(node *n) + { + if (n->isleafnode()) { + delete static_cast(n); + stats.leaves--; + } + else { + delete static_cast(n); + stats.innernodes--; + } + } + +public: + // *** Fast Destruction of the B+ Tree + + /// Frees all key/data pairs and all nodes of the tree + void clear() + { + if (root) + { + clear_recursive(root); + free_node(root); + + root = NULL; + headleaf = tailleaf = NULL; + + stats = tree_stats(); + } + + BTREE_ASSERT(stats.itemcount == 0); + } + +private: + /// Recursively free up nodes + void clear_recursive(node *n) + { + if (n->isleafnode()) + { + leaf_node *leafnode = static_cast(n); + + for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot) + { + // data objects are deleted by leaf_node's destructor + } + } + else + { + inner_node *innernode = static_cast(n); + + for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot) + { + clear_recursive(innernode->childid[slot]); + free_node(innernode->childid[slot]); + } + } + } + +public: + // *** STL Iterator Construction Functions + + /// Constructs a read/data-write iterator that points to the first slot in + /// the first leaf of the B+ tree. + inline iterator begin() + { + return iterator(headleaf, 0); + } + + /// Constructs a read/data-write iterator that points to the first invalid + /// slot in the last leaf of the B+ tree. + inline iterator end() + { + return iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0); + } + + /// Constructs a read-only constant iterator that points to the first slot + /// in the first leaf of the B+ tree. + inline const_iterator begin() const + { + return const_iterator(headleaf, 0); + } + + /// Constructs a read-only constant iterator that points to the first + /// invalid slot in the last leaf of the B+ tree. + inline const_iterator end() const + { + return const_iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0); + } + + /// Constructs a read/data-write reverse iterator that points to the first + /// invalid slot in the last leaf of the B+ tree. Uses STL magic. + inline reverse_iterator rbegin() + { + return reverse_iterator(end()); + } + + /// Constructs a read/data-write reverse iterator that points to the first + /// slot in the first leaf of the B+ tree. Uses STL magic. + inline reverse_iterator rend() + { + return reverse_iterator(begin()); + } + + /// Constructs a read-only reverse iterator that points to the first + /// invalid slot in the last leaf of the B+ tree. Uses STL magic. + inline const_reverse_iterator rbegin() const + { + return const_reverse_iterator(end()); + } + + /// Constructs a read-only reverse iterator that points to the first slot + /// in the first leaf of the B+ tree. Uses STL magic. + inline const_reverse_iterator rend() const + { + return const_reverse_iterator(begin()); + } + +private: + // *** B+ Tree Node Binary Search Functions + + /// Searches for the first key in the node n less or equal to key. Uses + /// binary search with an optional linear self-verification. This is a + /// template function, because the slotkey array is located at different + /// places in leaf_node and inner_node. + template + inline int find_lower(const node_type *n, const key_type& key) const + { + if (n->slotuse == 0) return 0; + + int lo = 0, + hi = n->slotuse - 1; + + while(lo < hi) + { + int mid = (lo + hi) / 2; + + if (key_lessequal(key, n->slotkey[mid])) { + hi = mid - 1; + } + else { + lo = mid + 1; + } + } + + if (hi < 0 || key_less(n->slotkey[hi], key)) + hi++; + + BTREE_PRINT("btree::find_lower: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", "); + + // verify result using simple linear search + if (selfverify) + { + int i = n->slotuse - 1; + while(i >= 0 && key_lessequal(key, n->slotkey[i])) + i--; + i++; + + BTREE_PRINT("testfind: " << i << std::endl); + BTREE_ASSERT(i == hi); + } + else { + BTREE_PRINT(std::endl); + } + + return hi; + } + + /// Searches for the first summed weight in the node n less or equal to weight. + inline int find_summed_weight_lower(const inner_node *n, weight_type weight) const + { + if (n->slotuse == 0) return 0; + + int i = 0; + weight_type w = n->childid[0]->weight; + while (i < n->slotuse && w <= weight) { + i++; + w += n->childid[i]->weight; + } + + return i; + } + + /// Searches for the first summed weight in the node n less or equal to weight. + inline int find_summed_weight_lower(const leaf_node *n, weight_type weight) const + { + if (n->slotuse == 0) return 0; + + int i = 0; + weight_type w = n->weights[0]; + while (i < n->slotuse && w <= weight) { + i++; + w += n->weights[i]; + } + + return i; + } + + /// Searches for the first key in the node n greater than key. Uses binary + /// search with an optional linear self-verification. This is a template + /// function, because the slotkey array is located at different places in + /// leaf_node and inner_node. + template + inline int find_upper(const node_type *n, const key_type& key) const + { + if (n->slotuse == 0) return 0; + + int lo = 0, + hi = n->slotuse - 1; + + while(lo < hi) + { + int mid = (lo + hi) / 2; + + if (key_less(key, n->slotkey[mid])) { + hi = mid - 1; + } + else { + lo = mid + 1; + } + } + + if (hi < 0 || key_lessequal(n->slotkey[hi], key)) + hi++; + + BTREE_PRINT("btree::find_upper: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", "); + + // verify result using simple linear search + if (selfverify) + { + int i = n->slotuse - 1; + while(i >= 0 && key_less(key, n->slotkey[i])) + i--; + i++; + + BTREE_PRINT("btree::find_upper testfind: " << i << std::endl); + BTREE_ASSERT(i == hi); + } + else { + BTREE_PRINT(std::endl); + } + + return hi; + } + + /// Searches for the first summed weight in the node n greater than weight. + inline int find_summed_weight_upper(const inner_node *n, weight_type weight) const + { + if (n->slotuse == 0) return 0; + + int i = 0; + weight_type w = n->childid[0]->weight; + while (i < n->slotuse && w < weight) { + i++; + w += n->childid[i]->weight; + } + + return i; + } + + /// Searches for the first summed weight in the node n greater than weight. + inline int find_summed_weight_upper(const leaf_node *n, weight_type weight) const + { + if (n->slotuse == 0) return 0; + + int i = 0; + weight_type w = n->weights[0]; + while (i < n->slotuse && w < weight) { + i++; + w += n->weights[i]; + } + + return i; + } + +public: + // *** Access Functions to the Item Count + + /// Return the number of key/data pairs in the B+ tree + inline size_type size() const + { + return stats.itemcount; + } + + /// + inline weight_type summed_weight() const + { + if (root) + return root->weight; + else + return 0; + } + + /// Returns true if there is at least one key/data pair in the B+ tree + inline bool empty() const + { + return (size() == size_type(0)); + } + + /// Returns the largest possible size of the B+ Tree. This is just a + /// function required by the STL standard, the B+ Tree can hold more items. + inline size_type max_size() const + { + return size_type(-1); + } + + /// Return a const reference to the current statistics. + inline const struct tree_stats& get_stats() const + { + return stats; + } + +public: + // *** Standard Access Functions Querying the Tree by Descending to a Leaf + + /// Non-STL function checking whether a key is in the B+ tree. The same as + /// (find(k) != end()) or (count() != 0). + bool exists(const key_type &key) const + { + const node *n = root; + + if (!n) return false; + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + const leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])); + } + + /// Tries to locate a key in the B+ tree and returns an iterator to the + /// key/data slot if found. If unsuccessful it returns end(). + iterator find(const key_type &key) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) + ? iterator(leaf, slot) : end(); + } + + /// Tries to locate a summed weight in the B+ tree and returns an iterator to the + /// key/data slot if found. If unsuccessful it returns end(). + iterator find_summed_weight(weight_type weight) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_lower(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_lower(leaf, weight); + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->weights[s]; + + return (slot < leaf->slotuse && weight == 0) + ? iterator(leaf, slot) : end(); + } + + /// + weight_type summed_weight(const key_type &key) + { + node *n = root; + if (!n) return 0; + + weight_type w = 0; + while(!n->isleafnode()) { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + for (unsigned short s = 0; s < slot; ++s) + w += inner->childid[slot]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + + for (unsigned short s = 0; s < slot; ++s) + w += leaf->childid[slot]->weight; + + return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) + ? iterator(leaf, slot) : end(); + } + + /// Tries to locate a key in the B+ tree and returns an constant iterator + /// to the key/data slot if found. If unsuccessful it returns end(). + const_iterator find(const key_type &key) const + { + const node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + const leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) + ? const_iterator(leaf, slot) : end(); + } + + /// Tries to locate a summed weight in the B+ tree and returns an iterator to the + /// key/data slot if found. If unsuccessful it returns end(). + const_iterator find_summed_weight(weight_type weight) const + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_lower(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_lower(leaf, weight); + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->childid[s]->weight; + + return (slot < leaf->slotuse && weight == 0) + ? const_iterator(leaf, slot) : end(); + } + + /// Tries to locate a key in the B+ tree and returns the number of + /// identical key entries found. + size_type count(const key_type &key) const + { + const node *n = root; + if (!n) return 0; + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + const leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + size_type num = 0; + + while (leaf && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) + { + ++num; + if (++slot >= leaf->slotuse) + { + leaf = leaf->nextleaf; + slot = 0; + } + } + + return num; + } + + /// Searches the B+ tree and returns an iterator to the first key less or + /// equal to the parameter. If unsuccessful it returns end(). + iterator lower_bound(const key_type& key) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + return iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an iterator to the first summed weight + /// less or equal to the parameter. If unsuccessful it returns end(). + iterator lower_summed_weight_bound(weight_type weight) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_lower(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_lower(leaf, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->weights[s]; + + return iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an constant iterator to the first key less or + /// equal to the parameter. If unsuccessful it returns end(). + const_iterator lower_bound(const key_type& key) const + { + const node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_lower(inner, key); + + n = inner->childid[slot]; + } + + const leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + return const_iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an iterator to the first summed weight + /// less or equal to the parameter. If unsuccessful it returns end(). + const_iterator lower_summed_weight_bound(weight_type weight) const + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_lower(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_lower(leaf, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->weights[s]; + + return const_iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an iterator to the first key greater + /// than the parameter. If unsuccessful it returns end(). + iterator upper_bound(const key_type& key) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_upper(inner, key); + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_upper(leaf, key); + return iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an constant iterator to the first key + /// greater than the parameter. If unsuccessful it returns end(). + const_iterator upper_bound(const key_type& key) const + { + const node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_upper(inner, key); + + n = inner->childid[slot]; + } + + const leaf_node *leaf = static_cast(n); + + int slot = find_upper(leaf, key); + return const_iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an iterator to the first summed weight + /// greater than the parameter. If unsuccessful it returns end(). + iterator upper_summed_weight_bound(weight_type weight) + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) + { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_upper(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_upper(leaf, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->weights[s]; + + return iterator(leaf, slot); + } + + /// Searches the B+ tree and returns an iterator to the first summed weight + /// greater than the parameter. If unsuccessful it returns end(). + const_iterator upper_summed_weight_bound(weight_type weight) const + { + node *n = root; + if (!n) return end(); + + while(!n->isleafnode()) { + const inner_node *inner = static_cast(n); + int slot = find_summed_weight_upper(inner, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= inner->childid[s]->weight; + + n = inner->childid[slot]; + } + + leaf_node *leaf = static_cast(n); + + int slot = find_summed_weight_upper(leaf, weight); + + for (unsigned short s = 0; s < slot; ++s) + weight -= leaf->weights[s]; + + return const_iterator(leaf, slot); + } + + /// Searches the B+ tree and returns both lower_bound() and upper_bound(). + inline std::pair equal_range(const key_type& key) + { + return std::pair(lower_bound(key), upper_bound(key)); + } + + /// Searches the B+ tree and returns both lower_bound() and upper_bound(). + inline std::pair equal_range(const key_type& key) const + { + return std::pair(lower_bound(key), upper_bound(key)); + } + + /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound(). + inline std::pair equal_summed_weight_range(weight_type weight) + { + return std::pair(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight)); + } + + /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound(). + inline std::pair equal_summed_weight_range(weight_type weight) const + { + return std::pair(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight)); + } + +public: + // *** B+ Tree Object Comparison Functions + + /// Equality relation of B+ trees of the same type. B+ trees of the same + /// size and equal elements (both key and data) are considered + /// equal. Beware of the random ordering of duplicate keys. + inline bool operator==(const btree_self &other) const + { + return (size() == other.size()) && std::equal(begin(), end(), other.begin()); + } + + /// Inequality relation. Based on operator==. + inline bool operator!=(const btree_self &other) const + { + return !(*this == other); + } + + /// Total ordering relation of B+ trees of the same type. It uses + /// std::lexicographical_compare() for the actual comparison of elements. + inline bool operator<(const btree_self &other) const + { + return std::lexicographical_compare(begin(), end(), other.begin(), other.end()); + } + + /// Greater relation. Based on operator<. + inline bool operator>(const btree_self &other) const + { + return other < *this; + } + + /// Less-equal relation. Based on operator<. + inline bool operator<=(const btree_self &other) const + { + return !(other < *this); + } + + /// Greater-equal relation. Based on operator<. + inline bool operator>=(const btree_self &other) const + { + return !(*this < other); + } + +public: + /// *** Fast Copy: Assign Operator and Copy Constructors + + /// Assignment operator. All the key/data pairs are copied + inline btree_self& operator= (const btree_self &other) + { + if (this != &other) + { + clear(); + + key_less = other.key_comp(); + if (other.size() != 0) + { + stats.leaves = stats.innernodes = 0; + root = copy_recursive(other.root); + stats = other.stats; + } + + if (selfverify) verify(); + } + return *this; + } + + /// Copy constructor. The newly initialized B+ tree object will contain a + /// copy of all key/data pairs. + inline weighted_btree(const btree_self &other) + : root(NULL), headleaf(NULL), tailleaf(NULL), + stats( other.stats ), + key_less( other.key_comp() ) + { + if (size() > 0) + { + stats.leaves = stats.innernodes = 0; + root = copy_recursive(other.root); + if (selfverify) verify(); + } + } + +private: + /// Recursively copy nodes from another B+ tree object + struct node* copy_recursive(const node *n) + { + if (n->isleafnode()) + { + const leaf_node *leaf = static_cast(n); + leaf_node *newleaf = allocate_leaf(); + + newleaf->slotuse = leaf->slotuse; + std::copy(leaf->slotkey, leaf->slotkey + leaf->slotuse, newleaf->slotkey); + std::copy(leaf->slotdata, leaf->slotdata + leaf->slotuse, newleaf->slotdata); + + if (headleaf == NULL) + { + headleaf = tailleaf = newleaf; + newleaf->prevleaf = newleaf->nextleaf = NULL; + } + else + { + newleaf->prevleaf = tailleaf; + tailleaf->nextleaf = newleaf; + tailleaf = newleaf; + } + + return newleaf; + } + else + { + const inner_node *inner = static_cast(n); + inner_node *newinner = allocate_inner(inner->level); + + newinner->slotuse = inner->slotuse; + std::copy(inner->slotkey, inner->slotkey + inner->slotuse, newinner->slotkey); + + for (unsigned short slot = 0; slot <= inner->slotuse; ++slot) + { + newinner->childid[slot] = copy_recursive(inner->childid[slot]); + } + + return newinner; + } + } + +public: + // *** Public Insertion Functions + + /// Attempt to insert a key/data pair into the B+ tree. If the tree does not + /// allow duplicate keys, then the insert may fail if it is already + /// present. + inline std::pair insert(const pair_type& x, weight_type weight) + { + return insert_start(x.first, weight, x.second); + } + + /// Attempt to insert a key/data pair into the B+ tree. Beware that if + /// key_type == data_type, then the template iterator insert() is called + /// instead. If the tree does not allow duplicate keys, then the insert may + /// fail if it is already present. + inline std::pair insert(const key_type& key, weight_type weight, const data_type& data) + { + return insert_start(key, weight, data); + } + + /// Attempt to insert a key/data pair into the B+ tree. This function is the + /// same as the other insert, however if key_type == data_type then the + /// non-template function cannot be called. If the tree does not allow + /// duplicate keys, then the insert may fail if it is already present. + inline std::pair insert2(const key_type& key, weight_type weight, const data_type& data) + { + return insert_start(key, weight, data); + } + + /// Attempt to insert a key/data pair into the B+ tree. The iterator hint + /// is currently ignored by the B+ tree insertion routine. + inline iterator insert(iterator /* hint */, const pair_type &x, weight_type weight) + { + return insert_start(x.first, weight, x.second).first; + } + + /// Attempt to insert a key/data pair into the B+ tree. The iterator hint is + /// currently ignored by the B+ tree insertion routine. + inline iterator insert2(iterator /* hint */, const key_type& key, weight_type weight, const data_type& data) + { + return insert_start(key, weight, data).first; + } + + /// Attempt to insert the range [first,last) of value_type pairs into the B+ + /// tree. Each key/data pair is inserted individually. + template + inline void insert(InputIterator first, InputIterator last) + { + InputIterator iter = first; + while(iter != last) + { + insert(*iter, iter->weight()); + ++iter; + } + } + +private: + // *** Private Insertion Functions + + /// Start the insertion descent at the current root and handle root + /// splits. Returns true if the item was inserted + std::pair insert_start(const key_type& key, weight_type weight, const data_type& value) + { + node *newchild = NULL; + key_type newkey = key_type(); + + if (!root) + { + root = headleaf = tailleaf = allocate_leaf(); + } + + std::pair r = insert_descend(root, key, weight, value, &newkey, &newchild); + + if (newchild) + { + inner_node *newroot = allocate_inner(root->level + 1); + newroot->slotkey[0] = newkey; + + newroot->childid[0] = root; + newroot->childid[1] = newchild; + + newroot->weight = root->weight + newchild->weight; + newroot->slotuse = 1; + + root = newroot; + } + + // increment itemcount if the item was inserted + if (r.second) ++stats.itemcount; + +#ifdef BTREE_DEBUG + if (debug) print(std::cout); +#endif + + if (selfverify) { + verify(); + BTREE_ASSERT(exists(key)); + } + + return r; + } + + /** + * @brief Insert an item into the B+ tree. + * + * Descend down the nodes to a leaf, insert the key/data pair in a free + * slot. If the node overflows, then it must be split and the new split + * node inserted into the parent. Unroll / this splitting up to the root. + */ + std::pair insert_descend(node* n, + const key_type& key, + weight_type weight, + const data_type& value, + key_type* splitkey, node** splitnode) + { + if (!n->isleafnode()) + { + inner_node *inner = static_cast(n); + + key_type newkey = key_type(); + node *newchild = NULL; + + int slot = find_lower(inner, key); + + BTREE_PRINT("btree::insert_descend into " << inner->childid[slot] << std::endl); + + weight_type oldw = inner->childid[slot]->weight; + std::pair r = insert_descend(inner->childid[slot], + key, weight, value, &newkey, &newchild); + n->weight += inner->childid[slot]->weight - oldw; + + if (newchild) + { + BTREE_PRINT("btree::insert_descend newchild with key " << newkey << " node " << newchild << " at slot " << slot << std::endl); + + if (inner->isfull()) + { + split_inner_node(inner, splitkey, splitnode, slot); + + BTREE_PRINT("btree::insert_descend done split_inner: putslot: " << slot << " putkey: " << newkey << " upkey: " << *splitkey << std::endl); + +#ifdef BTREE_DEBUG + if (debug) + { + print_node(std::cout, inner); + print_node(std::cout, *splitnode); + } +#endif + + // check if insert slot is in the split sibling node + BTREE_PRINT("btree::insert_descend switch: " << slot << " > " << inner->slotuse+1 << std::endl); + + if (slot == inner->slotuse+1 && inner->slotuse < (*splitnode)->slotuse) + { + // special case when the insert slot matches the split + // place between the two nodes, then the insert key + // becomes the split key. + + BTREE_ASSERT(inner->slotuse + 1 < innerslotmax); + + inner_node *splitinner = static_cast(*splitnode); + + // move the split key and it's datum into the left node + inner->slotkey[inner->slotuse] = *splitkey; + inner->childid[inner->slotuse+1] = splitinner->childid[0]; + inner->weight += splitinner->childid[0]->weight; + splitinner->weight -= splitinner->childid[0]->weight; + inner->slotuse++; + + // set new split key and move corresponding datum into right node + splitinner->childid[0] = newchild; + splitinner->weight += newchild->weight; + *splitkey = newkey; + + return r; + } + else if (slot >= inner->slotuse+1) + { + // in case the insert slot is in the newly create split + // node, we reuse the code below. + + slot -= inner->slotuse+1; + inner = static_cast(*splitnode); + BTREE_PRINT("btree::insert_descend switching to splitted node " << inner << " slot " << slot <= 0 && slot <= inner->slotuse); + + int i = inner->slotuse; + + while(i > slot) { + inner->slotkey[i] = inner->slotkey[i - 1]; + inner->childid[i + 1] = inner->childid[i]; + i--; + } + + inner->slotkey[slot] = newkey; + inner->childid[slot + 1] = newchild; + inner->slotuse++; + inner->weight += newchild->weight; + } + + return r; + } + else // n->isleafnode() == true + { + leaf_node *leaf = static_cast(n); + + int slot = find_lower(leaf, key); + + if (!allow_duplicates && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) { + return std::pair(iterator(leaf, slot), false); + } + + if (leaf->isfull()) + { + split_leaf_node(leaf, splitkey, splitnode); + + // check if insert slot is in the split sibling node + if (slot >= leaf->slotuse) + { + slot -= leaf->slotuse; + leaf = static_cast(*splitnode); + } + } + + // put data item into correct data slot + + int i = leaf->slotuse - 1; + BTREE_ASSERT(i + 1 < leafslotmax); + + while(i >= 0 && key_less(key, leaf->slotkey[i])) { + leaf->slotkey[i + 1] = leaf->slotkey[i]; + leaf->slotdata[i + 1] = leaf->slotdata[i]; + leaf->weights[i + 1] = leaf->weights[i]; + i--; + } + + leaf->slotkey[i + 1] = key; + leaf->slotdata[i + 1] = value; + leaf->weights[i + 1] = weight; + leaf->weight += weight; + leaf->slotuse++; + + if (splitnode && leaf != *splitnode && slot == leaf->slotuse-1) + { + // special case: the node was split, and the insert is at the + // last slot of the old node. then the splitkey must be + // updated. + *splitkey = key; + } + + return std::pair(iterator(leaf, i + 1), true); + } + } + + /// Split up a leaf node into two equally-filled sibling leaves. Returns + /// the new nodes and it's insertion key in the two parameters. + void split_leaf_node(leaf_node* leaf, key_type* _newkey, node** _newleaf) + { + BTREE_ASSERT(leaf->isfull()); + + unsigned int mid = leaf->slotuse / 2; + + BTREE_PRINT("btree::split_leaf_node on " << leaf << std::endl); + + leaf_node *newleaf = allocate_leaf(); + + newleaf->slotuse = leaf->slotuse - mid; + + newleaf->nextleaf = leaf->nextleaf; + if (newleaf->nextleaf == NULL) { + BTREE_ASSERT(leaf == tailleaf); + tailleaf = newleaf; + } + else { + newleaf->nextleaf->prevleaf = newleaf; + } + + for(unsigned int slot = mid; slot < leaf->slotuse; ++slot) + { + unsigned int ni = slot - mid; + newleaf->slotkey[ni] = leaf->slotkey[slot]; + newleaf->slotdata[ni] = leaf->slotdata[slot]; + newleaf->weights[ni] = leaf->weights[slot]; + newleaf->weight += leaf->weights[slot]; + leaf->weight -= leaf->weights[slot]; + } + + leaf->slotuse = mid; + leaf->nextleaf = newleaf; + newleaf->prevleaf = leaf; + + *_newkey = leaf->slotkey[leaf->slotuse-1]; + *_newleaf = newleaf; + } + + /// Split up an inner node into two equally-filled sibling nodes. Returns + /// the new nodes and it's insertion key in the two parameters. Requires + /// the slot of the item will be inserted, so the nodes will be the same + /// size after the insert. + void split_inner_node(inner_node* inner, key_type* _newkey, node** _newinner, unsigned int addslot) + { + BTREE_ASSERT(inner->isfull()); + + unsigned int mid = inner->slotuse / 2; + + BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl); + + // if the split is uneven and the overflowing item will be put into the + // larger node, then the smaller split node may underflow + if (addslot <= mid && mid > inner->slotuse - (mid + 1)) + mid--; + + BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl); + + BTREE_PRINT("btree::split_inner_node on " << inner << " into two nodes " << mid << " and " << inner->slotuse - (mid + 1) << " sized" << std::endl); + + inner_node *newinner = allocate_inner(inner->level); + + newinner->slotuse = inner->slotuse - (mid + 1); + + for(unsigned int slot = mid + 1; slot < inner->slotuse; ++slot) + { + unsigned int ni = slot - (mid + 1); + newinner->slotkey[ni] = inner->slotkey[slot]; + newinner->childid[ni] = inner->childid[slot]; + newinner->weight += inner->childid[slot]->weight; + inner->weight -= inner->childid[slot]->weight; + } + newinner->childid[newinner->slotuse] = inner->childid[inner->slotuse]; + newinner->weight += inner->childid[inner->slotuse]->weight; + inner->weight -= inner->childid[inner->slotuse]->weight; + + inner->slotuse = mid; + + *_newkey = inner->slotkey[mid]; + *_newinner = newinner; + } + +private: + // *** Support Class Encapsulating Deletion Results + + /// Result flags of recursive deletion. + enum result_flags_t + { + /// Deletion successful and no fix-ups necessary. + btree_ok = 0, + + /// Deletion not successful because key was not found. + btree_not_found = 1, + + /// Deletion successful, the last key was updated so parent slotkeys + /// need updates. + btree_update_lastkey = 2, + + /// Deletion successful, children nodes were merged and the parent + /// needs to remove the empty node. + btree_fixmerge = 4 + }; + + /// \internal B+ tree recursive deletion has much information which is + /// needs to be passed upward. + struct result_t + { + /// Merged result flags + result_flags_t flags; + + /// The key to be updated at the parent's slot + key_type lastkey; + + /// Constructor of a result with a specific flag, this can also be used + /// as for implicit conversion. + inline result_t(result_flags_t f = btree_ok) + : flags(f), lastkey() + { } + + /// Constructor with a lastkey value. + inline result_t(result_flags_t f, const key_type &k) + : flags(f), lastkey(k) + { } + + /// Test if this result object has a given flag set. + inline bool has(result_flags_t f) const + { + return (flags & f) != 0; + } + + /// Merge two results OR-ing the result flags and overwriting lastkeys. + inline result_t& operator|= (const result_t &other) + { + flags = result_flags_t(flags | other.flags); + + // we overwrite existing lastkeys on purpose + if (other.has(btree_update_lastkey)) + lastkey = other.lastkey; + + return *this; + } + }; + +public: + // *** Public Erase Functions + + /// Erases one (the first) of the key/data pairs associated with the given + /// key. + bool erase_one(const key_type &key) + { + BTREE_PRINT("btree::erase_one(" << key << ") on btree size " << size() << std::endl); + + if (selfverify) verify(); + + result_t result = erase_one_descend(key, root, NULL, NULL, NULL, NULL, NULL, 0); + + if (!result.has(btree_not_found)) + --stats.itemcount; + +#ifdef BTREE_DEBUG + if (debug) print(std::cout); +#endif + if (selfverify) verify(); + + return !result.has(btree_not_found); + } + + /// Erases all the key/data pairs associated with the given key. This is + /// implemented using erase_one(). + size_type erase(const key_type &key) + { + size_type c = 0; + + while( erase_one(key) ) + { + ++c; + if (!allow_duplicates) break; + } + + return c; + } + +#ifdef BTREE_TODO + /// Erase the key/data pair referenced by the iterator. + void erase(iterator iter) + { + + } +#endif + +#ifdef BTREE_TODO + /// Erase all key/data pairs in the range [first,last). This function is + /// currently not implemented by the B+ Tree. + void erase(iterator /* first */, iterator /* last */) + { + abort(); + } +#endif + +private: + // *** Private Erase Functions + + /** @brief Erase one (the first) key/data pair in the B+ tree matching key. + * + * Descends down the tree in search of key. During the descent the parent, + * left and right siblings and their parents are computed and passed + * down. Once the key/data pair is found, it is removed from the leaf. If + * the leaf underflows 6 different cases are handled. These cases resolve + * the underflow by shifting key/data pairs from adjacent sibling nodes, + * merging two sibling nodes or trimming the tree. + */ + result_t erase_one_descend(const key_type& key, + node *curr, + node *left, node *right, + inner_node *leftparent, inner_node *rightparent, + inner_node *parent, unsigned int parentslot) + { + if (curr->isleafnode()) + { + leaf_node *leaf = static_cast(curr); + leaf_node *leftleaf = static_cast(left); + leaf_node *rightleaf = static_cast(right); + + int slot = find_lower(leaf, key); + + if (slot >= leaf->slotuse || !key_equal(key, leaf->slotkey[slot])) + { + BTREE_PRINT("Could not find key " << key << " to erase." << std::endl); + + return btree_not_found; + } + + BTREE_PRINT("Found key in leaf " << curr << " at slot " << slot << std::endl); + + leaf->weight -= leaf->weights[slot]; + for (int i = slot; i < leaf->slotuse - 1; i++) + { + leaf->slotkey[i] = leaf->slotkey[i + 1]; + leaf->slotdata[i] = leaf->slotdata[i + 1]; + leaf->weights[i] = leaf->weights[i + 1]; + } + leaf->slotuse--; + + result_t myres = btree_ok; + + // if the last key of the leaf was changed, the parent is notified + // and updates the key of this leaf + if (slot == leaf->slotuse) + { + if (parent && parentslot < parent->slotuse) + { + BTREE_ASSERT(parent->childid[parentslot] == curr); + parent->slotkey[parentslot] = leaf->slotkey[leaf->slotuse - 1]; + } + else + { + BTREE_PRINT("Scheduling lastkeyupdate: key " << leaf->slotkey[leaf->slotuse - 1] << std::endl); + myres |= result_t(btree_update_lastkey, leaf->slotkey[leaf->slotuse - 1]); + } + } + + if (leaf->isunderflow() && !(leaf == root && leaf->slotuse >= 1)) + { + // determine what to do about the underflow + + // case : if this empty leaf is the root, there is no way to + // correct underflow + if (leftleaf == NULL && rightleaf == NULL) + { + return btree_ok; + } + // case : if both left and right leaves would underflow in case of + // a shift, then merging is necessary. choose the more local merger + // with our parent + else if ( (leftleaf == NULL || leftleaf->isfew()) && (rightleaf == NULL || rightleaf->isfew()) ) + { + if (leftparent == parent) + myres |= merge_leaves(leftleaf, leaf, leftparent); + else + myres |= merge_leaves(leaf, rightleaf, rightparent); + } + // case : the right leaf has extra data, so balance right with current + else if ( (leftleaf != NULL && leftleaf->isfew()) && (rightleaf != NULL && !rightleaf->isfew()) ) + { + if (rightparent == parent) + myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); + else + myres |= merge_leaves(leftleaf, leaf, leftparent); + } + // case : the left leaf has extra data, so balance left with current + else if ( (leftleaf != NULL && !leftleaf->isfew()) && (rightleaf != NULL && rightleaf->isfew()) ) + { + if (leftparent == parent) + shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); + else + myres |= merge_leaves(leaf, rightleaf, rightparent); + } + // case : both the leaf and right leaves have extra data and our + // parent, choose the leaf with more data + else if (leftparent == rightparent) + { + if (leftleaf->slotuse <= rightleaf->slotuse) + myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); + else + shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); + } + else + { + if (leftparent == parent) + shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1); + else + myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot); + } + } + + return myres; + } + else // !curr->isleafnode() + { + inner_node *inner = static_cast(curr); + inner_node *leftinner = static_cast(left); + inner_node *rightinner = static_cast(right); + + node *myleft, *myright; + inner_node *myleftparent, *myrightparent; + + int slot = find_lower(inner, key); + + if (slot == 0) { + myleft = (left == NULL) ? NULL : (static_cast(left))->childid[left->slotuse - 1]; + myleftparent = leftparent; + } + else { + myleft = inner->childid[slot - 1]; + myleftparent = inner; + } + + if (slot == inner->slotuse) { + myright = (right == NULL) ? NULL : (static_cast(right))->childid[0]; + myrightparent = rightparent; + } + else { + myright = inner->childid[slot + 1]; + myrightparent = inner; + } + + BTREE_PRINT("erase_one_descend into " << inner->childid[slot] << std::endl); + + result_t result = erase_one_descend(key, + inner->childid[slot], + myleft, myright, + myleftparent, myrightparent, + inner, slot); + + result_t myres = btree_ok; + + if (result.has(btree_not_found)) + { + return result; + } + + if (result.has(btree_update_lastkey)) + { + if (parent && parentslot < parent->slotuse) + { + BTREE_PRINT("Fixing lastkeyupdate: key " << result.lastkey << " into parent " << parent << " at parentslot " << parentslot << std::endl); + + BTREE_ASSERT(parent->childid[parentslot] == curr); + parent->slotkey[parentslot] = result.lastkey; + } + else + { + BTREE_PRINT("Forwarding lastkeyupdate: key " << result.lastkey << std::endl); + myres |= result_t(btree_update_lastkey, result.lastkey); + } + } + + if (result.has(btree_fixmerge)) + { + // either the current node or the next is empty and should be removed + if (inner->childid[slot]->slotuse != 0) + slot++; + + // this is the child slot invalidated by the merge + BTREE_ASSERT(inner->childid[slot]->slotuse == 0); + + inner->weight -= inner->childid[slot]->weight; + free_node(inner->childid[slot]); + + for(int i = slot; i < inner->slotuse; i++) + { + inner->slotkey[i - 1] = inner->slotkey[i]; + inner->childid[i] = inner->childid[i + 1]; + } + inner->slotuse--; + + if (inner->level == 1) + { + // fix split key for children leaves + slot--; + leaf_node *child = static_cast(inner->childid[slot]); + inner->slotkey[slot] = child->slotkey[ child->slotuse-1 ]; + } + } + + if (inner->isunderflow() && !(inner == root && inner->slotuse >= 1)) + { + // case: the inner node is the root and has just one child. that child becomes the new root + if (leftinner == NULL && rightinner == NULL) + { + BTREE_ASSERT(inner == root); + BTREE_ASSERT(inner->slotuse == 0); + + root = inner->childid[0]; + + inner->slotuse = 0; + free_node(inner); + + return btree_ok; + } + // case : if both left and right leaves would underflow in case of + // a shift, then merging is necessary. choose the more local merger + // with our parent + else if ( (leftinner == NULL || leftinner->isfew()) && (rightinner == NULL || rightinner->isfew()) ) + { + if (leftparent == parent) + myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); + else + myres |= merge_inner(inner, rightinner, rightparent, parentslot); + } + // case : the right leaf has extra data, so balance right with current + else if ( (leftinner != NULL && leftinner->isfew()) && (rightinner != NULL && !rightinner->isfew()) ) + { + if (rightparent == parent) + shift_left_inner(inner, rightinner, rightparent, parentslot); + else + myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1); + } + // case : the left leaf has extra data, so balance left with current + else if ( (leftinner != NULL && !leftinner->isfew()) && (rightinner != NULL && rightinner->isfew()) ) + { + if (leftparent == parent) + shift_right_inner(leftinner, inner, leftparent, parentslot - 1); + else + myres |= merge_inner(inner, rightinner, rightparent, parentslot); + } + // case : both the leaf and right leaves have extra data and our + // parent, choose the leaf with more data + else if (leftparent == rightparent) + { + if (leftinner->slotuse <= rightinner->slotuse) + shift_left_inner(inner, rightinner, rightparent, parentslot); + else + shift_right_inner(leftinner, inner, leftparent, parentslot - 1); + } + else + { + if (leftparent == parent) + shift_right_inner(leftinner, inner, leftparent, parentslot - 1); + else + shift_left_inner(inner, rightinner, rightparent, parentslot); + } + } + + return myres; + } + } + + /// Merge two leaf nodes. The function moves all key/data pairs from right + /// to left and sets right's slotuse to zero. The right slot is then + /// removed by the calling parent node. + result_t merge_leaves(leaf_node* left, leaf_node* right, inner_node* parent) + { + BTREE_PRINT("Merge leaf nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl); + (void)parent; + + BTREE_ASSERT(left->isleafnode() && right->isleafnode()); + BTREE_ASSERT(parent->level == 1); + + BTREE_ASSERT(left->slotuse + right->slotuse < leafslotmax); + + for (unsigned int i = 0; i < right->slotuse; i++) + { + left->slotkey[left->slotuse + i] = right->slotkey[i]; + left->slotdata[left->slotuse + i] = right->slotdata[i]; + left->weights[left->slotuse + i] = right->weights[i]; + } + left->slotuse += right->slotuse; + left->weight += right.weight; + + left->nextleaf = right->nextleaf; + if (left->nextleaf) + left->nextleaf->prevleaf = left; + else + tailleaf = left; + + right->weight = 0; + right->slotuse = 0; + + return btree_fixmerge; + } + + /// Merge two inner nodes. The function moves all key/childid pairs from + /// right to left and sets right's slotuse to zero. The right slot is then + /// removed by the calling parent node. + static result_t merge_inner(inner_node* left, inner_node* right, inner_node* parent, unsigned int parentslot) + { + BTREE_PRINT("Merge inner nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl); + + BTREE_ASSERT(left->level == right->level); + BTREE_ASSERT(parent->level == left->level + 1); + + BTREE_ASSERT(parent->childid[parentslot] == left); + + BTREE_ASSERT(left->slotuse + right->slotuse < innerslotmax); + + if (selfverify) + { + // find the left node's slot in the parent's children + unsigned int leftslot = 0; + while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) + ++leftslot; + + BTREE_ASSERT(leftslot < parent->slotuse); + BTREE_ASSERT(parent->childid[leftslot] == left); + BTREE_ASSERT(parent->childid[leftslot+1] == right); + + BTREE_ASSERT(parentslot == leftslot); + } + + // retrieve the decision key from parent + left->slotkey[left->slotuse] = parent->slotkey[parentslot]; + left->slotuse++; + + // copy over keys and children from right + for (unsigned int i = 0; i < right->slotuse; i++) + { + left->slotkey[left->slotuse + i] = right->slotkey[i]; + left->childid[left->slotuse + i] = right->childid[i]; + } + left->slotuse += right->slotuse; + left->weight += right.weight; + + left->childid[left->slotuse] = right->childid[right->slotuse]; + + right->weight = 0; + right->slotuse = 0; + + return btree_fixmerge; + } + + /// Balance two leaf nodes. The function moves key/data pairs from right to + /// left so that both nodes are equally filled. The parent node is updated + /// if possible. + static result_t shift_left_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot) + { + BTREE_ASSERT(left->isleafnode() && right->isleafnode()); + BTREE_ASSERT(parent->level == 1); + + BTREE_ASSERT(left->nextleaf == right); + BTREE_ASSERT(left == right->prevleaf); + + BTREE_ASSERT(left->slotuse < right->slotuse); + BTREE_ASSERT(parent->childid[parentslot] == left); + + unsigned int shiftnum = (right->slotuse - left->slotuse) / 2; + + BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl); + + BTREE_ASSERT(left->slotuse + shiftnum < leafslotmax); + + // copy the first items from the right node to the last slot in the left node. + for(unsigned int i = 0; i < shiftnum; i++) + { + left->slotkey[left->slotuse + i] = right->slotkey[i]; + left->slotdata[left->slotuse + i] = right->slotdata[i]; + left->weights[left->slotuse + i] = right->weights[i]; + left->weight += right->weights[i]; + right->weight -= right->weights[i]; + } + left->slotuse += shiftnum; + + // shift all slots in the right node to the left + + right->slotuse -= shiftnum; + for(int i = 0; i < right->slotuse; i++) + { + right->slotkey[i] = right->slotkey[i + shiftnum]; + right->slotdata[i] = right->slotdata[i + shiftnum]; + right->weights[i] = right->weights[i + shiftnum]; + } + + // fixup parent + if (parentslot < parent->slotuse) { + parent->slotkey[parentslot] = left->slotkey[left->slotuse - 1]; + return btree_ok; + } + else { // the update is further up the tree + return result_t(btree_update_lastkey, left->slotkey[left->slotuse - 1]); + } + } + + /// Balance two inner nodes. The function moves key/data pairs from right + /// to left so that both nodes are equally filled. The parent node is + /// updated if possible. + static void shift_left_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot) + { + BTREE_ASSERT(left->level == right->level); + BTREE_ASSERT(parent->level == left->level + 1); + + BTREE_ASSERT(left->slotuse < right->slotuse); + BTREE_ASSERT(parent->childid[parentslot] == left); + + unsigned int shiftnum = (right->slotuse - left->slotuse) / 2; + + BTREE_PRINT("Shifting (inner) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl); + + BTREE_ASSERT(left->slotuse + shiftnum < innerslotmax); + + if (selfverify) + { + // find the left node's slot in the parent's children and compare to parentslot + + unsigned int leftslot = 0; + while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) + ++leftslot; + + BTREE_ASSERT(leftslot < parent->slotuse); + BTREE_ASSERT(parent->childid[leftslot] == left); + BTREE_ASSERT(parent->childid[leftslot+1] == right); + + BTREE_ASSERT(leftslot == parentslot); + } + + // copy the parent's decision slotkey and childid to the first new key on the left + left->slotkey[left->slotuse] = parent->slotkey[parentslot]; + left->slotuse++; + + // copy the other items from the right node to the last slots in the left node. + for(unsigned int i = 0; i < shiftnum - 1; i++) + { + left->slotkey[left->slotuse + i] = right->slotkey[i]; + left->childid[left->slotuse + i] = right->childid[i]; + left->weight += right->childid[i]->weight; + right->weight -= right->childid[i]->weight; + } + left->slotuse += shiftnum - 1; + + // fixup parent + parent->slotkey[parentslot] = right->slotkey[shiftnum - 1]; + // last pointer in left + left->childid[left->slotuse] = right->childid[shiftnum - 1]; + left->weight += right->childid[shiftnum - 1]->weight; + right->weight -= right->childid[shiftnum - 1]->weight; + + // shift all slots in the right node + + right->slotuse -= shiftnum; + for(int i = 0; i < right->slotuse; i++) + { + right->slotkey[i] = right->slotkey[i + shiftnum]; + right->childid[i] = right->childid[i + shiftnum]; + } + right->childid[right->slotuse] = right->childid[right->slotuse + shiftnum]; + } + + /// Balance two leaf nodes. The function moves key/data pairs from left to + /// right so that both nodes are equally filled. The parent node is updated + /// if possible. + static void shift_right_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot) + { + BTREE_ASSERT(left->isleafnode() && right->isleafnode()); + BTREE_ASSERT(parent->level == 1); + + BTREE_ASSERT(left->nextleaf == right); + BTREE_ASSERT(left == right->prevleaf); + BTREE_ASSERT(parent->childid[parentslot] == left); + + BTREE_ASSERT(left->slotuse > right->slotuse); + + unsigned int shiftnum = (left->slotuse - right->slotuse) / 2; + + BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl); + + if (selfverify) + { + // find the left node's slot in the parent's children + unsigned int leftslot = 0; + while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) + ++leftslot; + + BTREE_ASSERT(leftslot < parent->slotuse); + BTREE_ASSERT(parent->childid[leftslot] == left); + BTREE_ASSERT(parent->childid[leftslot+1] == right); + + BTREE_ASSERT(leftslot == parentslot); + } + + // shift all slots in the right node + + BTREE_ASSERT(right->slotuse + shiftnum < leafslotmax); + + for(int i = right->slotuse; i >= 0; i--) + { + right->slotkey[i + shiftnum] = right->slotkey[i]; + right->slotdata[i + shiftnum] = right->slotdata[i]; + right->weights[i + shiftnum] = right->weights[i]; + } + right->slotuse += shiftnum; + + // copy the last items from the left node to the first slot in the right node. + for(unsigned int i = 0; i < shiftnum; i++) + { + right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i]; + right->slotdata[i] = left->slotdata[left->slotuse - shiftnum + i]; + right->weights[i] = left->weights[left->slotuse - shiftnum + i]; + right->weight += left->weights[left->slotuse - shiftnum + i]; + left->weight -= left->weights[left->slotuse - shiftnum + i]; + } + left->slotuse -= shiftnum; + + parent->slotkey[parentslot] = left->slotkey[left->slotuse-1]; + } + + /// Balance two inner nodes. The function moves key/data pairs from left to + /// right so that both nodes are equally filled. The parent node is updated + /// if possible. + static void shift_right_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot) + { + BTREE_ASSERT(left->level == right->level); + BTREE_ASSERT(parent->level == left->level + 1); + + BTREE_ASSERT(left->slotuse > right->slotuse); + BTREE_ASSERT(parent->childid[parentslot] == left); + + unsigned int shiftnum = (left->slotuse - right->slotuse) / 2; + + BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl); + + if (selfverify) + { + // find the left node's slot in the parent's children + unsigned int leftslot = 0; + while(leftslot <= parent->slotuse && parent->childid[leftslot] != left) + ++leftslot; + + BTREE_ASSERT(leftslot < parent->slotuse); + BTREE_ASSERT(parent->childid[leftslot] == left); + BTREE_ASSERT(parent->childid[leftslot+1] == right); + + BTREE_ASSERT(leftslot == parentslot); + } + + // shift all slots in the right node + + BTREE_ASSERT(right->slotuse + shiftnum < innerslotmax); + + right->childid[right->slotuse + shiftnum] = right->childid[right->slotuse]; + for(int i = right->slotuse-1; i >= 0; i--) + { + right->slotkey[i + shiftnum] = right->slotkey[i]; + right->childid[i + shiftnum] = right->childid[i]; + } + + right->slotuse += shiftnum; + + // copy the parent's decision slotkey and childid to the last new key on the right + right->slotkey[shiftnum - 1] = parent->slotkey[parentslot]; + right->childid[shiftnum - 1] = left->childid[left->slotuse]; + right->weight += left->childid[left->slotuse]->weight; + left->weight -= left->childid[left->slotuse]->weight; + + // copy the remaining last items from the left node to the first slot in the right node. + for(unsigned int i = 0; i < shiftnum - 1; i++) + { + right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i + 1]; + right->childid[i] = left->childid[left->slotuse - shiftnum + i + 1]; + right->weight += left->childid[left->slotuse - shiftnum + i + 1]->weight; + left->weight -= left->childid[left->slotuse - shiftnum + i + 1]->weight; + } + + // copy the first to-be-removed key from the left node to the parent's decision slot + parent->slotkey[parentslot] = left->slotkey[left->slotuse - shiftnum]; + + left->slotuse -= shiftnum; + } + +#ifdef BTREE_DEBUG +public: + // *** Debug Printing + + /// Print out the B+ tree structure with keys onto the given ostream. This + /// function requires that the header is compiled with BTREE_DEBUG and that + /// key_type is printable via std::ostream. + void print(std::ostream &os) const + { + if (root) { + print_node(os, root, 0, true); + } + } + + /// Print out only the leaves via the double linked list. + void print_leaves(std::ostream &os) const + { + os << "leaves:" << std::endl; + + const leaf_node *n = headleaf; + + while(n) + { + os << " " << n << std::endl; + + n = n->nextleaf; + } + } + +private: + + /// Recursively descend down the tree and print out nodes. + static void print_node(std::ostream &os, const node* node, unsigned int depth=0, bool recursive=false) + { + for(unsigned int i = 0; i < depth; i++) os << " "; + + os << "node " << node << " level " << node->level << " weight " << node->weight << " slotuse " << node->slotuse << std::endl; + + if (node->isleafnode()) + { + const leaf_node *leafnode = static_cast(node); + + for(unsigned int i = 0; i < depth; i++) os << " "; + os << " leaf prev " << leafnode->prevleaf << " next " << leafnode->nextleaf << std::endl; + + for(unsigned int i = 0; i < depth; i++) os << " "; + + for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot) + { + os << leafnode->slotkey[slot] << " "; // << "(data: " << leafnode->slotdata[slot] << ") "; + } + os << std::endl; + } + else + { + const inner_node *innernode = static_cast(node); + + for(unsigned int i = 0; i < depth; i++) os << " "; + + for (unsigned short slot = 0; slot < innernode->slotuse; ++slot) + { + os << "(" << innernode->childid[slot] << ") " << innernode->slotkey[slot] << " "; + } + os << "(" << innernode->childid[innernode->slotuse] << ")" << std::endl; + + if (recursive) + { + for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot) + { + print_node(os, innernode->childid[slot], depth + 1, recursive); + } + } + } + } +#endif + +public: + // *** Verification of B+ Tree Invariants + + /// Run a thorough verification of all B+ tree invariants. The program + /// aborts via assert() if something is wrong. + void verify() const + { + key_type minkey, maxkey; + tree_stats vstats; + + if (root) + { + verify_node(root, &minkey, &maxkey, vstats); + + assert( vstats.itemcount == stats.itemcount ); + assert( vstats.leaves == stats.leaves ); + assert( vstats.innernodes == stats.innernodes ); + } + + verify_leaflinks(); + } + +private: + + /// Recursively descend down the tree and verify each node + void verify_node(const node* n, key_type* minkey, key_type* maxkey, tree_stats &vstats) const + { + BTREE_PRINT("verifynode " << n << std::endl); + + if (n->isleafnode()) + { + const leaf_node *leaf = static_cast(n); + + assert(leaf == root || !leaf->isunderflow()); + + for(unsigned short slot = 0; slot < leaf->slotuse - 1; ++slot) + { + assert(key_lessequal(leaf->slotkey[slot], leaf->slotkey[slot + 1])); + } + + *minkey = leaf->slotkey[0]; + *maxkey = leaf->slotkey[leaf->slotuse - 1]; + + vstats.leaves++; + vstats.itemcount += leaf->slotuse; + } + else // !n->isleafnode() + { + const inner_node *inner = static_cast(n); + vstats.innernodes++; + + assert(inner == root || !inner->isunderflow()); + + for(unsigned short slot = 0; slot < inner->slotuse - 1; ++slot) + { + assert(key_lessequal(inner->slotkey[slot], inner->slotkey[slot + 1])); + } + + for(unsigned short slot = 0; slot <= inner->slotuse; ++slot) + { + const node *subnode = inner->childid[slot]; + key_type subminkey = key_type(); + key_type submaxkey = key_type(); + + assert(subnode->level + 1 == inner->level); + verify_node(subnode, &subminkey, &submaxkey, vstats); + + BTREE_PRINT("verify subnode " << subnode << ": " << subminkey << " - " << submaxkey << std::endl); + + if (slot == 0) + *minkey = subminkey; + else + assert(key_greaterequal(subminkey, inner->slotkey[slot-1])); + + if (slot == inner->slotuse) + *maxkey = submaxkey; + else + assert(key_equal(inner->slotkey[slot], submaxkey)); + + if (inner->level == 1 && slot < inner->slotuse) + { + // children are leaves and must be linked together in the + // correct order + const leaf_node *leafa = static_cast(inner->childid[slot]); + const leaf_node *leafb = static_cast(inner->childid[slot + 1]); + + assert(leafa->nextleaf == leafb); + assert(leafa == leafb->prevleaf); + (void)leafa; (void)leafb; + } + if (inner->level == 2 && slot < inner->slotuse) + { + // verify leaf links between the adjacent inner nodes + const inner_node *parenta = static_cast(inner->childid[slot]); + const inner_node *parentb = static_cast(inner->childid[slot+1]); + + const leaf_node *leafa = static_cast(parenta->childid[parenta->slotuse]); + const leaf_node *leafb = static_cast(parentb->childid[0]); + + assert(leafa->nextleaf == leafb); + assert(leafa == leafb->prevleaf); + (void)leafa; (void)leafb; + } + } + } + } + + /// Verify the double linked list of leaves. + void verify_leaflinks() const + { + const leaf_node *n = headleaf; + + assert(n->level == 0); + assert(!n || n->prevleaf == NULL); + + unsigned int testcount = 0; + + while(n) + { + assert(n->level == 0); + + for(unsigned short slot = 0; slot < n->slotuse - 1; ++slot) + { + assert(key_lessequal(n->slotkey[slot], n->slotkey[slot + 1])); + } + + testcount += n->slotuse; + + if (n->nextleaf) + { + assert(key_lessequal(n->slotkey[n->slotuse-1], n->nextleaf->slotkey[0])); + + assert(n == n->nextleaf->prevleaf); + } + else + { + assert(tailleaf == n); + } + + n = n->nextleaf; + } + + assert(testcount == size()); + } + +private: + // *** Dump and Restore of B+ Trees + + /// \internal A header for the binary image containing the base properties + /// of the B+ tree. These properties have to match the current template + /// instantiation. + struct dump_header + { + /// "stx-btree", just to stop the restore() function from loading garbage + char signature[12]; + + /// Currently 0 + unsigned short version; + + /// sizeof(key_type) + unsigned short key_type_size; + + /// sizeof(data_type) + unsigned short data_type_size; + + /// Number of slots in the leaves + unsigned short leafslots; + + /// Number of slots in the inner nodes + unsigned short innerslots; + + /// Allow duplicates + bool allow_duplicates; + + /// The item count of the tree + size_type itemcount; + + /// Fill the struct with the current B+ tree's properties, itemcount is + /// not filled. + inline void fill() + { + // don't want to include string.h just for this signature + *reinterpret_cast(signature+0) = 0x2d787473; + *reinterpret_cast(signature+4) = 0x65727462; + *reinterpret_cast(signature+8) = 0x00000065; + + version = 0; + key_type_size = sizeof(typename btree_self::key_type); + data_type_size = sizeof(typename btree_self::data_type); + leafslots = btree_self::leafslotmax; + innerslots = btree_self::innerslotmax; + allow_duplicates = btree_self::allow_duplicates; + } + + /// Returns true if the headers have the same vital properties + inline bool same(const struct dump_header &o) const + { + return (*reinterpret_cast(signature+0) == + *reinterpret_cast(o.signature+0)) + && (*reinterpret_cast(signature+4) == + *reinterpret_cast(o.signature+4)) + && (*reinterpret_cast(signature+8) == + *reinterpret_cast(o.signature+8)) + + && (version == o.version) + && (key_type_size == o.key_type_size) + && (data_type_size == o.data_type_size) + && (leafslots == o.leafslots) + && (innerslots == o.innerslots) + && (allow_duplicates == o.allow_duplicates); + } + }; + +public: + + /// Dump the contents of the B+ tree out onto an ostream as a binary + /// image. The image contains memory pointers which will be fixed when the + /// image is restored. For this to work your key_type and data_type must be + /// integral types and contain no pointers or references. + void dump(std::ostream &os) const + { + struct dump_header header; + header.fill(); + header.itemcount = size(); + + os.write(reinterpret_cast(&header), sizeof(header)); + + if (root) + dump_node(os, root); + } + + /// Restore a binary image of a dumped B+ tree from an istream. The B+ tree + /// pointers are fixed using the dump order. For dump and restore to work + /// your key_type and data_type must be integral types and contain no + /// pointers or references. Returns true if the restore was successful. + bool restore(std::istream &is) + { + struct dump_header fileheader; + is.read(reinterpret_cast(&fileheader), sizeof(fileheader)); + if (!is.good()) return false; + + struct dump_header myheader; + myheader.fill(); + myheader.itemcount = fileheader.itemcount; + + if (!myheader.same(fileheader)) + { + BTREE_PRINT("btree::restore: file header does not match instantiation signature." << std::endl); + return false; + } + + clear(); + + if (fileheader.itemcount > 0) + { + root = restore_node(is); + if (root == NULL) return false; + + stats.itemcount = fileheader.itemcount; + } + +#ifdef BTREE_DEBUG + if (debug) print(std::cout); +#endif + if (selfverify) verify(); + + return true; + } + +private: + + /// Recursively descend down the tree and dump each node in a precise order + void dump_node(std::ostream &os, const node* n) const + { + BTREE_PRINT("dump_node " << n << std::endl); + + if (n->isleafnode()) + { + const leaf_node *leaf = static_cast(n); + + os.write(reinterpret_cast(leaf), sizeof(*leaf)); + } + else // !n->isleafnode() + { + const inner_node *inner = static_cast(n); + + os.write(reinterpret_cast(inner), sizeof(*inner)); + + for(unsigned short slot = 0; slot <= inner->slotuse; ++slot) + { + const node *subnode = inner->childid[slot]; + + dump_node(os, subnode); + } + } + } + + /// Read the dump image and construct a tree from the node order in the + /// serialization. + node* restore_node(std::istream &is) + { + union { + node top; + leaf_node leaf; + inner_node inner; + } nu; + + // first read only the top of the node + is.read(reinterpret_cast(&nu.top), sizeof(nu.top)); + if (!is.good()) return NULL; + + if (nu.top.isleafnode()) + { + // read remaining data of leaf node + is.read(reinterpret_cast(&nu.leaf) + sizeof(nu.top), sizeof(nu.leaf) - sizeof(nu.top)); + if (!is.good()) return NULL; + + leaf_node *newleaf = allocate_leaf(); + + // copy over all data, the leaf nodes contain only their double linked list pointers + *newleaf = nu.leaf; + + // reconstruct the linked list from the order in the file + if (headleaf == NULL) { + BTREE_ASSERT(newleaf->prevleaf == NULL); + headleaf = tailleaf = newleaf; + } + else { + newleaf->prevleaf = tailleaf; + tailleaf->nextleaf = newleaf; + tailleaf = newleaf; + } + + return newleaf; + } + else + { + // read remaining data of inner node + is.read(reinterpret_cast(&nu.inner) + sizeof(nu.top), sizeof(nu.inner) - sizeof(nu.top)); + if (!is.good()) return NULL; + + inner_node *newinner = allocate_inner(0); + + // copy over all data, the inner nodes contain only pointers to their children + *newinner = nu.inner; + + for(unsigned short slot = 0; slot <= newinner->slotuse; ++slot) + { + newinner->childid[slot] = restore_node(is); + } + + return newinner; + } + } +}; + +} // namespace stx + +#endif // _STX_RA_BTREE_H_ -- 2.39.2