1 // $Id: btree.h 72 2008-01-25 12:47:26Z tb $
3 * Contains the main B+ tree implementation template class btree.
7 * STX B+ Tree Template Classes v0.8.1
8 * Copyright (C) 2008 Timo Bingmann
9 * 2008 Stefan Schimanski (weighted variant)
11 * This library is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU Lesser General Public License as published by the
13 * Free Software Foundation; either version 2.1 of the License, or (at your
14 * option) any later version.
16 * This library is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this library; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #ifndef _STX_RA_BTREE_H_
27 #define _STX_RA_BTREE_H_
29 // *** Required Headers from the STL
37 // *** Debugging Macros
43 /// Print out debug information to std::cout if BTREE_DEBUG is defined.
44 #define BTREE_PRINT(x) do { if (debug) (std::cout << x); } while(0)
46 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify().
47 #define BTREE_ASSERT(x) do { assert(x); } while(0)
51 /// Print out debug information to std::cout if BTREE_DEBUG is defined.
52 #define BTREE_PRINT(x) do { } while(0)
54 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify().
55 #define BTREE_ASSERT(x) do { } while(0)
59 /// The maximum of a and b. Used in some compile-time formulas.
60 #define BTREE_MAX(a,b) ((a) < (b) ? (b) : (a))
63 /// The macro BTREE_FRIENDS can be used by outside class to access the B+
64 /// tree internals. This was added for wxBTreeDemo to be able to draw the
66 #define BTREE_FRIENDS friend class btree_friend;
69 /// STX - Some Template Extensions namespace
72 /** Generates default traits for a B+ tree used as a set. It estimates leaf and
73 * inner node sizes by assuming a cache line size of 256 bytes. */
74 template <typename _Key>
75 struct btree_default_set_traits
77 /// If true, the tree will self verify it's invariants after each insert()
78 /// or erase(). The header must have been compiled with BTREE_DEBUG defined.
79 static const bool selfverify = false;
81 /// If true, the tree will print out debug information and a tree dump
82 /// during insert() or erase() operation. The header must have been
83 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream
85 static const bool debug = false;
87 /// Number of slots in each leaf of the tree. Estimated so that each node
88 /// has a size of about 256 bytes.
89 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key)) );
91 /// Number of slots in each inner node of the tree. Estimated so that each node
92 /// has a size of about 256 bytes.
93 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
96 /** Generates default traits for a B+ tree used as a map. It estimates leaf and
97 * inner node sizes by assuming a cache line size of 256 bytes. */
98 template <typename _Key, typename _Data>
99 struct btree_default_map_traits
101 /// If true, the tree will self verify it's invariants after each insert()
102 /// or erase(). The header must have been compiled with BTREE_DEBUG defined.
103 static const bool selfverify = false;
105 /// If true, the tree will print out debug information and a tree dump
106 /// during insert() or erase() operation. The header must have been
107 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream
109 static const bool debug = false;
111 /// Number of slots in each leaf of the tree. Estimated so that each node
112 /// has a size of about 256 bytes.
113 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(_Data)) );
115 /// Number of slots in each inner node of the tree. Estimated so that each node
116 /// has a size of about 256 bytes.
117 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
122 /** @brief Basic class implementing a base B+ tree data structure in memory.
124 * The base implementation of a memory B+ tree. It is based on the
125 * implementation in Cormen's Introduction into Algorithms, Jan Jannink's paper
126 * and other algorithm resources. Almost all STL-required function calls are
127 * implemented. The asymptotic time requirements of the STL are not always
128 * fulfilled in theory, however in practice this B+ tree performs better than a
129 * red-black tree by using more memory. The insertion function splits the nodes
130 * on the recursion unroll. Erase is largely based on Jannink's ideas.
132 * This class is specialized into btree_set, btree_multiset, btree_map and
133 * btree_multimap using default template parameters and facade functions.
135 template <typename _Key,
136 typename _Weight = size_t,
137 typename _Data = Void,
138 typename _Value = std::pair<_Key, _Data>,
139 typename _Compare = std::less<_Key>,
140 typename _Traits = btree_default_map_traits<_Key, _Data>,
141 bool _Duplicates = false>
145 // *** Template Parameter Types
147 /// First template parameter: The key type of the B+ tree. This is stored
148 /// in inner nodes and leaves
149 typedef _Key key_type;
152 typedef _Weight weight_type;
154 /// Second template parameter: The data type associated with each
155 /// key. Stored in the B+ tree's leaves
156 typedef _Data data_type;
158 /// Third template parameter: Composition pair of key and data types, this
159 /// is required by the STL standard. The B+ tree does not store key and
160 /// data together. If value_type == key_type then the B+ tree implements a
162 typedef _Value value_type;
164 /// Fourth template parameter: Key comparison function object
165 typedef _Compare key_compare;
167 /// Fifth template parameter: Traits object used to define more parameters
169 typedef _Traits traits;
171 /// Sixth template parameter: Allow duplicate keys in the B+ tree. Used to
172 /// implement multiset and multimap.
173 static const bool allow_duplicates = _Duplicates;
175 // The macro BTREE_FRIENDS can be used by outside class to access the B+
176 // tree internals. This was added for wxBTreeDemo to be able to draw the
181 // *** Constructed Types
183 /// Typedef of our own type
184 typedef weighted_btree<key_type, weight_type, data_type, value_type,
185 key_compare, traits, allow_duplicates> btree_self;
187 /// Size type used to count keys
188 typedef size_t size_type;
190 /// The pair of key_type and data_type, this may be different from value_type.
191 typedef std::pair<key_type, data_type> pair_type;
194 // *** Static Constant Options and Values of the B+ Tree
196 /// Base B+ tree parameter: The number of key/data slots in each leaf
197 static const unsigned short leafslotmax = traits::leafslots;
199 /// Base B+ tree parameter: The number of key slots in each inner node,
200 /// this can differ from slots in each leaf.
201 static const unsigned short innerslotmax = traits::innerslots;
203 /// Computed B+ tree parameter: The minimum number of key/data slots used
204 /// in a leaf. If fewer slots are used, the leaf will be merged or slots
205 /// shifted from it's siblings.
206 static const unsigned short minleafslots = (leafslotmax / 2);
208 /// Computed B+ tree parameter: The minimum number of key slots used
209 /// in an inner node. If fewer slots are used, the inner node will be
210 /// merged or slots shifted from it's siblings.
211 static const unsigned short mininnerslots = (innerslotmax / 2);
213 /// Debug parameter: Enables expensive and thorough checking of the B+ tree
214 /// invariants after each insert/erase operation.
215 static const bool selfverify = traits::selfverify;
217 /// Debug parameter: Prints out lots of debug information about how the
218 /// algorithms change the tree. Requires the header file to be compiled
219 /// with BTREE_DEBUG and the key type must be std::ostream printable.
220 static const bool debug = traits::debug;
223 // *** Node Classes for In-Memory Nodes
225 /// The header structure of each node in-memory. This structure is extended
226 /// by inner_node or leaf_node.
229 /// Level in the b-tree, if level == 0 -> leaf node
230 unsigned short level;
232 /// Number of key slotuse use, so number of valid children or data
234 unsigned short slotuse;
239 /// Delayed initialisation of constructed node
240 inline void initialize(const unsigned short l)
247 /// True if this is a leaf node
248 inline bool isleafnode() const
254 /// Extended structure of a inner node in-memory. Contains only keys and no
256 struct inner_node : public node
258 /// Keys of children or data pointers
259 key_type slotkey[innerslotmax];
261 /// Pointers to children
262 node* childid[innerslotmax+1];
264 /// Set variables to initial values
265 inline void initialize(const unsigned short l)
270 /// True if the node's slots are full
271 inline bool isfull() const
273 return (node::slotuse == innerslotmax);
276 /// True if few used entries, less than half full
277 inline bool isfew() const
279 return (node::slotuse <= mininnerslots);
282 /// True if node has too few entries
283 inline bool isunderflow() const
285 return (node::slotuse < mininnerslots);
289 /// Extended structure of a leaf node in memory. Contains pairs of keys and
290 /// data items. Key and data slots are kept in separate arrays, because the
291 /// key array is traversed very often compared to accessing the data items.
292 struct leaf_node : public node
294 /// Double linked list pointers to traverse the leaves
297 /// Double linked list pointers to traverse the leaves
300 /// Keys of children or data pointers
301 key_type slotkey[leafslotmax];
304 data_type slotdata[leafslotmax];
307 weight_type weights[leafslotmax];
309 /// Set variables to initial values
310 inline void initialize()
313 prevleaf = nextleaf = NULL;
316 /// True if the node's slots are full
317 inline bool isfull() const
319 return (node::slotuse == leafslotmax);
322 /// True if few used entries, less than half full
323 inline bool isfew() const
325 return (node::slotuse <= minleafslots);
328 /// True if node has too few entries
329 inline bool isunderflow() const
331 return (node::slotuse < minleafslots);
336 // *** Template Magic to Convert a pair or key/data types to a value_type
338 /// \internal For sets the second pair_type is an empty struct, so the
339 /// value_type should only be the first.
340 template <typename value_type, typename pair_type>
341 struct btree_pair_to_value
343 /// Convert a fake pair type to just the first component
344 inline value_type operator()(pair_type& p) const {
347 /// Convert a fake pair type to just the first component
348 inline value_type operator()(const pair_type& p) const {
353 /// \internal For maps value_type is the same as the pair_type
354 template <typename value_type>
355 struct btree_pair_to_value<value_type, value_type>
357 /// Identity "convert" a real pair type to just the first component
358 inline value_type operator()(pair_type& p) const {
361 /// Identity "convert" a real pair type to just the first component
362 inline value_type operator()(const pair_type& p) const {
367 /// Using template specialization select the correct converter used by the
369 typedef btree_pair_to_value<value_type, pair_type> pair_to_value_type;
372 // *** Iterators and Reverse Iterators
375 class const_iterator;
377 /// STL-like iterator object for B+ tree items. The iterator points to a
378 /// specific slot number in a leaf.
384 /// The key type of the btree. Returned by key().
385 typedef typename weighted_btree::key_type key_type;
388 typedef typename weighted_btree::weight_type weight_type;
390 /// The data type of the btree. Returned by data().
391 typedef typename weighted_btree::data_type data_type;
393 /// The value type of the btree. Returned by operator*().
394 typedef typename weighted_btree::value_type value_type;
396 /// The pair type of the btree.
397 typedef typename weighted_btree::pair_type pair_type;
399 /// Reference to the value_type. Required by the reverse_iterator.
400 typedef value_type& reference;
402 /// Pointer to the value_type. Required by the reverse_iterator.
403 typedef value_type* pointer;
405 /// STL-magic iterator category
406 typedef std::bidirectional_iterator_tag iterator_category;
409 typedef ptrdiff_t difference_type;
412 typedef iterator self;
415 friend class weighted_btree;
419 /// The currently referenced leaf node of the tree
420 typename weighted_btree::leaf_node* currnode;
422 /// Current key/data slot referenced
423 unsigned short currslot;
425 /// Friendly to the const_iterator, so it may access the two data items directly
426 friend class weighted_btree<key_type, weight_type, data_type, value_type, key_compare, traits, allow_duplicates>::const_iterator;
428 /// Evil! A temporary value_type to STL-correctly deliver operator* and
430 mutable value_type temp_value;
432 // The macro BTREE_FRIENDS can be used by outside class to access the B+
433 // tree internals. This was added for wxBTreeDemo to be able to draw the
440 /// Constructor of a mutable iterator
441 inline iterator(typename weighted_btree::leaf_node *l, unsigned short s)
442 : currnode(l), currslot(s)
445 /// Dereference the iterator, this is not a value_type& because key and
446 /// value are not stored together
447 inline reference operator*() const
449 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
450 currnode->slotdata[currslot]) );
454 /// Dereference the iterator. Do not use this if possible, use key()
455 /// and data() instead. The B+ tree does not stored key and data
457 inline pointer operator->() const
459 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
460 currnode->slotdata[currslot]) );
464 /// Key of the current slot
465 inline const key_type& key() const
467 return currnode->slotkey[currslot];
470 /// Weight of the current slot
471 inline weight_type weight() const
473 return currnode->weights[currslot];
476 /// Writable reference to the current data object
477 inline data_type& data() const
479 return currnode->slotdata[currslot];
482 /// Prefix++ advance the iterator to the next slot
483 inline self& operator++()
485 if (currslot + 1 < currnode->slotuse) {
488 else if (currnode->nextleaf != NULL) {
489 currnode = currnode->nextleaf;
494 currslot = currnode->slotuse;
500 /// Postfix++ advance the iterator to the next slot
501 inline self operator++(int)
503 self tmp = *this; // copy ourselves
505 if (currslot + 1 < currnode->slotuse) {
508 else if (currnode->nextleaf != NULL) {
509 currnode = currnode->nextleaf;
514 currslot = currnode->slotuse;
520 /// Prefix-- backstep the iterator to the last slot
521 inline self& operator--()
526 else if (currnode->prevleaf != NULL) {
527 currnode = currnode->prevleaf;
528 currslot = currnode->slotuse - 1;
538 /// Postfix-- backstep the iterator to the last slot
539 inline self operator--(int)
541 self tmp = *this; // copy ourselves
546 else if (currnode->prevleaf != NULL) {
547 currnode = currnode->prevleaf;
548 currslot = currnode->slotuse - 1;
558 /// Equality of iterators
559 inline bool operator==(const self& x) const
561 return (x.currnode == currnode) && (x.currslot == currslot);
564 /// Inequality of iterators
565 inline bool operator!=(const self& x) const
567 return (x.currnode != currnode) || (x.currslot != currslot);
571 /// STL-like read-only iterator object for B+ tree items. The iterator
572 /// points to a specific slot number in a leaf.
578 /// The key type of the btree. Returned by key().
579 typedef typename weighted_btree::key_type key_type;
581 /// The data type of the btree. Returned by data().
582 typedef typename weighted_btree::data_type data_type;
584 /// The value type of the btree. Returned by operator*().
585 typedef typename weighted_btree::value_type value_type;
587 /// The pair type of the btree.
588 typedef typename weighted_btree::pair_type pair_type;
590 /// Reference to the value_type. Required by the reverse_iterator.
591 typedef const value_type& reference;
593 /// Pointer to the value_type. Required by the reverse_iterator.
594 typedef const value_type* pointer;
596 /// STL-magic iterator category
597 typedef std::bidirectional_iterator_tag iterator_category;
600 typedef ptrdiff_t difference_type;
603 typedef const_iterator self;
606 friend class weighted_btree;
610 /// The currently referenced leaf node of the tree
611 const typename weighted_btree::leaf_node* currnode;
613 /// Current key/data slot referenced
614 unsigned short currslot;
616 /// Evil! A temporary value_type to STL-correctly deliver operator* and
618 mutable value_type temp_value;
620 // The macro BTREE_FRIENDS can be used by outside class to access the B+
621 // tree internals. This was added for wxBTreeDemo to be able to draw the
628 /// Constructor of a const iterator
629 inline const_iterator(const typename weighted_btree::leaf_node *l, unsigned short s)
630 : currnode(l), currslot(s)
633 /// Copy-constructor from a mutable const iterator
634 inline const_iterator(const iterator &it)
635 : currnode(it.currnode), currslot(it.currslot)
638 /// Dereference the iterator. Do not use this if possible, use key()
639 /// and data() instead. The B+ tree does not stored key and data
641 inline reference operator*() const
643 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
644 currnode->slotdata[currslot]) );
648 /// Dereference the iterator. Do not use this if possible, use key()
649 /// and data() instead. The B+ tree does not stored key and data
651 inline pointer operator->() const
653 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
654 currnode->slotdata[currslot]) );
658 /// Key of the current slot
659 inline const key_type& key() const
661 return currnode->slotkey[currslot];
664 /// Weight of the current slot
665 inline weight_type weight() const
667 return currnode->weights[currslot];
670 /// Read-only reference to the current data object
671 inline const data_type& data() const
673 return currnode->slotdata[currslot];
676 /// Prefix++ advance the iterator to the next slot
677 inline self& operator++()
679 if (currslot + 1 < currnode->slotuse) {
682 else if (currnode->nextleaf != NULL) {
683 currnode = currnode->nextleaf;
688 currslot = currnode->slotuse;
694 /// Postfix++ advance the iterator to the next slot
695 inline self operator++(int)
697 self tmp = *this; // copy ourselves
699 if (currslot + 1 < currnode->slotuse) {
702 else if (currnode->nextleaf != NULL) {
703 currnode = currnode->nextleaf;
708 currslot = currnode->slotuse;
714 /// Prefix-- backstep the iterator to the last slot
715 inline self& operator--()
720 else if (currnode->prevleaf != NULL) {
721 currnode = currnode->prevleaf;
722 currslot = currnode->slotuse - 1;
732 /// Postfix-- backstep the iterator to the last slot
733 inline self operator--(int)
735 self tmp = *this; // copy ourselves
740 else if (currnode->prevleaf != NULL) {
741 currnode = currnode->prevleaf;
742 currslot = currnode->slotuse - 1;
752 /// Equality of iterators
753 inline bool operator==(const self& x) const
755 return (x.currnode == currnode) && (x.currslot == currslot);
758 /// Inequality of iterators
759 inline bool operator!=(const self& x) const
761 return (x.currnode != currnode) || (x.currslot != currslot);
765 /// create mutable reverse iterator by using STL magic
766 typedef std::reverse_iterator<iterator> reverse_iterator;
768 /// create constant reverse iterator by using STL magic
769 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
772 // *** Small Statistics Structure
774 /** A small struct containing basic statistics about the B+ tree. It can be
775 * fetched using get_stats(). */
778 /// Number of items in the B+ tree
781 /// Number of leaves in the B+ tree
784 /// Number of inner nodes in the B+ tree
785 size_type innernodes;
787 /// Base B+ tree parameter: The number of key/data slots in each leaf
788 static const unsigned short leafslots = btree_self::leafslotmax;
790 /// Base B+ tree parameter: The number of key slots in each inner node.
791 static const unsigned short innerslots = btree_self::innerslotmax;
796 leaves(0), innernodes(0)
800 /// Return the total number of nodes
801 inline size_type nodes() const
803 return innernodes + leaves;
806 /// Return the average fill of leaves
807 inline double avgfill_leaves() const
809 return static_cast<double>(itemcount) / (leaves * leafslots);
814 // *** Tree Object Data Members
816 /// Pointer to the B+ tree's root node, either leaf or inner node
819 /// Pointer to first leaf in the double linked leaf chain
822 /// Pointer to last leaf in the double linked leaf chain
825 /// Other small statistics about the B+ tree
828 /// Key comparison object. More comparison functions are generated from
830 key_compare key_less;
833 // *** Constructors and Destructor
835 /// Default constructor initializing an empty B+ tree with the standard key
836 /// comparison function
837 inline weighted_btree()
838 : root(NULL), headleaf(NULL), tailleaf(NULL)
842 /// Constructor initializing an empty B+ tree with a special key
843 /// comparison object
844 inline weighted_btree(const key_compare &kcf)
845 : root(NULL), headleaf(NULL), tailleaf(NULL),
850 /// Constructor initializing a B+ tree with the range [first,last)
851 template <class InputIterator>
852 inline weighted_btree(InputIterator first, InputIterator last)
853 : root(NULL), headleaf(NULL), tailleaf(NULL)
858 /// Constructor initializing a B+ tree with the range [first,last) and a
859 /// special key comparison object
860 template <class InputIterator>
861 inline weighted_btree(InputIterator first, InputIterator last, const key_compare &kcf)
862 : root(NULL), headleaf(NULL), tailleaf(NULL),
868 /// Frees up all used B+ tree memory pages
869 inline ~weighted_btree()
874 /// Fast swapping of two identical B+ tree objects.
875 void swap(btree_self& from)
877 std::swap(root, from.root);
878 std::swap(headleaf, from.headleaf);
879 std::swap(tailleaf, from.tailleaf);
880 std::swap(stats, from.stats);
881 std::swap(key_less, from.key_less);
885 // *** Key and Value Comparison Function Objects
887 /// Function class to compare value_type objects. Required by the STL
891 /// Key comparison function from the template parameter
892 key_compare key_comp;
894 /// Constructor called from btree::value_comp()
895 inline value_compare(key_compare kc)
899 /// Friendly to the btree class so it may call the constructor
900 friend class weighted_btree<key_type, weight_type, data_type, value_type, key_compare, traits, allow_duplicates>;
903 /// Function call "less"-operator resulting in true if x < y.
904 inline bool operator()(const value_type& x, const value_type& y) const
906 return key_comp(x.first, y.first);
910 /// Constant access to the key comparison object sorting the B+ tree
911 inline key_compare key_comp() const
916 /// Constant access to a constructed value_type comparison object. Required
918 inline value_compare value_comp() const
920 return value_compare(key_less);
924 // *** Convenient Key Comparison Functions Generated From key_less
926 /// True if a <= b ? constructed from key_less()
927 inline bool key_lessequal(const key_type &a, const key_type b) const
929 return !key_less(b, a);
932 /// True if a > b ? constructed from key_less()
933 inline bool key_greater(const key_type &a, const key_type &b) const
935 return key_less(b, a);
938 /// True if a >= b ? constructed from key_less()
939 inline bool key_greaterequal(const key_type &a, const key_type b) const
941 return !key_less(a, b);
944 /// True if a == b ? constructed from key_less(). This requires the <
945 /// relation to be a total order, otherwise the B+ tree cannot be sorted.
946 inline bool key_equal(const key_type &a, const key_type &b) const
948 return !key_less(a, b) && !key_less(b, a);
952 // *** Node Object Allocation and Deallocation Functions
954 /// Allocate and initialize a leaf node
955 inline leaf_node* allocate_leaf()
957 leaf_node* n = new leaf_node;
963 /// Allocate and initialize an inner node
964 inline inner_node* allocate_inner(unsigned short l)
966 inner_node* n = new inner_node;
972 /// Correctly free either inner or leaf node, destructs all contained key
973 /// and value objects
974 inline void free_node(node *n)
976 if (n->isleafnode()) {
977 delete static_cast<leaf_node*>(n);
981 delete static_cast<inner_node*>(n);
987 // *** Fast Destruction of the B+ Tree
989 /// Frees all key/data pairs and all nodes of the tree
994 clear_recursive(root);
998 headleaf = tailleaf = NULL;
1000 stats = tree_stats();
1003 BTREE_ASSERT(stats.itemcount == 0);
1007 /// Recursively free up nodes
1008 void clear_recursive(node *n)
1010 if (n->isleafnode())
1012 leaf_node *leafnode = static_cast<leaf_node*>(n);
1014 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot)
1016 // data objects are deleted by leaf_node's destructor
1021 inner_node *innernode = static_cast<inner_node*>(n);
1023 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot)
1025 clear_recursive(innernode->childid[slot]);
1026 free_node(innernode->childid[slot]);
1032 // *** STL Iterator Construction Functions
1034 /// Constructs a read/data-write iterator that points to the first slot in
1035 /// the first leaf of the B+ tree.
1036 inline iterator begin()
1038 return iterator(headleaf, 0);
1041 /// Constructs a read/data-write iterator that points to the first invalid
1042 /// slot in the last leaf of the B+ tree.
1043 inline iterator end()
1045 return iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
1048 /// Constructs a read-only constant iterator that points to the first slot
1049 /// in the first leaf of the B+ tree.
1050 inline const_iterator begin() const
1052 return const_iterator(headleaf, 0);
1055 /// Constructs a read-only constant iterator that points to the first
1056 /// invalid slot in the last leaf of the B+ tree.
1057 inline const_iterator end() const
1059 return const_iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
1062 /// Constructs a read/data-write reverse iterator that points to the first
1063 /// invalid slot in the last leaf of the B+ tree. Uses STL magic.
1064 inline reverse_iterator rbegin()
1066 return reverse_iterator(end());
1069 /// Constructs a read/data-write reverse iterator that points to the first
1070 /// slot in the first leaf of the B+ tree. Uses STL magic.
1071 inline reverse_iterator rend()
1073 return reverse_iterator(begin());
1076 /// Constructs a read-only reverse iterator that points to the first
1077 /// invalid slot in the last leaf of the B+ tree. Uses STL magic.
1078 inline const_reverse_iterator rbegin() const
1080 return const_reverse_iterator(end());
1083 /// Constructs a read-only reverse iterator that points to the first slot
1084 /// in the first leaf of the B+ tree. Uses STL magic.
1085 inline const_reverse_iterator rend() const
1087 return const_reverse_iterator(begin());
1091 // *** B+ Tree Node Binary Search Functions
1093 /// Searches for the first key in the node n less or equal to key. Uses
1094 /// binary search with an optional linear self-verification. This is a
1095 /// template function, because the slotkey array is located at different
1096 /// places in leaf_node and inner_node.
1097 template <typename node_type>
1098 inline int find_lower(const node_type *n, const key_type& key) const
1100 if (n->slotuse == 0) return 0;
1103 hi = n->slotuse - 1;
1107 int mid = (lo + hi) / 2;
1109 if (key_lessequal(key, n->slotkey[mid])) {
1117 if (hi < 0 || key_less(n->slotkey[hi], key))
1120 BTREE_PRINT("btree::find_lower: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", ");
1122 // verify result using simple linear search
1125 int i = n->slotuse - 1;
1126 while(i >= 0 && key_lessequal(key, n->slotkey[i]))
1130 BTREE_PRINT("testfind: " << i << std::endl);
1131 BTREE_ASSERT(i == hi);
1134 BTREE_PRINT(std::endl);
1140 /// Searches for the first summed weight in the node n less or equal to weight.
1141 inline int find_summed_weight_lower(const inner_node *n, weight_type weight) const
1143 if (n->slotuse == 0) return 0;
1146 weight_type w = n->childid[0]->weight;
1147 while (i < n->slotuse && w <= weight) {
1149 w += n->childid[i]->weight;
1155 /// Searches for the first summed weight in the node n less or equal to weight.
1156 inline int find_summed_weight_lower(const leaf_node *n, weight_type weight) const
1158 if (n->slotuse == 0) return 0;
1161 weight_type w = n->weights[0];
1162 while (i < n->slotuse && w <= weight) {
1170 /// Searches for the first key in the node n greater than key. Uses binary
1171 /// search with an optional linear self-verification. This is a template
1172 /// function, because the slotkey array is located at different places in
1173 /// leaf_node and inner_node.
1174 template <typename node_type>
1175 inline int find_upper(const node_type *n, const key_type& key) const
1177 if (n->slotuse == 0) return 0;
1180 hi = n->slotuse - 1;
1184 int mid = (lo + hi) / 2;
1186 if (key_less(key, n->slotkey[mid])) {
1194 if (hi < 0 || key_lessequal(n->slotkey[hi], key))
1197 BTREE_PRINT("btree::find_upper: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", ");
1199 // verify result using simple linear search
1202 int i = n->slotuse - 1;
1203 while(i >= 0 && key_less(key, n->slotkey[i]))
1207 BTREE_PRINT("btree::find_upper testfind: " << i << std::endl);
1208 BTREE_ASSERT(i == hi);
1211 BTREE_PRINT(std::endl);
1217 /// Searches for the first summed weight in the node n greater than weight.
1218 inline int find_summed_weight_upper(const inner_node *n, weight_type weight) const
1220 if (n->slotuse == 0) return 0;
1223 weight_type w = n->childid[0]->weight;
1224 while (i < n->slotuse && w < weight) {
1226 w += n->childid[i]->weight;
1232 /// Searches for the first summed weight in the node n greater than weight.
1233 inline int find_summed_weight_upper(const leaf_node *n, weight_type weight) const
1235 if (n->slotuse == 0) return 0;
1238 weight_type w = n->weights[0];
1239 while (i < n->slotuse && w < weight) {
1248 // *** Access Functions to the Item Count
1250 /// Return the number of key/data pairs in the B+ tree
1251 inline size_type size() const
1253 return stats.itemcount;
1257 inline weight_type summed_weight() const
1260 return root->weight;
1265 /// Returns true if there is at least one key/data pair in the B+ tree
1266 inline bool empty() const
1268 return (size() == size_type(0));
1271 /// Returns the largest possible size of the B+ Tree. This is just a
1272 /// function required by the STL standard, the B+ Tree can hold more items.
1273 inline size_type max_size() const
1275 return size_type(-1);
1278 /// Return a const reference to the current statistics.
1279 inline const struct tree_stats& get_stats() const
1285 // *** Standard Access Functions Querying the Tree by Descending to a Leaf
1287 /// Non-STL function checking whether a key is in the B+ tree. The same as
1288 /// (find(k) != end()) or (count() != 0).
1289 bool exists(const key_type &key) const
1291 const node *n = root;
1293 if (!n) return false;
1295 while(!n->isleafnode())
1297 const inner_node *inner = static_cast<const inner_node*>(n);
1298 int slot = find_lower(inner, key);
1300 n = inner->childid[slot];
1303 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1305 int slot = find_lower(leaf, key);
1306 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]));
1309 /// Tries to locate a key in the B+ tree and returns an iterator to the
1310 /// key/data slot if found. If unsuccessful it returns end().
1311 iterator find(const key_type &key)
1314 if (!n) return end();
1316 while(!n->isleafnode())
1318 const inner_node *inner = static_cast<const inner_node*>(n);
1319 int slot = find_lower(inner, key);
1321 n = inner->childid[slot];
1324 leaf_node *leaf = static_cast<leaf_node*>(n);
1326 unsigned short slot = find_lower(leaf, key);
1327 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1328 ? iterator(leaf, slot) : end();
1331 /// Changes the weight of the first node with the given key to the
1333 void change_weight(iterator & it, weight_type w)
1337 if (it.weight() == w)
1340 while(!n->isleafnode())
1342 const inner_node *inner = static_cast<const inner_node*>(n);
1343 int slot = find_lower(inner, it.key());
1345 // two step because weight_type might be unsigned
1346 n->weight -= it.weight();
1349 n = inner->childid[slot];
1352 BTREE_ASSERT(n == it.curnode);
1353 n->weight -= it.weight();
1355 it.currnode->weights[it.currslot] = w;
1358 /// Tries to locate a summed weight in the B+ tree and returns an iterator to the
1359 /// key/data slot if found. If unsuccessful it returns end(). It is
1360 /// ignoring zero-weight nodes during this.
1361 iterator find_summed_weight(weight_type weight)
1364 if (!n) return end();
1366 while(!n->isleafnode())
1368 const inner_node *inner = static_cast<const inner_node*>(n);
1369 int slot = find_summed_weight_lower(inner, weight);
1371 for (unsigned short s = 0; s < slot; ++s)
1372 weight -= inner->childid[s]->weight;
1374 n = inner->childid[slot];
1377 leaf_node *leaf = static_cast<leaf_node*>(n);
1379 unsigned short slot = find_summed_weight_lower(leaf, weight);
1380 for (unsigned short s = 0; s < slot; ++s)
1381 weight -= leaf->weights[s];
1383 return (slot < leaf->slotuse && weight == 0)
1384 ? iterator(leaf, slot) : end();
1389 weight_type summed_weight(const key_type &key)
1395 while(!n->isleafnode()) {
1396 const inner_node *inner = static_cast<const inner_node*>(n);
1397 int slot = find_lower(inner, key);
1399 for (unsigned short s = 0; s < slot; ++s)
1400 w += inner->childid[slot]->weight;
1402 n = inner->childid[slot];
1405 leaf_node *leaf = static_cast<leaf_node*>(n);
1407 int slot = find_lower(leaf, key);
1409 for (unsigned short s = 0; s < slot; ++s)
1410 w += leaf->childid[slot]->weight;
1412 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1413 ? iterator(leaf, slot) : end();
1416 /// Tries to locate a key in the B+ tree and returns an constant iterator
1417 /// to the key/data slot if found. If unsuccessful it returns end().
1418 const_iterator find(const key_type &key) const
1420 const node *n = root;
1421 if (!n) return end();
1423 while(!n->isleafnode())
1425 const inner_node *inner = static_cast<const inner_node*>(n);
1426 int slot = find_lower(inner, key);
1428 n = inner->childid[slot];
1431 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1433 int slot = find_lower(leaf, key);
1434 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1435 ? const_iterator(leaf, slot) : end();
1438 /// Tries to locate a summed weight in the B+ tree and returns an iterator to the
1439 /// key/data slot if found. If unsuccessful it returns end(). It is
1440 /// ignoring zero-weight nodes during this.
1441 const_iterator find_summed_weight(weight_type weight) const
1444 if (!n) return end();
1446 while(!n->isleafnode())
1448 const inner_node *inner = static_cast<const inner_node*>(n);
1449 int slot = find_summed_weight_lower(inner, weight);
1451 for (unsigned short s = 0; s < slot; ++s)
1452 weight -= inner->childid[s]->weight;
1454 n = inner->childid[slot];
1457 leaf_node *leaf = static_cast<leaf_node*>(n);
1459 int slot = find_summed_weight_lower(leaf, weight);
1460 for (unsigned short s = 0; s < slot; ++s)
1461 weight -= leaf->childid[s]->weight;
1463 return (slot < leaf->slotuse && weight == 0)
1464 ? const_iterator(leaf, slot) : end();
1467 /// Tries to locate a key in the B+ tree and returns the number of
1468 /// identical key entries found.
1469 size_type count(const key_type &key) const
1471 const node *n = root;
1474 while(!n->isleafnode())
1476 const inner_node *inner = static_cast<const inner_node*>(n);
1477 int slot = find_lower(inner, key);
1479 n = inner->childid[slot];
1482 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1484 int slot = find_lower(leaf, key);
1487 while (leaf && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1490 if (++slot >= leaf->slotuse)
1492 leaf = leaf->nextleaf;
1500 /// Searches the B+ tree and returns an iterator to the first key less or
1501 /// equal to the parameter. If unsuccessful it returns end().
1502 iterator lower_bound(const key_type& key)
1505 if (!n) return end();
1507 while(!n->isleafnode())
1509 const inner_node *inner = static_cast<const inner_node*>(n);
1510 int slot = find_lower(inner, key);
1512 n = inner->childid[slot];
1515 leaf_node *leaf = static_cast<leaf_node*>(n);
1517 int slot = find_lower(leaf, key);
1518 return iterator(leaf, slot);
1521 /// Searches the B+ tree and returns an iterator to the first summed weight
1522 /// less or equal to the parameter. If unsuccessful it returns end().
1523 iterator lower_summed_weight_bound(weight_type weight)
1526 if (!n) return end();
1528 while(!n->isleafnode()) {
1529 const inner_node *inner = static_cast<const inner_node*>(n);
1530 int slot = find_summed_weight_lower(inner, weight);
1532 for (unsigned short s = 0; s < slot; ++s)
1533 weight -= inner->childid[s]->weight;
1535 n = inner->childid[slot];
1538 leaf_node *leaf = static_cast<leaf_node*>(n);
1540 int slot = find_summed_weight_lower(leaf, weight);
1542 for (unsigned short s = 0; s < slot; ++s)
1543 weight -= leaf->weights[s];
1545 return iterator(leaf, slot);
1548 /// Searches the B+ tree and returns an constant iterator to the first key less or
1549 /// equal to the parameter. If unsuccessful it returns end().
1550 const_iterator lower_bound(const key_type& key) const
1552 const node *n = root;
1553 if (!n) return end();
1555 while(!n->isleafnode())
1557 const inner_node *inner = static_cast<const inner_node*>(n);
1558 int slot = find_lower(inner, key);
1560 n = inner->childid[slot];
1563 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1565 int slot = find_lower(leaf, key);
1566 return const_iterator(leaf, slot);
1569 /// Searches the B+ tree and returns an iterator to the first summed weight
1570 /// less or equal to the parameter. If unsuccessful it returns end().
1571 const_iterator lower_summed_weight_bound(weight_type weight) const
1574 if (!n) return end();
1576 while(!n->isleafnode())
1578 const inner_node *inner = static_cast<const inner_node*>(n);
1579 int slot = find_summed_weight_lower(inner, weight);
1581 for (unsigned short s = 0; s < slot; ++s)
1582 weight -= inner->childid[s]->weight;
1584 n = inner->childid[slot];
1587 leaf_node *leaf = static_cast<leaf_node*>(n);
1589 int slot = find_summed_weight_lower(leaf, weight);
1591 for (unsigned short s = 0; s < slot; ++s)
1592 weight -= leaf->weights[s];
1594 return const_iterator(leaf, slot);
1597 /// Searches the B+ tree and returns an iterator to the first key greater
1598 /// than the parameter. If unsuccessful it returns end().
1599 iterator upper_bound(const key_type& key)
1602 if (!n) return end();
1604 while(!n->isleafnode())
1606 const inner_node *inner = static_cast<const inner_node*>(n);
1607 int slot = find_upper(inner, key);
1609 n = inner->childid[slot];
1612 leaf_node *leaf = static_cast<leaf_node*>(n);
1614 int slot = find_upper(leaf, key);
1615 return iterator(leaf, slot);
1618 /// Searches the B+ tree and returns an constant iterator to the first key
1619 /// greater than the parameter. If unsuccessful it returns end().
1620 const_iterator upper_bound(const key_type& key) const
1622 const node *n = root;
1623 if (!n) return end();
1625 while(!n->isleafnode())
1627 const inner_node *inner = static_cast<const inner_node*>(n);
1628 int slot = find_upper(inner, key);
1630 n = inner->childid[slot];
1633 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1635 int slot = find_upper(leaf, key);
1636 return const_iterator(leaf, slot);
1639 /// Searches the B+ tree and returns an iterator to the first summed weight
1640 /// greater than the parameter. If unsuccessful it returns end().
1641 iterator upper_summed_weight_bound(weight_type weight)
1644 if (!n) return end();
1646 while(!n->isleafnode())
1648 const inner_node *inner = static_cast<const inner_node*>(n);
1649 int slot = find_summed_weight_upper(inner, weight);
1651 for (unsigned short s = 0; s < slot; ++s)
1652 weight -= inner->childid[s]->weight;
1654 n = inner->childid[slot];
1657 leaf_node *leaf = static_cast<leaf_node*>(n);
1659 int slot = find_summed_weight_upper(leaf, weight);
1661 for (unsigned short s = 0; s < slot; ++s)
1662 weight -= leaf->weights[s];
1664 return iterator(leaf, slot);
1667 /// Searches the B+ tree and returns an iterator to the first summed weight
1668 /// greater than the parameter. If unsuccessful it returns end().
1669 const_iterator upper_summed_weight_bound(weight_type weight) const
1672 if (!n) return end();
1674 while(!n->isleafnode()) {
1675 const inner_node *inner = static_cast<const inner_node*>(n);
1676 int slot = find_summed_weight_upper(inner, weight);
1678 for (unsigned short s = 0; s < slot; ++s)
1679 weight -= inner->childid[s]->weight;
1681 n = inner->childid[slot];
1684 leaf_node *leaf = static_cast<leaf_node*>(n);
1686 int slot = find_summed_weight_upper(leaf, weight);
1688 for (unsigned short s = 0; s < slot; ++s)
1689 weight -= leaf->weights[s];
1691 return const_iterator(leaf, slot);
1694 /// Searches the B+ tree and returns both lower_bound() and upper_bound().
1695 inline std::pair<iterator, iterator> equal_range(const key_type& key)
1697 return std::pair<iterator, iterator>(lower_bound(key), upper_bound(key));
1700 /// Searches the B+ tree and returns both lower_bound() and upper_bound().
1701 inline std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
1703 return std::pair<const_iterator, const_iterator>(lower_bound(key), upper_bound(key));
1706 /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound().
1707 inline std::pair<iterator, iterator> equal_summed_weight_range(weight_type weight)
1709 return std::pair<iterator, iterator>(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight));
1712 /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound().
1713 inline std::pair<const_iterator, const_iterator> equal_summed_weight_range(weight_type weight) const
1715 return std::pair<const_iterator, const_iterator>(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight));
1719 // *** B+ Tree Object Comparison Functions
1721 /// Equality relation of B+ trees of the same type. B+ trees of the same
1722 /// size and equal elements (both key and data) are considered
1723 /// equal. Beware of the random ordering of duplicate keys.
1724 inline bool operator==(const btree_self &other) const
1726 return (size() == other.size()) && std::equal(begin(), end(), other.begin());
1729 /// Inequality relation. Based on operator==.
1730 inline bool operator!=(const btree_self &other) const
1732 return !(*this == other);
1735 /// Total ordering relation of B+ trees of the same type. It uses
1736 /// std::lexicographical_compare() for the actual comparison of elements.
1737 inline bool operator<(const btree_self &other) const
1739 return std::lexicographical_compare(begin(), end(), other.begin(), other.end());
1742 /// Greater relation. Based on operator<.
1743 inline bool operator>(const btree_self &other) const
1745 return other < *this;
1748 /// Less-equal relation. Based on operator<.
1749 inline bool operator<=(const btree_self &other) const
1751 return !(other < *this);
1754 /// Greater-equal relation. Based on operator<.
1755 inline bool operator>=(const btree_self &other) const
1757 return !(*this < other);
1761 /// *** Fast Copy: Assign Operator and Copy Constructors
1763 /// Assignment operator. All the key/data pairs are copied
1764 inline btree_self& operator= (const btree_self &other)
1770 key_less = other.key_comp();
1771 if (other.size() != 0)
1773 stats.leaves = stats.innernodes = 0;
1774 root = copy_recursive(other.root);
1775 stats = other.stats;
1778 if (selfverify) verify();
1783 /// Copy constructor. The newly initialized B+ tree object will contain a
1784 /// copy of all key/data pairs.
1785 inline weighted_btree(const btree_self &other)
1786 : root(NULL), headleaf(NULL), tailleaf(NULL),
1787 stats( other.stats ),
1788 key_less( other.key_comp() )
1792 stats.leaves = stats.innernodes = 0;
1793 root = copy_recursive(other.root);
1794 if (selfverify) verify();
1799 /// Recursively copy nodes from another B+ tree object
1800 struct node* copy_recursive(const node *n)
1802 if (n->isleafnode())
1804 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1805 leaf_node *newleaf = allocate_leaf();
1807 newleaf->slotuse = leaf->slotuse;
1808 std::copy(leaf->slotkey, leaf->slotkey + leaf->slotuse, newleaf->slotkey);
1809 std::copy(leaf->slotdata, leaf->slotdata + leaf->slotuse, newleaf->slotdata);
1811 if (headleaf == NULL)
1813 headleaf = tailleaf = newleaf;
1814 newleaf->prevleaf = newleaf->nextleaf = NULL;
1818 newleaf->prevleaf = tailleaf;
1819 tailleaf->nextleaf = newleaf;
1827 const inner_node *inner = static_cast<const inner_node*>(n);
1828 inner_node *newinner = allocate_inner(inner->level);
1830 newinner->slotuse = inner->slotuse;
1831 std::copy(inner->slotkey, inner->slotkey + inner->slotuse, newinner->slotkey);
1833 for (unsigned short slot = 0; slot <= inner->slotuse; ++slot)
1835 newinner->childid[slot] = copy_recursive(inner->childid[slot]);
1843 // *** Public Insertion Functions
1845 /// Attempt to insert a key/data pair into the B+ tree. If the tree does not
1846 /// allow duplicate keys, then the insert may fail if it is already
1848 inline std::pair<iterator, bool> insert(const pair_type& x, weight_type weight)
1850 return insert_start(x.first, weight, x.second);
1853 /// Attempt to insert a key/data pair into the B+ tree. Beware that if
1854 /// key_type == data_type, then the template iterator insert() is called
1855 /// instead. If the tree does not allow duplicate keys, then the insert may
1856 /// fail if it is already present.
1857 inline std::pair<iterator, bool> insert(const key_type& key, weight_type weight, const data_type& data)
1859 return insert_start(key, weight, data);
1862 /// Attempt to insert a key/data pair into the B+ tree. This function is the
1863 /// same as the other insert, however if key_type == data_type then the
1864 /// non-template function cannot be called. If the tree does not allow
1865 /// duplicate keys, then the insert may fail if it is already present.
1866 inline std::pair<iterator, bool> insert2(const key_type& key, weight_type weight, const data_type& data)
1868 return insert_start(key, weight, data);
1871 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint
1872 /// is currently ignored by the B+ tree insertion routine.
1873 inline iterator insert(iterator /* hint */, const pair_type &x, weight_type weight)
1875 return insert_start(x.first, weight, x.second).first;
1878 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint is
1879 /// currently ignored by the B+ tree insertion routine.
1880 inline iterator insert2(iterator /* hint */, const key_type& key, weight_type weight, const data_type& data)
1882 return insert_start(key, weight, data).first;
1885 /// Attempt to insert the range [first,last) of value_type pairs into the B+
1886 /// tree. Each key/data pair is inserted individually.
1887 template <typename InputIterator>
1888 inline void insert(InputIterator first, InputIterator last)
1890 InputIterator iter = first;
1893 insert(*iter, iter->weight());
1899 // *** Private Insertion Functions
1901 /// Start the insertion descent at the current root and handle root
1902 /// splits. Returns true if the item was inserted
1903 std::pair<iterator, bool> insert_start(const key_type& key, weight_type weight, const data_type& value)
1905 node *newchild = NULL;
1906 key_type newkey = key_type();
1910 root = headleaf = tailleaf = allocate_leaf();
1913 std::pair<iterator, bool> r = insert_descend(root, key, weight, value, &newkey, &newchild);
1917 inner_node *newroot = allocate_inner(root->level + 1);
1918 newroot->slotkey[0] = newkey;
1920 newroot->childid[0] = root;
1921 newroot->childid[1] = newchild;
1923 newroot->weight = root->weight + newchild->weight;
1924 newroot->slotuse = 1;
1929 // increment itemcount if the item was inserted
1930 if (r.second) ++stats.itemcount;
1933 if (debug) print(std::cout);
1938 BTREE_ASSERT(exists(key));
1945 * @brief Insert an item into the B+ tree.
1947 * Descend down the nodes to a leaf, insert the key/data pair in a free
1948 * slot. If the node overflows, then it must be split and the new split
1949 * node inserted into the parent. Unroll / this splitting up to the root.
1951 std::pair<iterator, bool> insert_descend(node* n,
1952 const key_type& key,
1954 const data_type& value,
1955 key_type* splitkey, node** splitnode)
1957 if (!n->isleafnode())
1959 inner_node *inner = static_cast<inner_node*>(n);
1961 key_type newkey = key_type();
1962 node *newchild = NULL;
1964 int slot = find_lower(inner, key);
1966 BTREE_PRINT("btree::insert_descend into " << inner->childid[slot] << std::endl);
1968 weight_type oldw = inner->childid[slot]->weight;
1969 std::pair<iterator, bool> r = insert_descend(inner->childid[slot],
1970 key, weight, value, &newkey, &newchild);
1971 n->weight += inner->childid[slot]->weight - oldw;
1975 BTREE_PRINT("btree::insert_descend newchild with key " << newkey << " node " << newchild << " at slot " << slot << std::endl);
1977 if (inner->isfull())
1979 split_inner_node(inner, splitkey, splitnode, slot);
1981 BTREE_PRINT("btree::insert_descend done split_inner: putslot: " << slot << " putkey: " << newkey << " upkey: " << *splitkey << std::endl);
1986 print_node(std::cout, inner);
1987 print_node(std::cout, *splitnode);
1991 // check if insert slot is in the split sibling node
1992 BTREE_PRINT("btree::insert_descend switch: " << slot << " > " << inner->slotuse+1 << std::endl);
1994 if (slot == inner->slotuse+1 && inner->slotuse < (*splitnode)->slotuse)
1996 // special case when the insert slot matches the split
1997 // place between the two nodes, then the insert key
1998 // becomes the split key.
2000 BTREE_ASSERT(inner->slotuse + 1 < innerslotmax);
2002 inner_node *splitinner = static_cast<inner_node*>(*splitnode);
2004 // move the split key and it's datum into the left node
2005 inner->slotkey[inner->slotuse] = *splitkey;
2006 inner->childid[inner->slotuse+1] = splitinner->childid[0];
2007 inner->weight += splitinner->childid[0]->weight;
2008 splitinner->weight -= splitinner->childid[0]->weight;
2011 // set new split key and move corresponding datum into right node
2012 splitinner->childid[0] = newchild;
2013 splitinner->weight += newchild->weight;
2018 else if (slot >= inner->slotuse+1)
2020 // in case the insert slot is in the newly create split
2021 // node, we reuse the code below.
2023 slot -= inner->slotuse+1;
2024 inner = static_cast<inner_node*>(*splitnode);
2025 BTREE_PRINT("btree::insert_descend switching to splitted node " << inner << " slot " << slot <<std::endl);
2029 // put pointer to child node into correct slot
2030 BTREE_ASSERT(slot >= 0 && slot <= inner->slotuse);
2032 int i = inner->slotuse;
2035 inner->slotkey[i] = inner->slotkey[i - 1];
2036 inner->childid[i + 1] = inner->childid[i];
2040 inner->slotkey[slot] = newkey;
2041 inner->childid[slot + 1] = newchild;
2043 inner->weight += newchild->weight;
2048 else // n->isleafnode() == true
2050 leaf_node *leaf = static_cast<leaf_node*>(n);
2052 unsigned short slot = find_lower(leaf, key);
2054 if (!allow_duplicates && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) {
2055 return std::pair<iterator, bool>(iterator(leaf, slot), false);
2060 split_leaf_node(leaf, splitkey, splitnode);
2062 // check if insert slot is in the split sibling node
2063 if (slot >= leaf->slotuse)
2065 slot -= leaf->slotuse;
2066 leaf = static_cast<leaf_node*>(*splitnode);
2070 // put data item into correct data slot
2072 int i = leaf->slotuse - 1;
2073 BTREE_ASSERT(i + 1 < leafslotmax);
2075 while(i >= 0 && key_less(key, leaf->slotkey[i])) {
2076 leaf->slotkey[i + 1] = leaf->slotkey[i];
2077 leaf->slotdata[i + 1] = leaf->slotdata[i];
2078 leaf->weights[i + 1] = leaf->weights[i];
2082 leaf->slotkey[i + 1] = key;
2083 leaf->slotdata[i + 1] = value;
2084 leaf->weights[i + 1] = weight;
2085 leaf->weight += weight;
2088 if (splitnode && leaf != *splitnode && slot == leaf->slotuse-1)
2090 // special case: the node was split, and the insert is at the
2091 // last slot of the old node. then the splitkey must be
2096 return std::pair<iterator, bool>(iterator(leaf, i + 1), true);
2100 /// Split up a leaf node into two equally-filled sibling leaves. Returns
2101 /// the new nodes and it's insertion key in the two parameters.
2102 void split_leaf_node(leaf_node* leaf, key_type* _newkey, node** _newleaf)
2104 BTREE_ASSERT(leaf->isfull());
2106 unsigned int mid = leaf->slotuse / 2;
2108 BTREE_PRINT("btree::split_leaf_node on " << leaf << std::endl);
2110 leaf_node *newleaf = allocate_leaf();
2112 newleaf->slotuse = leaf->slotuse - mid;
2114 newleaf->nextleaf = leaf->nextleaf;
2115 if (newleaf->nextleaf == NULL) {
2116 BTREE_ASSERT(leaf == tailleaf);
2120 newleaf->nextleaf->prevleaf = newleaf;
2123 for(unsigned int slot = mid; slot < leaf->slotuse; ++slot)
2125 unsigned int ni = slot - mid;
2126 newleaf->slotkey[ni] = leaf->slotkey[slot];
2127 newleaf->slotdata[ni] = leaf->slotdata[slot];
2128 newleaf->weights[ni] = leaf->weights[slot];
2129 newleaf->weight += leaf->weights[slot];
2130 leaf->weight -= leaf->weights[slot];
2133 leaf->slotuse = mid;
2134 leaf->nextleaf = newleaf;
2135 newleaf->prevleaf = leaf;
2137 *_newkey = leaf->slotkey[leaf->slotuse-1];
2138 *_newleaf = newleaf;
2141 /// Split up an inner node into two equally-filled sibling nodes. Returns
2142 /// the new nodes and it's insertion key in the two parameters. Requires
2143 /// the slot of the item will be inserted, so the nodes will be the same
2144 /// size after the insert.
2145 void split_inner_node(inner_node* inner, key_type* _newkey, node** _newinner, unsigned int addslot)
2147 BTREE_ASSERT(inner->isfull());
2149 unsigned int mid = inner->slotuse / 2;
2151 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl);
2153 // if the split is uneven and the overflowing item will be put into the
2154 // larger node, then the smaller split node may underflow
2155 if (addslot <= mid && mid > inner->slotuse - (mid + 1))
2158 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl);
2160 BTREE_PRINT("btree::split_inner_node on " << inner << " into two nodes " << mid << " and " << inner->slotuse - (mid + 1) << " sized" << std::endl);
2162 inner_node *newinner = allocate_inner(inner->level);
2164 newinner->slotuse = inner->slotuse - (mid + 1);
2166 for(unsigned int slot = mid + 1; slot < inner->slotuse; ++slot)
2168 unsigned int ni = slot - (mid + 1);
2169 newinner->slotkey[ni] = inner->slotkey[slot];
2170 newinner->childid[ni] = inner->childid[slot];
2171 newinner->weight += inner->childid[slot]->weight;
2172 inner->weight -= inner->childid[slot]->weight;
2174 newinner->childid[newinner->slotuse] = inner->childid[inner->slotuse];
2175 newinner->weight += inner->childid[inner->slotuse]->weight;
2176 inner->weight -= inner->childid[inner->slotuse]->weight;
2178 inner->slotuse = mid;
2180 *_newkey = inner->slotkey[mid];
2181 *_newinner = newinner;
2185 // *** Support Class Encapsulating Deletion Results
2187 /// Result flags of recursive deletion.
2190 /// Deletion successful and no fix-ups necessary.
2193 /// Deletion not successful because key was not found.
2194 btree_not_found = 1,
2196 /// Deletion successful, the last key was updated so parent slotkeys
2198 btree_update_lastkey = 2,
2200 /// Deletion successful, children nodes were merged and the parent
2201 /// needs to remove the empty node.
2205 /// \internal B+ tree recursive deletion has much information which is
2206 /// needs to be passed upward.
2209 /// Merged result flags
2210 result_flags_t flags;
2212 /// The key to be updated at the parent's slot
2215 /// Constructor of a result with a specific flag, this can also be used
2216 /// as for implicit conversion.
2217 inline result_t(result_flags_t f = btree_ok)
2218 : flags(f), lastkey()
2221 /// Constructor with a lastkey value.
2222 inline result_t(result_flags_t f, const key_type &k)
2223 : flags(f), lastkey(k)
2226 /// Test if this result object has a given flag set.
2227 inline bool has(result_flags_t f) const
2229 return (flags & f) != 0;
2232 /// Merge two results OR-ing the result flags and overwriting lastkeys.
2233 inline result_t& operator|= (const result_t &other)
2235 flags = result_flags_t(flags | other.flags);
2237 // we overwrite existing lastkeys on purpose
2238 if (other.has(btree_update_lastkey))
2239 lastkey = other.lastkey;
2246 // *** Public Erase Functions
2248 /// Erases one (the first) of the key/data pairs associated with the given
2250 bool erase_one(const key_type &key)
2252 BTREE_PRINT("btree::erase_one(" << key << ") on btree size " << size() << std::endl);
2254 if (selfverify) verify();
2256 result_t result = erase_one_descend(key, root, NULL, NULL, NULL, NULL, NULL, 0);
2258 if (!result.has(btree_not_found))
2262 if (debug) print(std::cout);
2264 if (selfverify) verify();
2266 return !result.has(btree_not_found);
2269 /// Erases all the key/data pairs associated with the given key. This is
2270 /// implemented using erase_one().
2271 size_type erase(const key_type &key)
2275 while( erase_one(key) )
2278 if (!allow_duplicates) break;
2285 /// Erase the key/data pair referenced by the iterator.
2286 void erase(iterator iter)
2293 /// Erase all key/data pairs in the range [first,last). This function is
2294 /// currently not implemented by the B+ Tree.
2295 void erase(iterator /* first */, iterator /* last */)
2302 // *** Private Erase Functions
2304 /** @brief Erase one (the first) key/data pair in the B+ tree matching key.
2306 * Descends down the tree in search of key. During the descent the parent,
2307 * left and right siblings and their parents are computed and passed
2308 * down. Once the key/data pair is found, it is removed from the leaf. If
2309 * the leaf underflows 6 different cases are handled. These cases resolve
2310 * the underflow by shifting key/data pairs from adjacent sibling nodes,
2311 * merging two sibling nodes or trimming the tree.
2313 result_t erase_one_descend(const key_type& key,
2315 node *left, node *right,
2316 inner_node *leftparent, inner_node *rightparent,
2317 inner_node *parent, unsigned int parentslot)
2319 if (curr->isleafnode())
2321 leaf_node *leaf = static_cast<leaf_node*>(curr);
2322 leaf_node *leftleaf = static_cast<leaf_node*>(left);
2323 leaf_node *rightleaf = static_cast<leaf_node*>(right);
2325 int slot = find_lower(leaf, key);
2327 if (slot >= leaf->slotuse || !key_equal(key, leaf->slotkey[slot]))
2329 BTREE_PRINT("Could not find key " << key << " to erase." << std::endl);
2331 return btree_not_found;
2334 BTREE_PRINT("Found key in leaf " << curr << " at slot " << slot << std::endl);
2336 leaf->weight -= leaf->weights[slot];
2337 for (int i = slot; i < leaf->slotuse - 1; i++)
2339 leaf->slotkey[i] = leaf->slotkey[i + 1];
2340 leaf->slotdata[i] = leaf->slotdata[i + 1];
2341 leaf->weights[i] = leaf->weights[i + 1];
2345 result_t myres = btree_ok;
2347 // if the last key of the leaf was changed, the parent is notified
2348 // and updates the key of this leaf
2349 if (slot == leaf->slotuse && parent)
2351 if (parentslot < parent->slotuse)
2353 BTREE_ASSERT(parent->childid[parentslot] == curr);
2354 parent->slotkey[parentslot] = leaf->slotkey[leaf->slotuse - 1];
2358 BTREE_PRINT("Scheduling lastkeyupdate: key " << leaf->slotkey[leaf->slotuse - 1] << std::endl);
2359 myres |= result_t(btree_update_lastkey, leaf->slotkey[leaf->slotuse - 1]);
2363 if (leaf->isunderflow() && !(leaf == root && leaf->slotuse >= 1))
2365 // determine what to do about the underflow
2367 // case : if this empty leaf is the root, there is no way to
2368 // correct underflow
2369 if (leftleaf == NULL && rightleaf == NULL)
2373 // case : if both left and right leaves would underflow in case of
2374 // a shift, then merging is necessary. choose the more local merger
2376 else if ( (leftleaf == NULL || leftleaf->isfew()) && (rightleaf == NULL || rightleaf->isfew()) )
2378 if (leftparent == parent)
2379 myres |= merge_leaves(leftleaf, leaf, leftparent);
2381 myres |= merge_leaves(leaf, rightleaf, rightparent);
2383 // case : the right leaf has extra data, so balance right with current
2384 else if ( (leftleaf != NULL && leftleaf->isfew()) && (rightleaf != NULL && !rightleaf->isfew()) )
2386 if (rightparent == parent)
2387 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2389 myres |= merge_leaves(leftleaf, leaf, leftparent);
2391 // case : the left leaf has extra data, so balance left with current
2392 else if ( (leftleaf != NULL && !leftleaf->isfew()) && (rightleaf != NULL && rightleaf->isfew()) )
2394 if (leftparent == parent)
2395 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2397 myres |= merge_leaves(leaf, rightleaf, rightparent);
2399 // case : both the leaf and right leaves have extra data and our
2400 // parent, choose the leaf with more data
2401 else if (leftparent == rightparent)
2403 if (leftleaf->slotuse <= rightleaf->slotuse)
2404 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2406 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2410 if (leftparent == parent)
2411 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2413 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2419 else // !curr->isleafnode()
2421 inner_node *inner = static_cast<inner_node*>(curr);
2422 inner_node *leftinner = static_cast<inner_node*>(left);
2423 inner_node *rightinner = static_cast<inner_node*>(right);
2425 node *myleft, *myright;
2426 inner_node *myleftparent, *myrightparent;
2428 int slot = find_lower(inner, key);
2431 myleft = (left == NULL) ? NULL : (static_cast<inner_node*>(left))->childid[left->slotuse - 1];
2432 myleftparent = leftparent;
2435 myleft = inner->childid[slot - 1];
2436 myleftparent = inner;
2439 if (slot == inner->slotuse) {
2440 myright = (right == NULL) ? NULL : (static_cast<inner_node*>(right))->childid[0];
2441 myrightparent = rightparent;
2444 myright = inner->childid[slot + 1];
2445 myrightparent = inner;
2448 BTREE_PRINT("erase_one_descend into " << inner->childid[slot] << std::endl);
2450 result_t result = erase_one_descend(key,
2451 inner->childid[slot],
2453 myleftparent, myrightparent,
2456 result_t myres = btree_ok;
2458 if (result.has(btree_not_found))
2463 if (result.has(btree_update_lastkey))
2465 if (parent && parentslot < parent->slotuse)
2467 BTREE_PRINT("Fixing lastkeyupdate: key " << result.lastkey << " into parent " << parent << " at parentslot " << parentslot << std::endl);
2469 BTREE_ASSERT(parent->childid[parentslot] == curr);
2470 parent->slotkey[parentslot] = result.lastkey;
2474 BTREE_PRINT("Forwarding lastkeyupdate: key " << result.lastkey << std::endl);
2475 myres |= result_t(btree_update_lastkey, result.lastkey);
2479 if (result.has(btree_fixmerge))
2481 // either the current node or the next is empty and should be removed
2482 if (inner->childid[slot]->slotuse != 0)
2485 // this is the child slot invalidated by the merge
2486 BTREE_ASSERT(inner->childid[slot]->slotuse == 0);
2488 inner->weight -= inner->childid[slot]->weight;
2489 free_node(inner->childid[slot]);
2491 for(int i = slot; i < inner->slotuse; i++)
2493 inner->slotkey[i - 1] = inner->slotkey[i];
2494 inner->childid[i] = inner->childid[i + 1];
2498 if (inner->level == 1)
2500 // fix split key for children leaves
2502 leaf_node *child = static_cast<leaf_node*>(inner->childid[slot]);
2503 inner->slotkey[slot] = child->slotkey[ child->slotuse-1 ];
2507 if (inner->isunderflow() && !(inner == root && inner->slotuse >= 1))
2509 // case: the inner node is the root and has just one child. that child becomes the new root
2510 if (leftinner == NULL && rightinner == NULL)
2512 BTREE_ASSERT(inner == root);
2513 BTREE_ASSERT(inner->slotuse == 0);
2515 root = inner->childid[0];
2522 // case : if both left and right leaves would underflow in case of
2523 // a shift, then merging is necessary. choose the more local merger
2525 else if ( (leftinner == NULL || leftinner->isfew()) && (rightinner == NULL || rightinner->isfew()) )
2527 if (leftparent == parent)
2528 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1);
2530 myres |= merge_inner(inner, rightinner, rightparent, parentslot);
2532 // case : the right leaf has extra data, so balance right with current
2533 else if ( (leftinner != NULL && leftinner->isfew()) && (rightinner != NULL && !rightinner->isfew()) )
2535 if (rightparent == parent)
2536 shift_left_inner(inner, rightinner, rightparent, parentslot);
2538 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1);
2540 // case : the left leaf has extra data, so balance left with current
2541 else if ( (leftinner != NULL && !leftinner->isfew()) && (rightinner != NULL && rightinner->isfew()) )
2543 if (leftparent == parent)
2544 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2546 myres |= merge_inner(inner, rightinner, rightparent, parentslot);
2548 // case : both the leaf and right leaves have extra data and our
2549 // parent, choose the leaf with more data
2550 else if (leftparent == rightparent)
2552 if (leftinner->slotuse <= rightinner->slotuse)
2553 shift_left_inner(inner, rightinner, rightparent, parentslot);
2555 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2559 if (leftparent == parent)
2560 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2562 shift_left_inner(inner, rightinner, rightparent, parentslot);
2570 /// Merge two leaf nodes. The function moves all key/data pairs from right
2571 /// to left and sets right's slotuse to zero. The right slot is then
2572 /// removed by the calling parent node.
2573 result_t merge_leaves(leaf_node* left, leaf_node* right, inner_node* parent)
2575 BTREE_PRINT("Merge leaf nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl);
2578 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2579 BTREE_ASSERT(parent->level == 1);
2581 BTREE_ASSERT(left->slotuse + right->slotuse < leafslotmax);
2583 for (unsigned int i = 0; i < right->slotuse; i++)
2585 left->slotkey[left->slotuse + i] = right->slotkey[i];
2586 left->slotdata[left->slotuse + i] = right->slotdata[i];
2587 left->weights[left->slotuse + i] = right->weights[i];
2589 left->slotuse += right->slotuse;
2590 left->weight += right->weight;
2592 left->nextleaf = right->nextleaf;
2594 left->nextleaf->prevleaf = left;
2601 return btree_fixmerge;
2604 /// Merge two inner nodes. The function moves all key/childid pairs from
2605 /// right to left and sets right's slotuse to zero. The right slot is then
2606 /// removed by the calling parent node.
2607 static result_t merge_inner(inner_node* left, inner_node* right, inner_node* parent, unsigned int parentslot)
2609 BTREE_PRINT("Merge inner nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl);
2611 BTREE_ASSERT(left->level == right->level);
2612 BTREE_ASSERT(parent->level == left->level + 1);
2614 BTREE_ASSERT(parent->childid[parentslot] == left);
2616 BTREE_ASSERT(left->slotuse + right->slotuse < innerslotmax);
2620 // find the left node's slot in the parent's children
2621 unsigned int leftslot = 0;
2622 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2625 BTREE_ASSERT(leftslot < parent->slotuse);
2626 BTREE_ASSERT(parent->childid[leftslot] == left);
2627 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2629 BTREE_ASSERT(parentslot == leftslot);
2632 // retrieve the decision key from parent
2633 left->slotkey[left->slotuse] = parent->slotkey[parentslot];
2636 // copy over keys and children from right
2637 for (unsigned int i = 0; i < right->slotuse; i++)
2639 left->slotkey[left->slotuse + i] = right->slotkey[i];
2640 left->childid[left->slotuse + i] = right->childid[i];
2642 left->slotuse += right->slotuse;
2643 left->weight += right->weight;
2645 left->childid[left->slotuse] = right->childid[right->slotuse];
2650 return btree_fixmerge;
2653 /// Balance two leaf nodes. The function moves key/data pairs from right to
2654 /// left so that both nodes are equally filled. The parent node is updated
2656 static result_t shift_left_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot)
2658 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2659 BTREE_ASSERT(parent->level == 1);
2661 BTREE_ASSERT(left->nextleaf == right);
2662 BTREE_ASSERT(left == right->prevleaf);
2664 BTREE_ASSERT(left->slotuse < right->slotuse);
2665 BTREE_ASSERT(parent->childid[parentslot] == left);
2667 unsigned int shiftnum = (right->slotuse - left->slotuse) / 2;
2669 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl);
2671 BTREE_ASSERT(left->slotuse + shiftnum < leafslotmax);
2673 // copy the first items from the right node to the last slot in the left node.
2674 for(unsigned int i = 0; i < shiftnum; i++)
2676 left->slotkey[left->slotuse + i] = right->slotkey[i];
2677 left->slotdata[left->slotuse + i] = right->slotdata[i];
2678 left->weights[left->slotuse + i] = right->weights[i];
2679 left->weight += right->weights[i];
2680 right->weight -= right->weights[i];
2682 left->slotuse += shiftnum;
2684 // shift all slots in the right node to the left
2686 right->slotuse -= shiftnum;
2687 for(int i = 0; i < right->slotuse; i++)
2689 right->slotkey[i] = right->slotkey[i + shiftnum];
2690 right->slotdata[i] = right->slotdata[i + shiftnum];
2691 right->weights[i] = right->weights[i + shiftnum];
2695 if (parentslot < parent->slotuse) {
2696 parent->slotkey[parentslot] = left->slotkey[left->slotuse - 1];
2699 else { // the update is further up the tree
2700 return result_t(btree_update_lastkey, left->slotkey[left->slotuse - 1]);
2704 /// Balance two inner nodes. The function moves key/data pairs from right
2705 /// to left so that both nodes are equally filled. The parent node is
2706 /// updated if possible.
2707 static void shift_left_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot)
2709 BTREE_ASSERT(left->level == right->level);
2710 BTREE_ASSERT(parent->level == left->level + 1);
2712 BTREE_ASSERT(left->slotuse < right->slotuse);
2713 BTREE_ASSERT(parent->childid[parentslot] == left);
2715 unsigned int shiftnum = (right->slotuse - left->slotuse) / 2;
2717 BTREE_PRINT("Shifting (inner) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl);
2719 BTREE_ASSERT(left->slotuse + shiftnum < innerslotmax);
2723 // find the left node's slot in the parent's children and compare to parentslot
2725 unsigned int leftslot = 0;
2726 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2729 BTREE_ASSERT(leftslot < parent->slotuse);
2730 BTREE_ASSERT(parent->childid[leftslot] == left);
2731 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2733 BTREE_ASSERT(leftslot == parentslot);
2736 // copy the parent's decision slotkey and childid to the first new key on the left
2737 left->slotkey[left->slotuse] = parent->slotkey[parentslot];
2740 // copy the other items from the right node to the last slots in the left node.
2741 for(unsigned int i = 0; i < shiftnum - 1; i++)
2743 left->slotkey[left->slotuse + i] = right->slotkey[i];
2744 left->childid[left->slotuse + i] = right->childid[i];
2745 left->weight += right->childid[i]->weight;
2746 right->weight -= right->childid[i]->weight;
2748 left->slotuse += shiftnum - 1;
2751 parent->slotkey[parentslot] = right->slotkey[shiftnum - 1];
2752 // last pointer in left
2753 left->childid[left->slotuse] = right->childid[shiftnum - 1];
2754 left->weight += right->childid[shiftnum - 1]->weight;
2755 right->weight -= right->childid[shiftnum - 1]->weight;
2757 // shift all slots in the right node
2759 right->slotuse -= shiftnum;
2760 for(int i = 0; i < right->slotuse; i++)
2762 right->slotkey[i] = right->slotkey[i + shiftnum];
2763 right->childid[i] = right->childid[i + shiftnum];
2765 right->childid[right->slotuse] = right->childid[right->slotuse + shiftnum];
2768 /// Balance two leaf nodes. The function moves key/data pairs from left to
2769 /// right so that both nodes are equally filled. The parent node is updated
2771 static void shift_right_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot)
2773 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2774 BTREE_ASSERT(parent->level == 1);
2776 BTREE_ASSERT(left->nextleaf == right);
2777 BTREE_ASSERT(left == right->prevleaf);
2778 BTREE_ASSERT(parent->childid[parentslot] == left);
2780 BTREE_ASSERT(left->slotuse > right->slotuse);
2782 unsigned int shiftnum = (left->slotuse - right->slotuse) / 2;
2784 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl);
2788 // find the left node's slot in the parent's children
2789 unsigned int leftslot = 0;
2790 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2793 BTREE_ASSERT(leftslot < parent->slotuse);
2794 BTREE_ASSERT(parent->childid[leftslot] == left);
2795 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2797 BTREE_ASSERT(leftslot == parentslot);
2800 // shift all slots in the right node
2802 BTREE_ASSERT(right->slotuse + shiftnum < leafslotmax);
2804 for(int i = right->slotuse; i >= 0; i--)
2806 right->slotkey[i + shiftnum] = right->slotkey[i];
2807 right->slotdata[i + shiftnum] = right->slotdata[i];
2808 right->weights[i + shiftnum] = right->weights[i];
2810 right->slotuse += shiftnum;
2812 // copy the last items from the left node to the first slot in the right node.
2813 for(unsigned int i = 0; i < shiftnum; i++)
2815 right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i];
2816 right->slotdata[i] = left->slotdata[left->slotuse - shiftnum + i];
2817 right->weights[i] = left->weights[left->slotuse - shiftnum + i];
2818 right->weight += left->weights[left->slotuse - shiftnum + i];
2819 left->weight -= left->weights[left->slotuse - shiftnum + i];
2821 left->slotuse -= shiftnum;
2823 parent->slotkey[parentslot] = left->slotkey[left->slotuse-1];
2826 /// Balance two inner nodes. The function moves key/data pairs from left to
2827 /// right so that both nodes are equally filled. The parent node is updated
2829 static void shift_right_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot)
2831 BTREE_ASSERT(left->level == right->level);
2832 BTREE_ASSERT(parent->level == left->level + 1);
2834 BTREE_ASSERT(left->slotuse > right->slotuse);
2835 BTREE_ASSERT(parent->childid[parentslot] == left);
2837 unsigned int shiftnum = (left->slotuse - right->slotuse) / 2;
2839 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl);
2843 // find the left node's slot in the parent's children
2844 unsigned int leftslot = 0;
2845 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2848 BTREE_ASSERT(leftslot < parent->slotuse);
2849 BTREE_ASSERT(parent->childid[leftslot] == left);
2850 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2852 BTREE_ASSERT(leftslot == parentslot);
2855 // shift all slots in the right node
2857 BTREE_ASSERT(right->slotuse + shiftnum < innerslotmax);
2859 right->childid[right->slotuse + shiftnum] = right->childid[right->slotuse];
2860 for(int i = right->slotuse-1; i >= 0; i--)
2862 right->slotkey[i + shiftnum] = right->slotkey[i];
2863 right->childid[i + shiftnum] = right->childid[i];
2866 right->slotuse += shiftnum;
2868 // copy the parent's decision slotkey and childid to the last new key on the right
2869 right->slotkey[shiftnum - 1] = parent->slotkey[parentslot];
2870 right->childid[shiftnum - 1] = left->childid[left->slotuse];
2871 right->weight += left->childid[left->slotuse]->weight;
2872 left->weight -= left->childid[left->slotuse]->weight;
2874 // copy the remaining last items from the left node to the first slot in the right node.
2875 for(unsigned int i = 0; i < shiftnum - 1; i++)
2877 right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i + 1];
2878 right->childid[i] = left->childid[left->slotuse - shiftnum + i + 1];
2879 right->weight += left->childid[left->slotuse - shiftnum + i + 1]->weight;
2880 left->weight -= left->childid[left->slotuse - shiftnum + i + 1]->weight;
2883 // copy the first to-be-removed key from the left node to the parent's decision slot
2884 parent->slotkey[parentslot] = left->slotkey[left->slotuse - shiftnum];
2886 left->slotuse -= shiftnum;
2891 // *** Debug Printing
2893 /// Print out the B+ tree structure with keys onto the given ostream. This
2894 /// function requires that the header is compiled with BTREE_DEBUG and that
2895 /// key_type is printable via std::ostream.
2896 void print(std::ostream &os) const
2899 print_node(os, root, 0, true);
2903 /// Print out only the leaves via the double linked list.
2904 void print_leaves(std::ostream &os) const
2906 os << "leaves:" << std::endl;
2908 const leaf_node *n = headleaf;
2912 os << " " << n << std::endl;
2920 /// Recursively descend down the tree and print out nodes.
2921 static void print_node(std::ostream &os, const node* node, unsigned int depth=0, bool recursive=false)
2923 for(unsigned int i = 0; i < depth; i++) os << " ";
2925 os << "node " << node << " level " << node->level << " weight " << node->weight << " slotuse " << node->slotuse << std::endl;
2927 if (node->isleafnode())
2929 const leaf_node *leafnode = static_cast<const leaf_node*>(node);
2931 for(unsigned int i = 0; i < depth; i++) os << " ";
2932 os << " leaf prev " << leafnode->prevleaf << " next " << leafnode->nextleaf << std::endl;
2934 for(unsigned int i = 0; i < depth; i++) os << " ";
2936 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot)
2938 os << leafnode->slotkey[slot] << " "; // << "(data: " << leafnode->slotdata[slot] << ") ";
2944 const inner_node *innernode = static_cast<const inner_node*>(node);
2946 for(unsigned int i = 0; i < depth; i++) os << " ";
2948 for (unsigned short slot = 0; slot < innernode->slotuse; ++slot)
2950 os << "(" << innernode->childid[slot] << ") " << innernode->slotkey[slot] << " ";
2952 os << "(" << innernode->childid[innernode->slotuse] << ")" << std::endl;
2956 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot)
2958 print_node(os, innernode->childid[slot], depth + 1, recursive);
2966 // *** Verification of B+ Tree Invariants
2968 /// Run a thorough verification of all B+ tree invariants. The program
2969 /// aborts via assert() if something is wrong.
2972 key_type minkey, maxkey;
2977 verify_node(root, &minkey, &maxkey, vstats);
2979 assert( vstats.itemcount == stats.itemcount );
2980 assert( vstats.leaves == stats.leaves );
2981 assert( vstats.innernodes == stats.innernodes );
2989 /// Recursively descend down the tree and verify each node
2990 void verify_node(const node* n, key_type* minkey, key_type* maxkey, tree_stats &vstats) const
2992 BTREE_PRINT("verifynode " << n << std::endl);
2994 if (n->isleafnode())
2996 const leaf_node *leaf = static_cast<const leaf_node*>(n);
2998 assert(leaf == root || !leaf->isunderflow());
3000 for(unsigned short slot = 0; slot < leaf->slotuse - 1; ++slot)
3002 assert(key_lessequal(leaf->slotkey[slot], leaf->slotkey[slot + 1]));
3005 *minkey = leaf->slotkey[0];
3006 *maxkey = leaf->slotkey[leaf->slotuse - 1];
3009 vstats.itemcount += leaf->slotuse;
3011 else // !n->isleafnode()
3013 const inner_node *inner = static_cast<const inner_node*>(n);
3014 vstats.innernodes++;
3016 assert(inner == root || !inner->isunderflow());
3018 for(unsigned short slot = 0; slot < inner->slotuse - 1; ++slot)
3020 assert(key_lessequal(inner->slotkey[slot], inner->slotkey[slot + 1]));
3023 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot)
3025 const node *subnode = inner->childid[slot];
3026 key_type subminkey = key_type();
3027 key_type submaxkey = key_type();
3029 assert(subnode->level + 1 == inner->level);
3030 verify_node(subnode, &subminkey, &submaxkey, vstats);
3032 BTREE_PRINT("verify subnode " << subnode << ": " << subminkey << " - " << submaxkey << std::endl);
3035 *minkey = subminkey;
3037 assert(key_greaterequal(subminkey, inner->slotkey[slot-1]));
3039 if (slot == inner->slotuse)
3040 *maxkey = submaxkey;
3042 assert(key_equal(inner->slotkey[slot], submaxkey));
3044 if (inner->level == 1 && slot < inner->slotuse)
3046 // children are leaves and must be linked together in the
3048 const leaf_node *leafa = static_cast<const leaf_node*>(inner->childid[slot]);
3049 const leaf_node *leafb = static_cast<const leaf_node*>(inner->childid[slot + 1]);
3051 assert(leafa->nextleaf == leafb);
3052 assert(leafa == leafb->prevleaf);
3053 (void)leafa; (void)leafb;
3055 if (inner->level == 2 && slot < inner->slotuse)
3057 // verify leaf links between the adjacent inner nodes
3058 const inner_node *parenta = static_cast<const inner_node*>(inner->childid[slot]);
3059 const inner_node *parentb = static_cast<const inner_node*>(inner->childid[slot+1]);
3061 const leaf_node *leafa = static_cast<const leaf_node*>(parenta->childid[parenta->slotuse]);
3062 const leaf_node *leafb = static_cast<const leaf_node*>(parentb->childid[0]);
3064 assert(leafa->nextleaf == leafb);
3065 assert(leafa == leafb->prevleaf);
3066 (void)leafa; (void)leafb;
3072 /// Verify the double linked list of leaves.
3073 void verify_leaflinks() const
3075 const leaf_node *n = headleaf;
3077 assert(n->level == 0);
3078 assert(!n || n->prevleaf == NULL);
3080 unsigned int testcount = 0;
3084 assert(n->level == 0);
3086 for(unsigned short slot = 0; slot < n->slotuse - 1; ++slot)
3088 assert(key_lessequal(n->slotkey[slot], n->slotkey[slot + 1]));
3091 testcount += n->slotuse;
3095 assert(key_lessequal(n->slotkey[n->slotuse-1], n->nextleaf->slotkey[0]));
3097 assert(n == n->nextleaf->prevleaf);
3101 assert(tailleaf == n);
3107 assert(testcount == size());
3111 // *** Dump and Restore of B+ Trees
3113 /// \internal A header for the binary image containing the base properties
3114 /// of the B+ tree. These properties have to match the current template
3118 /// "stx-btree", just to stop the restore() function from loading garbage
3122 unsigned short version;
3124 /// sizeof(key_type)
3125 unsigned short key_type_size;
3127 /// sizeof(data_type)
3128 unsigned short data_type_size;
3130 /// Number of slots in the leaves
3131 unsigned short leafslots;
3133 /// Number of slots in the inner nodes
3134 unsigned short innerslots;
3136 /// Allow duplicates
3137 bool allow_duplicates;
3139 /// The item count of the tree
3140 size_type itemcount;
3142 /// Fill the struct with the current B+ tree's properties, itemcount is
3146 // don't want to include string.h just for this signature
3147 *reinterpret_cast<unsigned int*>(signature+0) = 0x2d787473;
3148 *reinterpret_cast<unsigned int*>(signature+4) = 0x65727462;
3149 *reinterpret_cast<unsigned int*>(signature+8) = 0x00000065;
3152 key_type_size = sizeof(typename btree_self::key_type);
3153 data_type_size = sizeof(typename btree_self::data_type);
3154 leafslots = btree_self::leafslotmax;
3155 innerslots = btree_self::innerslotmax;
3156 allow_duplicates = btree_self::allow_duplicates;
3159 /// Returns true if the headers have the same vital properties
3160 inline bool same(const struct dump_header &o) const
3162 return (*reinterpret_cast<const unsigned int*>(signature+0) ==
3163 *reinterpret_cast<const unsigned int*>(o.signature+0))
3164 && (*reinterpret_cast<const unsigned int*>(signature+4) ==
3165 *reinterpret_cast<const unsigned int*>(o.signature+4))
3166 && (*reinterpret_cast<const unsigned int*>(signature+8) ==
3167 *reinterpret_cast<const unsigned int*>(o.signature+8))
3169 && (version == o.version)
3170 && (key_type_size == o.key_type_size)
3171 && (data_type_size == o.data_type_size)
3172 && (leafslots == o.leafslots)
3173 && (innerslots == o.innerslots)
3174 && (allow_duplicates == o.allow_duplicates);
3180 /// Dump the contents of the B+ tree out onto an ostream as a binary
3181 /// image. The image contains memory pointers which will be fixed when the
3182 /// image is restored. For this to work your key_type and data_type must be
3183 /// integral types and contain no pointers or references.
3184 void dump(std::ostream &os) const
3186 struct dump_header header;
3188 header.itemcount = size();
3190 os.write(reinterpret_cast<char*>(&header), sizeof(header));
3193 dump_node(os, root);
3196 /// Restore a binary image of a dumped B+ tree from an istream. The B+ tree
3197 /// pointers are fixed using the dump order. For dump and restore to work
3198 /// your key_type and data_type must be integral types and contain no
3199 /// pointers or references. Returns true if the restore was successful.
3200 bool restore(std::istream &is)
3202 struct dump_header fileheader;
3203 is.read(reinterpret_cast<char*>(&fileheader), sizeof(fileheader));
3204 if (!is.good()) return false;
3206 struct dump_header myheader;
3208 myheader.itemcount = fileheader.itemcount;
3210 if (!myheader.same(fileheader))
3212 BTREE_PRINT("btree::restore: file header does not match instantiation signature." << std::endl);
3218 if (fileheader.itemcount > 0)
3220 root = restore_node(is);
3221 if (root == NULL) return false;
3223 stats.itemcount = fileheader.itemcount;
3227 if (debug) print(std::cout);
3229 if (selfverify) verify();
3236 /// Recursively descend down the tree and dump each node in a precise order
3237 void dump_node(std::ostream &os, const node* n) const
3239 BTREE_PRINT("dump_node " << n << std::endl);
3241 if (n->isleafnode())
3243 const leaf_node *leaf = static_cast<const leaf_node*>(n);
3245 os.write(reinterpret_cast<const char*>(leaf), sizeof(*leaf));
3247 else // !n->isleafnode()
3249 const inner_node *inner = static_cast<const inner_node*>(n);
3251 os.write(reinterpret_cast<const char*>(inner), sizeof(*inner));
3253 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot)
3255 const node *subnode = inner->childid[slot];
3257 dump_node(os, subnode);
3262 /// Read the dump image and construct a tree from the node order in the
3264 node* restore_node(std::istream &is)
3272 // first read only the top of the node
3273 is.read(reinterpret_cast<char*>(&nu.top), sizeof(nu.top));
3274 if (!is.good()) return NULL;
3276 if (nu.top.isleafnode())
3278 // read remaining data of leaf node
3279 is.read(reinterpret_cast<char*>(&nu.leaf) + sizeof(nu.top), sizeof(nu.leaf) - sizeof(nu.top));
3280 if (!is.good()) return NULL;
3282 leaf_node *newleaf = allocate_leaf();
3284 // copy over all data, the leaf nodes contain only their double linked list pointers
3287 // reconstruct the linked list from the order in the file
3288 if (headleaf == NULL) {
3289 BTREE_ASSERT(newleaf->prevleaf == NULL);
3290 headleaf = tailleaf = newleaf;
3293 newleaf->prevleaf = tailleaf;
3294 tailleaf->nextleaf = newleaf;
3302 // read remaining data of inner node
3303 is.read(reinterpret_cast<char*>(&nu.inner) + sizeof(nu.top), sizeof(nu.inner) - sizeof(nu.top));
3304 if (!is.good()) return NULL;
3306 inner_node *newinner = allocate_inner(0);
3308 // copy over all data, the inner nodes contain only pointers to their children
3309 *newinner = nu.inner;
3311 for(unsigned short slot = 0; slot <= newinner->slotuse; ++slot)
3313 newinner->childid[slot] = restore_node(is);
3323 #endif // _STX_RA_BTREE_H_