1 // $Id: btree.h 72 2008-01-25 12:47:26Z tb $
3 * Contains the main B+ tree implementation template class btree.
7 * STX B+ Tree Template Classes v0.8.1
8 * Copyright (C) 2008 Timo Bingmann
9 * 2008 Stefan Schimanski (weighted variant)
11 * This library is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU Lesser General Public License as published by the
13 * Free Software Foundation; either version 2.1 of the License, or (at your
14 * option) any later version.
16 * This library is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this library; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #ifndef _STX_RA_BTREE_H_
27 #define _STX_RA_BTREE_H_
29 // *** Required Headers from the STL
38 // *** Debugging Macros
44 /// Print out debug information to std::cout if BTREE_DEBUG is defined.
45 #define BTREE_PRINT(x) do { if (debug) (std::cout << x); } while(0)
47 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify().
48 #define BTREE_ASSERT(x) do { assert(x); } while(0)
52 /// Print out debug information to std::cout if BTREE_DEBUG is defined.
53 #define BTREE_PRINT(x) do { } while(0)
55 /// Assertion only if BTREE_DEBUG is defined. This is not used in verify().
56 #define BTREE_ASSERT(x) do { } while(0)
60 /// The maximum of a and b. Used in some compile-time formulas.
61 #define BTREE_MAX(a,b) ((a) < (b) ? (b) : (a))
64 /// The macro BTREE_FRIENDS can be used by outside class to access the B+
65 /// tree internals. This was added for wxBTreeDemo to be able to draw the
67 #define BTREE_FRIENDS friend class btree_friend;
70 /// STX - Some Template Extensions namespace
73 /** Generates default traits for a B+ tree used as a set. It estimates leaf and
74 * inner node sizes by assuming a cache line size of 256 bytes. */
75 template <typename _Key>
76 struct btree_default_set_traits
78 /// If true, the tree will self verify it's invariants after each insert()
79 /// or erase(). The header must have been compiled with BTREE_DEBUG defined.
80 static const bool selfverify = false;
82 /// If true, the tree will print out debug information and a tree dump
83 /// during insert() or erase() operation. The header must have been
84 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream
86 static const bool debug = false;
88 /// Number of slots in each leaf of the tree. Estimated so that each node
89 /// has a size of about 256 bytes.
90 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key)) );
92 /// Number of slots in each inner node of the tree. Estimated so that each node
93 /// has a size of about 256 bytes.
94 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
97 /** Generates default traits for a B+ tree used as a map. It estimates leaf and
98 * inner node sizes by assuming a cache line size of 256 bytes. */
99 template <typename _Key, typename _Data>
100 struct btree_default_map_traits
102 /// If true, the tree will self verify it's invariants after each insert()
103 /// or erase(). The header must have been compiled with BTREE_DEBUG defined.
104 static const bool selfverify = false;
106 /// If true, the tree will print out debug information and a tree dump
107 /// during insert() or erase() operation. The header must have been
108 /// compiled with BTREE_DEBUG defined and key_type must be std::ostream
110 static const bool debug = false;
112 /// Number of slots in each leaf of the tree. Estimated so that each node
113 /// has a size of about 256 bytes.
114 static const int leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(_Data)) );
116 /// Number of slots in each inner node of the tree. Estimated so that each node
117 /// has a size of about 256 bytes.
118 static const int innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
123 /** @brief Basic class implementing a base B+ tree data structure in memory.
125 * The base implementation of a memory B+ tree. It is based on the
126 * implementation in Cormen's Introduction into Algorithms, Jan Jannink's paper
127 * and other algorithm resources. Almost all STL-required function calls are
128 * implemented. The asymptotic time requirements of the STL are not always
129 * fulfilled in theory, however in practice this B+ tree performs better than a
130 * red-black tree by using more memory. The insertion function splits the nodes
131 * on the recursion unroll. Erase is largely based on Jannink's ideas.
133 * This class is specialized into btree_set, btree_multiset, btree_map and
134 * btree_multimap using default template parameters and facade functions.
136 template <typename _Key,
137 typename _Weight = size_t,
138 typename _Data = Void,
139 typename _Value = std::pair<_Key, _Data>,
140 typename _Compare = std::less<_Key>,
141 typename _Traits = btree_default_map_traits<_Key, _Data>,
142 bool _Duplicates = false>
146 // *** Template Parameter Types
148 /// First template parameter: The key type of the B+ tree. This is stored
149 /// in inner nodes and leaves
150 typedef _Key key_type;
153 typedef _Weight weight_type;
155 /// Second template parameter: The data type associated with each
156 /// key. Stored in the B+ tree's leaves
157 typedef _Data data_type;
159 /// Third template parameter: Composition pair of key and data types, this
160 /// is required by the STL standard. The B+ tree does not store key and
161 /// data together. If value_type == key_type then the B+ tree implements a
163 typedef _Value value_type;
165 /// Fourth template parameter: Key comparison function object
166 typedef _Compare key_compare;
168 /// Fifth template parameter: Traits object used to define more parameters
170 typedef _Traits traits;
172 /// Sixth template parameter: Allow duplicate keys in the B+ tree. Used to
173 /// implement multiset and multimap.
174 static const bool allow_duplicates = _Duplicates;
176 // The macro BTREE_FRIENDS can be used by outside class to access the B+
177 // tree internals. This was added for wxBTreeDemo to be able to draw the
182 // *** Constructed Types
184 /// Typedef of our own type
185 typedef weighted_btree<key_type, weight_type, data_type, value_type,
186 key_compare, traits, allow_duplicates> btree_self;
188 /// Size type used to count keys
189 typedef size_t size_type;
191 /// The pair of key_type and data_type, this may be different from value_type.
192 typedef std::pair<key_type, data_type> pair_type;
195 // *** Static Constant Options and Values of the B+ Tree
197 /// Base B+ tree parameter: The number of key/data slots in each leaf
198 static const unsigned short leafslotmax = traits::leafslots;
200 /// Base B+ tree parameter: The number of key slots in each inner node,
201 /// this can differ from slots in each leaf.
202 static const unsigned short innerslotmax = traits::innerslots;
204 /// Computed B+ tree parameter: The minimum number of key/data slots used
205 /// in a leaf. If fewer slots are used, the leaf will be merged or slots
206 /// shifted from it's siblings.
207 static const unsigned short minleafslots = (leafslotmax / 2);
209 /// Computed B+ tree parameter: The minimum number of key slots used
210 /// in an inner node. If fewer slots are used, the inner node will be
211 /// merged or slots shifted from it's siblings.
212 static const unsigned short mininnerslots = (innerslotmax / 2);
214 /// Debug parameter: Enables expensive and thorough checking of the B+ tree
215 /// invariants after each insert/erase operation.
216 static const bool selfverify = traits::selfverify;
218 /// Debug parameter: Prints out lots of debug information about how the
219 /// algorithms change the tree. Requires the header file to be compiled
220 /// with BTREE_DEBUG and the key type must be std::ostream printable.
221 static const bool debug = traits::debug;
224 // *** Node Classes for In-Memory Nodes
226 /// The header structure of each node in-memory. This structure is extended
227 /// by inner_node or leaf_node.
230 /// Level in the b-tree, if level == 0 -> leaf node
231 unsigned short level;
233 /// Number of key slotuse use, so number of valid children or data
235 unsigned short slotuse;
240 /// Delayed initialisation of constructed node
241 inline void initialize(const unsigned short l)
248 /// True if this is a leaf node
249 inline bool isleafnode() const
255 /// Extended structure of a inner node in-memory. Contains only keys and no
257 struct inner_node : public node
259 /// Keys of children or data pointers
260 key_type slotkey[innerslotmax];
262 /// Pointers to children
263 node* childid[innerslotmax+1];
265 /// Set variables to initial values
266 inline void initialize(const unsigned short l)
271 /// True if the node's slots are full
272 inline bool isfull() const
274 return (node::slotuse == innerslotmax);
277 /// True if few used entries, less than half full
278 inline bool isfew() const
280 return (node::slotuse <= mininnerslots);
283 /// True if node has too few entries
284 inline bool isunderflow() const
286 return (node::slotuse < mininnerslots);
290 /// Extended structure of a leaf node in memory. Contains pairs of keys and
291 /// data items. Key and data slots are kept in separate arrays, because the
292 /// key array is traversed very often compared to accessing the data items.
293 struct leaf_node : public node
295 /// Double linked list pointers to traverse the leaves
298 /// Double linked list pointers to traverse the leaves
301 /// Keys of children or data pointers
302 key_type slotkey[leafslotmax];
305 data_type slotdata[leafslotmax];
308 weight_type weights[leafslotmax];
310 /// Set variables to initial values
311 inline void initialize()
314 prevleaf = nextleaf = NULL;
317 /// True if the node's slots are full
318 inline bool isfull() const
320 return (node::slotuse == leafslotmax);
323 /// True if few used entries, less than half full
324 inline bool isfew() const
326 return (node::slotuse <= minleafslots);
329 /// True if node has too few entries
330 inline bool isunderflow() const
332 return (node::slotuse < minleafslots);
337 // *** Template Magic to Convert a pair or key/data types to a value_type
339 /// \internal For sets the second pair_type is an empty struct, so the
340 /// value_type should only be the first.
341 template <typename value_type, typename pair_type>
342 struct btree_pair_to_value
344 /// Convert a fake pair type to just the first component
345 inline value_type operator()(pair_type& p) const {
348 /// Convert a fake pair type to just the first component
349 inline value_type operator()(const pair_type& p) const {
354 /// \internal For maps value_type is the same as the pair_type
355 template <typename value_type>
356 struct btree_pair_to_value<value_type, value_type>
358 /// Identity "convert" a real pair type to just the first component
359 inline value_type operator()(pair_type& p) const {
362 /// Identity "convert" a real pair type to just the first component
363 inline value_type operator()(const pair_type& p) const {
368 /// Using template specialization select the correct converter used by the
370 typedef btree_pair_to_value<value_type, pair_type> pair_to_value_type;
373 // *** Iterators and Reverse Iterators
376 class const_iterator;
378 /// STL-like iterator object for B+ tree items. The iterator points to a
379 /// specific slot number in a leaf.
385 /// The key type of the btree. Returned by key().
386 typedef typename weighted_btree::key_type key_type;
389 typedef typename weighted_btree::weight_type weight_type;
391 /// The data type of the btree. Returned by data().
392 typedef typename weighted_btree::data_type data_type;
394 /// The value type of the btree. Returned by operator*().
395 typedef typename weighted_btree::value_type value_type;
397 /// The pair type of the btree.
398 typedef typename weighted_btree::pair_type pair_type;
400 /// Reference to the value_type. Required by the reverse_iterator.
401 typedef value_type& reference;
403 /// Pointer to the value_type. Required by the reverse_iterator.
404 typedef value_type* pointer;
406 /// STL-magic iterator category
407 typedef std::bidirectional_iterator_tag iterator_category;
410 typedef ptrdiff_t difference_type;
413 typedef iterator self;
416 friend class weighted_btree;
420 /// The currently referenced leaf node of the tree
421 typename weighted_btree::leaf_node* currnode;
423 /// Current key/data slot referenced
424 unsigned short currslot;
426 /// Friendly to the const_iterator, so it may access the two data items directly
427 friend class weighted_btree<key_type, weight_type, data_type, value_type, key_compare, traits, allow_duplicates>::const_iterator;
429 /// Evil! A temporary value_type to STL-correctly deliver operator* and
431 mutable value_type temp_value;
433 // The macro BTREE_FRIENDS can be used by outside class to access the B+
434 // tree internals. This was added for wxBTreeDemo to be able to draw the
441 /// Constructor of a mutable iterator
442 inline iterator(typename weighted_btree::leaf_node *l, unsigned short s)
443 : currnode(l), currslot(s)
446 /// Dereference the iterator, this is not a value_type& because key and
447 /// value are not stored together
448 inline reference operator*() const
450 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
451 currnode->slotdata[currslot]) );
455 /// Dereference the iterator. Do not use this if possible, use key()
456 /// and data() instead. The B+ tree does not stored key and data
458 inline pointer operator->() const
460 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
461 currnode->slotdata[currslot]) );
465 /// Key of the current slot
466 inline const key_type& key() const
468 return currnode->slotkey[currslot];
471 /// Weight of the current slot
472 inline weight_type weight() const
474 return currnode->weights[currslot];
477 /// Writable reference to the current data object
478 inline data_type& data() const
480 return currnode->slotdata[currslot];
483 /// Prefix++ advance the iterator to the next slot
484 inline self& operator++()
486 if (currslot + 1 < currnode->slotuse) {
489 else if (currnode->nextleaf != NULL) {
490 currnode = currnode->nextleaf;
495 currslot = currnode->slotuse;
501 /// Postfix++ advance the iterator to the next slot
502 inline self operator++(int)
504 self tmp = *this; // copy ourselves
506 if (currslot + 1 < currnode->slotuse) {
509 else if (currnode->nextleaf != NULL) {
510 currnode = currnode->nextleaf;
515 currslot = currnode->slotuse;
521 /// Prefix-- backstep the iterator to the last slot
522 inline self& operator--()
527 else if (currnode->prevleaf != NULL) {
528 currnode = currnode->prevleaf;
529 currslot = currnode->slotuse - 1;
539 /// Postfix-- backstep the iterator to the last slot
540 inline self operator--(int)
542 self tmp = *this; // copy ourselves
547 else if (currnode->prevleaf != NULL) {
548 currnode = currnode->prevleaf;
549 currslot = currnode->slotuse - 1;
559 /// Equality of iterators
560 inline bool operator==(const self& x) const
562 return (x.currnode == currnode) && (x.currslot == currslot);
565 /// Inequality of iterators
566 inline bool operator!=(const self& x) const
568 return (x.currnode != currnode) || (x.currslot != currslot);
572 /// STL-like read-only iterator object for B+ tree items. The iterator
573 /// points to a specific slot number in a leaf.
579 /// The key type of the btree. Returned by key().
580 typedef typename weighted_btree::key_type key_type;
582 /// The data type of the btree. Returned by data().
583 typedef typename weighted_btree::data_type data_type;
585 /// The value type of the btree. Returned by operator*().
586 typedef typename weighted_btree::value_type value_type;
588 /// The pair type of the btree.
589 typedef typename weighted_btree::pair_type pair_type;
591 /// Reference to the value_type. Required by the reverse_iterator.
592 typedef const value_type& reference;
594 /// Pointer to the value_type. Required by the reverse_iterator.
595 typedef const value_type* pointer;
597 /// STL-magic iterator category
598 typedef std::bidirectional_iterator_tag iterator_category;
601 typedef ptrdiff_t difference_type;
604 typedef const_iterator self;
607 friend class weighted_btree;
611 /// The currently referenced leaf node of the tree
612 const typename weighted_btree::leaf_node* currnode;
614 /// Current key/data slot referenced
615 unsigned short currslot;
617 /// Evil! A temporary value_type to STL-correctly deliver operator* and
619 mutable value_type temp_value;
621 // The macro BTREE_FRIENDS can be used by outside class to access the B+
622 // tree internals. This was added for wxBTreeDemo to be able to draw the
629 /// Constructor of a const iterator
630 inline const_iterator(const typename weighted_btree::leaf_node *l, unsigned short s)
631 : currnode(l), currslot(s)
634 /// Copy-constructor from a mutable const iterator
635 inline const_iterator(const iterator &it)
636 : currnode(it.currnode), currslot(it.currslot)
639 /// Dereference the iterator. Do not use this if possible, use key()
640 /// and data() instead. The B+ tree does not stored key and data
642 inline reference operator*() const
644 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
645 currnode->slotdata[currslot]) );
649 /// Dereference the iterator. Do not use this if possible, use key()
650 /// and data() instead. The B+ tree does not stored key and data
652 inline pointer operator->() const
654 temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
655 currnode->slotdata[currslot]) );
659 /// Key of the current slot
660 inline const key_type& key() const
662 return currnode->slotkey[currslot];
665 /// Weight of the current slot
666 inline weight_type weight() const
668 return currnode->weights[currslot];
671 /// Read-only reference to the current data object
672 inline const data_type& data() const
674 return currnode->slotdata[currslot];
677 /// Prefix++ advance the iterator to the next slot
678 inline self& operator++()
680 if (currslot + 1 < currnode->slotuse) {
683 else if (currnode->nextleaf != NULL) {
684 currnode = currnode->nextleaf;
689 currslot = currnode->slotuse;
695 /// Postfix++ advance the iterator to the next slot
696 inline self operator++(int)
698 self tmp = *this; // copy ourselves
700 if (currslot + 1 < currnode->slotuse) {
703 else if (currnode->nextleaf != NULL) {
704 currnode = currnode->nextleaf;
709 currslot = currnode->slotuse;
715 /// Prefix-- backstep the iterator to the last slot
716 inline self& operator--()
721 else if (currnode->prevleaf != NULL) {
722 currnode = currnode->prevleaf;
723 currslot = currnode->slotuse - 1;
733 /// Postfix-- backstep the iterator to the last slot
734 inline self operator--(int)
736 self tmp = *this; // copy ourselves
741 else if (currnode->prevleaf != NULL) {
742 currnode = currnode->prevleaf;
743 currslot = currnode->slotuse - 1;
753 /// Equality of iterators
754 inline bool operator==(const self& x) const
756 return (x.currnode == currnode) && (x.currslot == currslot);
759 /// Inequality of iterators
760 inline bool operator!=(const self& x) const
762 return (x.currnode != currnode) || (x.currslot != currslot);
766 /// create mutable reverse iterator by using STL magic
767 typedef std::reverse_iterator<iterator> reverse_iterator;
769 /// create constant reverse iterator by using STL magic
770 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
773 // *** Small Statistics Structure
775 /** A small struct containing basic statistics about the B+ tree. It can be
776 * fetched using get_stats(). */
779 /// Number of items in the B+ tree
782 /// Number of leaves in the B+ tree
785 /// Number of inner nodes in the B+ tree
786 size_type innernodes;
788 /// Base B+ tree parameter: The number of key/data slots in each leaf
789 static const unsigned short leafslots = btree_self::leafslotmax;
791 /// Base B+ tree parameter: The number of key slots in each inner node.
792 static const unsigned short innerslots = btree_self::innerslotmax;
797 leaves(0), innernodes(0)
801 /// Return the total number of nodes
802 inline size_type nodes() const
804 return innernodes + leaves;
807 /// Return the average fill of leaves
808 inline double avgfill_leaves() const
810 return static_cast<double>(itemcount) / (leaves * leafslots);
815 // *** Tree Object Data Members
817 /// Pointer to the B+ tree's root node, either leaf or inner node
820 /// Pointer to first leaf in the double linked leaf chain
823 /// Pointer to last leaf in the double linked leaf chain
826 /// Other small statistics about the B+ tree
829 /// Key comparison object. More comparison functions are generated from
831 key_compare key_less;
834 // *** Constructors and Destructor
836 /// Default constructor initializing an empty B+ tree with the standard key
837 /// comparison function
838 inline weighted_btree()
839 : root(NULL), headleaf(NULL), tailleaf(NULL)
843 /// Constructor initializing an empty B+ tree with a special key
844 /// comparison object
845 inline weighted_btree(const key_compare &kcf)
846 : root(NULL), headleaf(NULL), tailleaf(NULL),
851 /// Constructor initializing a B+ tree with the range [first,last)
852 template <class InputIterator>
853 inline weighted_btree(InputIterator first, InputIterator last)
854 : root(NULL), headleaf(NULL), tailleaf(NULL)
859 /// Constructor initializing a B+ tree with the range [first,last) and a
860 /// special key comparison object
861 template <class InputIterator>
862 inline weighted_btree(InputIterator first, InputIterator last, const key_compare &kcf)
863 : root(NULL), headleaf(NULL), tailleaf(NULL),
869 /// Frees up all used B+ tree memory pages
870 inline ~weighted_btree()
875 /// Fast swapping of two identical B+ tree objects.
876 void swap(btree_self& from)
878 std::swap(root, from.root);
879 std::swap(headleaf, from.headleaf);
880 std::swap(tailleaf, from.tailleaf);
881 std::swap(stats, from.stats);
882 std::swap(key_less, from.key_less);
886 // *** Key and Value Comparison Function Objects
888 /// Function class to compare value_type objects. Required by the STL
892 /// Key comparison function from the template parameter
893 key_compare key_comp;
895 /// Constructor called from btree::value_comp()
896 inline value_compare(key_compare kc)
900 /// Friendly to the btree class so it may call the constructor
901 friend class weighted_btree<key_type, weight_type, data_type, value_type, key_compare, traits, allow_duplicates>;
904 /// Function call "less"-operator resulting in true if x < y.
905 inline bool operator()(const value_type& x, const value_type& y) const
907 return key_comp(x.first, y.first);
911 /// Constant access to the key comparison object sorting the B+ tree
912 inline key_compare key_comp() const
917 /// Constant access to a constructed value_type comparison object. Required
919 inline value_compare value_comp() const
921 return value_compare(key_less);
925 // *** Convenient Key Comparison Functions Generated From key_less
927 /// True if a <= b ? constructed from key_less()
928 inline bool key_lessequal(const key_type &a, const key_type b) const
930 return !key_less(b, a);
933 /// True if a > b ? constructed from key_less()
934 inline bool key_greater(const key_type &a, const key_type &b) const
936 return key_less(b, a);
939 /// True if a >= b ? constructed from key_less()
940 inline bool key_greaterequal(const key_type &a, const key_type b) const
942 return !key_less(a, b);
945 /// True if a == b ? constructed from key_less(). This requires the <
946 /// relation to be a total order, otherwise the B+ tree cannot be sorted.
947 inline bool key_equal(const key_type &a, const key_type &b) const
949 return !key_less(a, b) && !key_less(b, a);
953 // *** Node Object Allocation and Deallocation Functions
955 /// Allocate and initialize a leaf node
956 inline leaf_node* allocate_leaf()
958 leaf_node* n = new leaf_node;
964 /// Allocate and initialize an inner node
965 inline inner_node* allocate_inner(unsigned short l)
967 inner_node* n = new inner_node;
973 /// Correctly free either inner or leaf node, destructs all contained key
974 /// and value objects
975 inline void free_node(node *n)
977 if (n->isleafnode()) {
978 delete static_cast<leaf_node*>(n);
982 delete static_cast<inner_node*>(n);
988 // *** Fast Destruction of the B+ Tree
990 /// Frees all key/data pairs and all nodes of the tree
995 clear_recursive(root);
999 headleaf = tailleaf = NULL;
1001 stats = tree_stats();
1004 BTREE_ASSERT(stats.itemcount == 0);
1008 /// Recursively free up nodes
1009 void clear_recursive(node *n)
1011 if (n->isleafnode())
1013 leaf_node *leafnode = static_cast<leaf_node*>(n);
1015 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot)
1017 // data objects are deleted by leaf_node's destructor
1022 inner_node *innernode = static_cast<inner_node*>(n);
1024 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot)
1026 clear_recursive(innernode->childid[slot]);
1027 free_node(innernode->childid[slot]);
1033 // *** STL Iterator Construction Functions
1035 /// Constructs a read/data-write iterator that points to the first slot in
1036 /// the first leaf of the B+ tree.
1037 inline iterator begin()
1039 return iterator(headleaf, 0);
1042 /// Constructs a read/data-write iterator that points to the first invalid
1043 /// slot in the last leaf of the B+ tree.
1044 inline iterator end()
1046 return iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
1049 /// Constructs a read-only constant iterator that points to the first slot
1050 /// in the first leaf of the B+ tree.
1051 inline const_iterator begin() const
1053 return const_iterator(headleaf, 0);
1056 /// Constructs a read-only constant iterator that points to the first
1057 /// invalid slot in the last leaf of the B+ tree.
1058 inline const_iterator end() const
1060 return const_iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
1063 /// Constructs a read/data-write reverse iterator that points to the first
1064 /// invalid slot in the last leaf of the B+ tree. Uses STL magic.
1065 inline reverse_iterator rbegin()
1067 return reverse_iterator(end());
1070 /// Constructs a read/data-write reverse iterator that points to the first
1071 /// slot in the first leaf of the B+ tree. Uses STL magic.
1072 inline reverse_iterator rend()
1074 return reverse_iterator(begin());
1077 /// Constructs a read-only reverse iterator that points to the first
1078 /// invalid slot in the last leaf of the B+ tree. Uses STL magic.
1079 inline const_reverse_iterator rbegin() const
1081 return const_reverse_iterator(end());
1084 /// Constructs a read-only reverse iterator that points to the first slot
1085 /// in the first leaf of the B+ tree. Uses STL magic.
1086 inline const_reverse_iterator rend() const
1088 return const_reverse_iterator(begin());
1092 // *** B+ Tree Node Binary Search Functions
1094 /// Searches for the first key in the node n less or equal to key. Uses
1095 /// binary search with an optional linear self-verification. This is a
1096 /// template function, because the slotkey array is located at different
1097 /// places in leaf_node and inner_node.
1098 template <typename node_type>
1099 inline int find_lower(const node_type *n, const key_type& key) const
1101 if (n->slotuse == 0) return 0;
1104 hi = n->slotuse - 1;
1108 int mid = (lo + hi) / 2;
1110 if (key_lessequal(key, n->slotkey[mid])) {
1118 if (hi < 0 || key_less(n->slotkey[hi], key))
1121 BTREE_PRINT("btree::find_lower: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", ");
1123 // verify result using simple linear search
1126 int i = n->slotuse - 1;
1127 while(i >= 0 && key_lessequal(key, n->slotkey[i]))
1131 BTREE_PRINT("testfind: " << i << std::endl);
1132 BTREE_ASSERT(i == hi);
1135 BTREE_PRINT(std::endl);
1141 /// Searches for the first summed weight in the node n less or equal to weight.
1142 inline int find_summed_weight_lower(const inner_node *n, weight_type weight) const
1144 if (n->slotuse == 0) return 0;
1147 weight_type w = n->childid[0]->weight;
1148 while (i < n->slotuse && w <= weight) {
1150 w += n->childid[i]->weight;
1156 /// Searches for the first summed weight in the node n less or equal to weight.
1157 inline int find_summed_weight_lower(const leaf_node *n, weight_type weight) const
1159 if (n->slotuse == 0) return 0;
1162 weight_type w = n->weights[0];
1163 while (i < n->slotuse && w <= weight) {
1171 /// Searches for the first key in the node n greater than key. Uses binary
1172 /// search with an optional linear self-verification. This is a template
1173 /// function, because the slotkey array is located at different places in
1174 /// leaf_node and inner_node.
1175 template <typename node_type>
1176 inline int find_upper(const node_type *n, const key_type& key) const
1178 if (n->slotuse == 0) return 0;
1181 hi = n->slotuse - 1;
1185 int mid = (lo + hi) / 2;
1187 if (key_less(key, n->slotkey[mid])) {
1195 if (hi < 0 || key_lessequal(n->slotkey[hi], key))
1198 BTREE_PRINT("btree::find_upper: on " << n << " key " << key << " -> (" << lo << ") " << hi << ", ");
1200 // verify result using simple linear search
1203 int i = n->slotuse - 1;
1204 while(i >= 0 && key_less(key, n->slotkey[i]))
1208 BTREE_PRINT("btree::find_upper testfind: " << i << std::endl);
1209 BTREE_ASSERT(i == hi);
1212 BTREE_PRINT(std::endl);
1218 /// Searches for the first summed weight in the node n greater than weight.
1219 inline int find_summed_weight_upper(const inner_node *n, weight_type weight) const
1221 if (n->slotuse == 0) return 0;
1224 weight_type w = n->childid[0]->weight;
1225 while (i < n->slotuse && w < weight) {
1227 w += n->childid[i]->weight;
1233 /// Searches for the first summed weight in the node n greater than weight.
1234 inline int find_summed_weight_upper(const leaf_node *n, weight_type weight) const
1236 if (n->slotuse == 0) return 0;
1239 weight_type w = n->weights[0];
1240 while (i < n->slotuse && w < weight) {
1249 // *** Access Functions to the Item Count
1251 /// Return the number of key/data pairs in the B+ tree
1252 inline size_type size() const
1254 return stats.itemcount;
1258 inline weight_type summed_weight() const
1261 return root->weight;
1266 /// Returns true if there is at least one key/data pair in the B+ tree
1267 inline bool empty() const
1269 return (size() == size_type(0));
1272 /// Returns the largest possible size of the B+ Tree. This is just a
1273 /// function required by the STL standard, the B+ Tree can hold more items.
1274 inline size_type max_size() const
1276 return size_type(-1);
1279 /// Return a const reference to the current statistics.
1280 inline const struct tree_stats& get_stats() const
1286 // *** Standard Access Functions Querying the Tree by Descending to a Leaf
1288 /// Non-STL function checking whether a key is in the B+ tree. The same as
1289 /// (find(k) != end()) or (count() != 0).
1290 bool exists(const key_type &key) const
1292 const node *n = root;
1294 if (!n) return false;
1296 while(!n->isleafnode())
1298 const inner_node *inner = static_cast<const inner_node*>(n);
1299 int slot = find_lower(inner, key);
1301 n = inner->childid[slot];
1304 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1306 int slot = find_lower(leaf, key);
1307 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]));
1310 /// Tries to locate a key in the B+ tree and returns an iterator to the
1311 /// key/data slot if found. If unsuccessful it returns end().
1312 iterator find(const key_type &key)
1315 if (!n) return end();
1317 while(!n->isleafnode())
1319 const inner_node *inner = static_cast<const inner_node*>(n);
1320 int slot = find_lower(inner, key);
1322 n = inner->childid[slot];
1325 leaf_node *leaf = static_cast<leaf_node*>(n);
1327 unsigned short slot = find_lower(leaf, key);
1328 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1329 ? iterator(leaf, slot) : end();
1332 /// Changes the weight of the first node with the given key to the
1334 void change_weight(iterator & it, weight_type w)
1338 if (it.weight() == w)
1341 while(!n->isleafnode())
1343 const inner_node *inner = static_cast<const inner_node*>(n);
1344 int slot = find_lower(inner, it.key());
1346 // two step because weight_type might be unsigned
1347 n->weight -= it.weight();
1350 n = inner->childid[slot];
1353 BTREE_ASSERT(n == it.curnode);
1354 n->weight -= it.weight();
1356 it.currnode->weights[it.currslot] = w;
1359 /// Tries to locate a summed weight in the B+ tree and returns an iterator to the
1360 /// key/data slot if found. If unsuccessful it returns end(). It is
1361 /// ignoring zero-weight nodes during this.
1362 iterator find_summed_weight(weight_type weight)
1365 if (!n) return end();
1367 while(!n->isleafnode())
1369 const inner_node *inner = static_cast<const inner_node*>(n);
1370 int slot = find_summed_weight_lower(inner, weight);
1372 for (unsigned short s = 0; s < slot; ++s)
1373 weight -= inner->childid[s]->weight;
1375 n = inner->childid[slot];
1378 leaf_node *leaf = static_cast<leaf_node*>(n);
1380 unsigned short slot = find_summed_weight_lower(leaf, weight);
1381 for (unsigned short s = 0; s < slot; ++s)
1382 weight -= leaf->weights[s];
1384 return (slot < leaf->slotuse && weight == 0)
1385 ? iterator(leaf, slot) : end();
1390 weight_type summed_weight(const key_type &key)
1396 while(!n->isleafnode()) {
1397 const inner_node *inner = static_cast<const inner_node*>(n);
1398 int slot = find_lower(inner, key);
1400 for (unsigned short s = 0; s < slot; ++s)
1401 w += inner->childid[slot]->weight;
1403 n = inner->childid[slot];
1406 leaf_node *leaf = static_cast<leaf_node*>(n);
1408 int slot = find_lower(leaf, key);
1410 for (unsigned short s = 0; s < slot; ++s)
1411 w += leaf->childid[slot]->weight;
1413 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1414 ? iterator(leaf, slot) : end();
1417 /// Tries to locate a key in the B+ tree and returns an constant iterator
1418 /// to the key/data slot if found. If unsuccessful it returns end().
1419 const_iterator find(const key_type &key) const
1421 const node *n = root;
1422 if (!n) return end();
1424 while(!n->isleafnode())
1426 const inner_node *inner = static_cast<const inner_node*>(n);
1427 int slot = find_lower(inner, key);
1429 n = inner->childid[slot];
1432 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1434 int slot = find_lower(leaf, key);
1435 return (slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1436 ? const_iterator(leaf, slot) : end();
1439 /// Tries to locate a summed weight in the B+ tree and returns an iterator to the
1440 /// key/data slot if found. If unsuccessful it returns end(). It is
1441 /// ignoring zero-weight nodes during this.
1442 const_iterator find_summed_weight(weight_type weight) const
1445 if (!n) return end();
1447 while(!n->isleafnode())
1449 const inner_node *inner = static_cast<const inner_node*>(n);
1450 int slot = find_summed_weight_lower(inner, weight);
1452 for (unsigned short s = 0; s < slot; ++s)
1453 weight -= inner->childid[s]->weight;
1455 n = inner->childid[slot];
1458 leaf_node *leaf = static_cast<leaf_node*>(n);
1460 int slot = find_summed_weight_lower(leaf, weight);
1461 for (unsigned short s = 0; s < slot; ++s)
1462 weight -= leaf->childid[s]->weight;
1464 return (slot < leaf->slotuse && weight == 0)
1465 ? const_iterator(leaf, slot) : end();
1468 /// Tries to locate a key in the B+ tree and returns the number of
1469 /// identical key entries found.
1470 size_type count(const key_type &key) const
1472 const node *n = root;
1475 while(!n->isleafnode())
1477 const inner_node *inner = static_cast<const inner_node*>(n);
1478 int slot = find_lower(inner, key);
1480 n = inner->childid[slot];
1483 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1485 int slot = find_lower(leaf, key);
1488 while (leaf && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot]))
1491 if (++slot >= leaf->slotuse)
1493 leaf = leaf->nextleaf;
1501 /// Searches the B+ tree and returns an iterator to the first key less or
1502 /// equal to the parameter. If unsuccessful it returns end().
1503 iterator lower_bound(const key_type& key)
1506 if (!n) return end();
1508 while(!n->isleafnode())
1510 const inner_node *inner = static_cast<const inner_node*>(n);
1511 int slot = find_lower(inner, key);
1513 n = inner->childid[slot];
1516 leaf_node *leaf = static_cast<leaf_node*>(n);
1518 int slot = find_lower(leaf, key);
1519 return iterator(leaf, slot);
1522 /// Searches the B+ tree and returns an iterator to the first summed weight
1523 /// less or equal to the parameter. If unsuccessful it returns end().
1524 iterator lower_summed_weight_bound(weight_type weight)
1527 if (!n) return end();
1529 while(!n->isleafnode()) {
1530 const inner_node *inner = static_cast<const inner_node*>(n);
1531 int slot = find_summed_weight_lower(inner, weight);
1533 for (unsigned short s = 0; s < slot; ++s)
1534 weight -= inner->childid[s]->weight;
1536 n = inner->childid[slot];
1539 leaf_node *leaf = static_cast<leaf_node*>(n);
1541 int slot = find_summed_weight_lower(leaf, weight);
1543 for (unsigned short s = 0; s < slot; ++s)
1544 weight -= leaf->weights[s];
1546 return iterator(leaf, slot);
1549 /// Searches the B+ tree and returns an constant iterator to the first key less or
1550 /// equal to the parameter. If unsuccessful it returns end().
1551 const_iterator lower_bound(const key_type& key) const
1553 const node *n = root;
1554 if (!n) return end();
1556 while(!n->isleafnode())
1558 const inner_node *inner = static_cast<const inner_node*>(n);
1559 int slot = find_lower(inner, key);
1561 n = inner->childid[slot];
1564 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1566 int slot = find_lower(leaf, key);
1567 return const_iterator(leaf, slot);
1570 /// Searches the B+ tree and returns an iterator to the first summed weight
1571 /// less or equal to the parameter. If unsuccessful it returns end().
1572 const_iterator lower_summed_weight_bound(weight_type weight) const
1575 if (!n) return end();
1577 while(!n->isleafnode())
1579 const inner_node *inner = static_cast<const inner_node*>(n);
1580 int slot = find_summed_weight_lower(inner, weight);
1582 for (unsigned short s = 0; s < slot; ++s)
1583 weight -= inner->childid[s]->weight;
1585 n = inner->childid[slot];
1588 leaf_node *leaf = static_cast<leaf_node*>(n);
1590 int slot = find_summed_weight_lower(leaf, weight);
1592 for (unsigned short s = 0; s < slot; ++s)
1593 weight -= leaf->weights[s];
1595 return const_iterator(leaf, slot);
1598 /// Searches the B+ tree and returns an iterator to the first key greater
1599 /// than the parameter. If unsuccessful it returns end().
1600 iterator upper_bound(const key_type& key)
1603 if (!n) return end();
1605 while(!n->isleafnode())
1607 const inner_node *inner = static_cast<const inner_node*>(n);
1608 int slot = find_upper(inner, key);
1610 n = inner->childid[slot];
1613 leaf_node *leaf = static_cast<leaf_node*>(n);
1615 int slot = find_upper(leaf, key);
1616 return iterator(leaf, slot);
1619 /// Searches the B+ tree and returns an constant iterator to the first key
1620 /// greater than the parameter. If unsuccessful it returns end().
1621 const_iterator upper_bound(const key_type& key) const
1623 const node *n = root;
1624 if (!n) return end();
1626 while(!n->isleafnode())
1628 const inner_node *inner = static_cast<const inner_node*>(n);
1629 int slot = find_upper(inner, key);
1631 n = inner->childid[slot];
1634 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1636 int slot = find_upper(leaf, key);
1637 return const_iterator(leaf, slot);
1640 /// Searches the B+ tree and returns an iterator to the first summed weight
1641 /// greater than the parameter. If unsuccessful it returns end().
1642 iterator upper_summed_weight_bound(weight_type weight)
1645 if (!n) return end();
1647 while(!n->isleafnode())
1649 const inner_node *inner = static_cast<const inner_node*>(n);
1650 int slot = find_summed_weight_upper(inner, weight);
1652 for (unsigned short s = 0; s < slot; ++s)
1653 weight -= inner->childid[s]->weight;
1655 n = inner->childid[slot];
1658 leaf_node *leaf = static_cast<leaf_node*>(n);
1660 int slot = find_summed_weight_upper(leaf, weight);
1662 for (unsigned short s = 0; s < slot; ++s)
1663 weight -= leaf->weights[s];
1665 return iterator(leaf, slot);
1668 /// Searches the B+ tree and returns an iterator to the first summed weight
1669 /// greater than the parameter. If unsuccessful it returns end().
1670 const_iterator upper_summed_weight_bound(weight_type weight) const
1673 if (!n) return end();
1675 while(!n->isleafnode()) {
1676 const inner_node *inner = static_cast<const inner_node*>(n);
1677 int slot = find_summed_weight_upper(inner, weight);
1679 for (unsigned short s = 0; s < slot; ++s)
1680 weight -= inner->childid[s]->weight;
1682 n = inner->childid[slot];
1685 leaf_node *leaf = static_cast<leaf_node*>(n);
1687 int slot = find_summed_weight_upper(leaf, weight);
1689 for (unsigned short s = 0; s < slot; ++s)
1690 weight -= leaf->weights[s];
1692 return const_iterator(leaf, slot);
1695 /// Searches the B+ tree and returns both lower_bound() and upper_bound().
1696 inline std::pair<iterator, iterator> equal_range(const key_type& key)
1698 return std::pair<iterator, iterator>(lower_bound(key), upper_bound(key));
1701 /// Searches the B+ tree and returns both lower_bound() and upper_bound().
1702 inline std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const
1704 return std::pair<const_iterator, const_iterator>(lower_bound(key), upper_bound(key));
1707 /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound().
1708 inline std::pair<iterator, iterator> equal_summed_weight_range(weight_type weight)
1710 return std::pair<iterator, iterator>(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight));
1713 /// Searches the B+ tree and returns both lower_summed_weight_bound() and upper_summed_weight_bound().
1714 inline std::pair<const_iterator, const_iterator> equal_summed_weight_range(weight_type weight) const
1716 return std::pair<const_iterator, const_iterator>(lower_summed_weight_bound(weight), upper_summed_weight_bound(weight));
1720 // *** B+ Tree Object Comparison Functions
1722 /// Equality relation of B+ trees of the same type. B+ trees of the same
1723 /// size and equal elements (both key and data) are considered
1724 /// equal. Beware of the random ordering of duplicate keys.
1725 inline bool operator==(const btree_self &other) const
1727 return (size() == other.size()) && std::equal(begin(), end(), other.begin());
1730 /// Inequality relation. Based on operator==.
1731 inline bool operator!=(const btree_self &other) const
1733 return !(*this == other);
1736 /// Total ordering relation of B+ trees of the same type. It uses
1737 /// std::lexicographical_compare() for the actual comparison of elements.
1738 inline bool operator<(const btree_self &other) const
1740 return std::lexicographical_compare(begin(), end(), other.begin(), other.end());
1743 /// Greater relation. Based on operator<.
1744 inline bool operator>(const btree_self &other) const
1746 return other < *this;
1749 /// Less-equal relation. Based on operator<.
1750 inline bool operator<=(const btree_self &other) const
1752 return !(other < *this);
1755 /// Greater-equal relation. Based on operator<.
1756 inline bool operator>=(const btree_self &other) const
1758 return !(*this < other);
1762 /// *** Fast Copy: Assign Operator and Copy Constructors
1764 /// Assignment operator. All the key/data pairs are copied
1765 inline btree_self& operator= (const btree_self &other)
1771 key_less = other.key_comp();
1772 if (other.size() != 0)
1774 stats.leaves = stats.innernodes = 0;
1775 root = copy_recursive(other.root);
1776 stats = other.stats;
1779 if (selfverify) verify();
1784 /// Copy constructor. The newly initialized B+ tree object will contain a
1785 /// copy of all key/data pairs.
1786 inline weighted_btree(const btree_self &other)
1787 : root(NULL), headleaf(NULL), tailleaf(NULL),
1788 stats( other.stats ),
1789 key_less( other.key_comp() )
1793 stats.leaves = stats.innernodes = 0;
1794 root = copy_recursive(other.root);
1795 if (selfverify) verify();
1800 /// Recursively copy nodes from another B+ tree object
1801 struct node* copy_recursive(const node *n)
1803 if (n->isleafnode())
1805 const leaf_node *leaf = static_cast<const leaf_node*>(n);
1806 leaf_node *newleaf = allocate_leaf();
1808 newleaf->slotuse = leaf->slotuse;
1809 std::copy(leaf->slotkey, leaf->slotkey + leaf->slotuse, newleaf->slotkey);
1810 std::copy(leaf->slotdata, leaf->slotdata + leaf->slotuse, newleaf->slotdata);
1812 if (headleaf == NULL)
1814 headleaf = tailleaf = newleaf;
1815 newleaf->prevleaf = newleaf->nextleaf = NULL;
1819 newleaf->prevleaf = tailleaf;
1820 tailleaf->nextleaf = newleaf;
1828 const inner_node *inner = static_cast<const inner_node*>(n);
1829 inner_node *newinner = allocate_inner(inner->level);
1831 newinner->slotuse = inner->slotuse;
1832 std::copy(inner->slotkey, inner->slotkey + inner->slotuse, newinner->slotkey);
1834 for (unsigned short slot = 0; slot <= inner->slotuse; ++slot)
1836 newinner->childid[slot] = copy_recursive(inner->childid[slot]);
1844 // *** Public Insertion Functions
1846 /// Attempt to insert a key/data pair into the B+ tree. If the tree does not
1847 /// allow duplicate keys, then the insert may fail if it is already
1849 inline std::pair<iterator, bool> insert(const pair_type& x, weight_type weight)
1851 return insert_start(x.first, weight, x.second);
1854 /// Attempt to insert a key/data pair into the B+ tree. Beware that if
1855 /// key_type == data_type, then the template iterator insert() is called
1856 /// instead. If the tree does not allow duplicate keys, then the insert may
1857 /// fail if it is already present.
1858 inline std::pair<iterator, bool> insert(const key_type& key, weight_type weight, const data_type& data)
1860 return insert_start(key, weight, data);
1863 /// Attempt to insert a key/data pair into the B+ tree. This function is the
1864 /// same as the other insert, however if key_type == data_type then the
1865 /// non-template function cannot be called. If the tree does not allow
1866 /// duplicate keys, then the insert may fail if it is already present.
1867 inline std::pair<iterator, bool> insert2(const key_type& key, weight_type weight, const data_type& data)
1869 return insert_start(key, weight, data);
1872 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint
1873 /// is currently ignored by the B+ tree insertion routine.
1874 inline iterator insert(iterator /* hint */, const pair_type &x, weight_type weight)
1876 return insert_start(x.first, weight, x.second).first;
1879 /// Attempt to insert a key/data pair into the B+ tree. The iterator hint is
1880 /// currently ignored by the B+ tree insertion routine.
1881 inline iterator insert2(iterator /* hint */, const key_type& key, weight_type weight, const data_type& data)
1883 return insert_start(key, weight, data).first;
1886 /// Attempt to insert the range [first,last) of value_type pairs into the B+
1887 /// tree. Each key/data pair is inserted individually.
1888 template <typename InputIterator>
1889 inline void insert(InputIterator first, InputIterator last)
1891 InputIterator iter = first;
1894 insert(*iter, iter->weight());
1900 // *** Private Insertion Functions
1902 /// Start the insertion descent at the current root and handle root
1903 /// splits. Returns true if the item was inserted
1904 std::pair<iterator, bool> insert_start(const key_type& key, weight_type weight, const data_type& value)
1906 node *newchild = NULL;
1907 key_type newkey = key_type();
1911 root = headleaf = tailleaf = allocate_leaf();
1914 std::pair<iterator, bool> r = insert_descend(root, key, weight, value, &newkey, &newchild);
1918 inner_node *newroot = allocate_inner(root->level + 1);
1919 newroot->slotkey[0] = newkey;
1921 newroot->childid[0] = root;
1922 newroot->childid[1] = newchild;
1924 newroot->weight = root->weight + newchild->weight;
1925 newroot->slotuse = 1;
1930 // increment itemcount if the item was inserted
1931 if (r.second) ++stats.itemcount;
1934 if (debug) print(std::cout);
1939 BTREE_ASSERT(exists(key));
1946 * @brief Insert an item into the B+ tree.
1948 * Descend down the nodes to a leaf, insert the key/data pair in a free
1949 * slot. If the node overflows, then it must be split and the new split
1950 * node inserted into the parent. Unroll / this splitting up to the root.
1952 std::pair<iterator, bool> insert_descend(node* n,
1953 const key_type& key,
1955 const data_type& value,
1956 key_type* splitkey, node** splitnode)
1958 if (!n->isleafnode())
1960 inner_node *inner = static_cast<inner_node*>(n);
1962 key_type newkey = key_type();
1963 node *newchild = NULL;
1965 int slot = find_lower(inner, key);
1967 BTREE_PRINT("btree::insert_descend into " << inner->childid[slot] << std::endl);
1969 weight_type oldw = inner->childid[slot]->weight;
1970 std::pair<iterator, bool> r = insert_descend(inner->childid[slot],
1971 key, weight, value, &newkey, &newchild);
1972 n->weight += inner->childid[slot]->weight - oldw;
1976 BTREE_PRINT("btree::insert_descend newchild with key " << newkey << " node " << newchild << " at slot " << slot << std::endl);
1978 if (inner->isfull())
1980 split_inner_node(inner, splitkey, splitnode, slot);
1982 BTREE_PRINT("btree::insert_descend done split_inner: putslot: " << slot << " putkey: " << newkey << " upkey: " << *splitkey << std::endl);
1987 print_node(std::cout, inner);
1988 print_node(std::cout, *splitnode);
1992 // check if insert slot is in the split sibling node
1993 BTREE_PRINT("btree::insert_descend switch: " << slot << " > " << inner->slotuse+1 << std::endl);
1995 if (slot == inner->slotuse+1 && inner->slotuse < (*splitnode)->slotuse)
1997 // special case when the insert slot matches the split
1998 // place between the two nodes, then the insert key
1999 // becomes the split key.
2001 BTREE_ASSERT(inner->slotuse + 1 < innerslotmax);
2003 inner_node *splitinner = static_cast<inner_node*>(*splitnode);
2005 // move the split key and it's datum into the left node
2006 inner->slotkey[inner->slotuse] = *splitkey;
2007 inner->childid[inner->slotuse+1] = splitinner->childid[0];
2008 inner->weight += splitinner->childid[0]->weight;
2009 splitinner->weight -= splitinner->childid[0]->weight;
2012 // set new split key and move corresponding datum into right node
2013 splitinner->childid[0] = newchild;
2014 splitinner->weight += newchild->weight;
2019 else if (slot >= inner->slotuse+1)
2021 // in case the insert slot is in the newly create split
2022 // node, we reuse the code below.
2024 slot -= inner->slotuse+1;
2025 inner = static_cast<inner_node*>(*splitnode);
2026 BTREE_PRINT("btree::insert_descend switching to split node " << inner << " slot " << slot <<std::endl);
2030 // put pointer to child node into correct slot
2031 BTREE_ASSERT(slot >= 0 && slot <= inner->slotuse);
2033 int i = inner->slotuse;
2036 inner->slotkey[i] = inner->slotkey[i - 1];
2037 inner->childid[i + 1] = inner->childid[i];
2041 inner->slotkey[slot] = newkey;
2042 inner->childid[slot + 1] = newchild;
2044 inner->weight += newchild->weight;
2049 else // n->isleafnode() == true
2051 leaf_node *leaf = static_cast<leaf_node*>(n);
2053 unsigned short slot = find_lower(leaf, key);
2055 if (!allow_duplicates && slot < leaf->slotuse && key_equal(key, leaf->slotkey[slot])) {
2056 return std::pair<iterator, bool>(iterator(leaf, slot), false);
2061 split_leaf_node(leaf, splitkey, splitnode);
2063 // check if insert slot is in the split sibling node
2064 if (slot >= leaf->slotuse)
2066 slot -= leaf->slotuse;
2067 leaf = static_cast<leaf_node*>(*splitnode);
2071 // put data item into correct data slot
2073 int i = leaf->slotuse - 1;
2074 BTREE_ASSERT(i + 1 < leafslotmax);
2076 while(i >= 0 && key_less(key, leaf->slotkey[i])) {
2077 leaf->slotkey[i + 1] = leaf->slotkey[i];
2078 leaf->slotdata[i + 1] = leaf->slotdata[i];
2079 leaf->weights[i + 1] = leaf->weights[i];
2083 leaf->slotkey[i + 1] = key;
2084 leaf->slotdata[i + 1] = value;
2085 leaf->weights[i + 1] = weight;
2086 leaf->weight += weight;
2089 if (splitnode && leaf != *splitnode && slot == leaf->slotuse-1)
2091 // special case: the node was split, and the insert is at the
2092 // last slot of the old node. then the splitkey must be
2097 return std::pair<iterator, bool>(iterator(leaf, i + 1), true);
2101 /// Split up a leaf node into two equally-filled sibling leaves. Returns
2102 /// the new nodes and it's insertion key in the two parameters.
2103 void split_leaf_node(leaf_node* leaf, key_type* _newkey, node** _newleaf)
2105 BTREE_ASSERT(leaf->isfull());
2107 unsigned int mid = leaf->slotuse / 2;
2109 BTREE_PRINT("btree::split_leaf_node on " << leaf << std::endl);
2111 leaf_node *newleaf = allocate_leaf();
2113 newleaf->slotuse = leaf->slotuse - mid;
2115 newleaf->nextleaf = leaf->nextleaf;
2116 if (newleaf->nextleaf == NULL) {
2117 BTREE_ASSERT(leaf == tailleaf);
2121 newleaf->nextleaf->prevleaf = newleaf;
2124 for(unsigned int slot = mid; slot < leaf->slotuse; ++slot)
2126 unsigned int ni = slot - mid;
2127 newleaf->slotkey[ni] = leaf->slotkey[slot];
2128 newleaf->slotdata[ni] = leaf->slotdata[slot];
2129 newleaf->weights[ni] = leaf->weights[slot];
2130 newleaf->weight += leaf->weights[slot];
2131 leaf->weight -= leaf->weights[slot];
2134 leaf->slotuse = mid;
2135 leaf->nextleaf = newleaf;
2136 newleaf->prevleaf = leaf;
2138 *_newkey = leaf->slotkey[leaf->slotuse-1];
2139 *_newleaf = newleaf;
2142 /// Split up an inner node into two equally-filled sibling nodes. Returns
2143 /// the new nodes and it's insertion key in the two parameters. Requires
2144 /// the slot of the item will be inserted, so the nodes will be the same
2145 /// size after the insert.
2146 void split_inner_node(inner_node* inner, key_type* _newkey, node** _newinner, unsigned int addslot)
2148 BTREE_ASSERT(inner->isfull());
2150 unsigned int mid = inner->slotuse / 2;
2152 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl);
2154 // if the split is uneven and the overflowing item will be put into the
2155 // larger node, then the smaller split node may underflow
2156 if (addslot <= mid && mid > inner->slotuse - (mid + 1))
2159 BTREE_PRINT("btree::split_inner: mid " << mid << " addslot " << addslot << std::endl);
2161 BTREE_PRINT("btree::split_inner_node on " << inner << " into two nodes " << mid << " and " << inner->slotuse - (mid + 1) << " sized" << std::endl);
2163 inner_node *newinner = allocate_inner(inner->level);
2165 newinner->slotuse = inner->slotuse - (mid + 1);
2167 for(unsigned int slot = mid + 1; slot < inner->slotuse; ++slot)
2169 unsigned int ni = slot - (mid + 1);
2170 newinner->slotkey[ni] = inner->slotkey[slot];
2171 newinner->childid[ni] = inner->childid[slot];
2172 newinner->weight += inner->childid[slot]->weight;
2173 inner->weight -= inner->childid[slot]->weight;
2175 newinner->childid[newinner->slotuse] = inner->childid[inner->slotuse];
2176 newinner->weight += inner->childid[inner->slotuse]->weight;
2177 inner->weight -= inner->childid[inner->slotuse]->weight;
2179 inner->slotuse = mid;
2181 *_newkey = inner->slotkey[mid];
2182 *_newinner = newinner;
2186 // *** Support Class Encapsulating Deletion Results
2188 /// Result flags of recursive deletion.
2191 /// Deletion successful and no fix-ups necessary.
2194 /// Deletion not successful because key was not found.
2195 btree_not_found = 1,
2197 /// Deletion successful, the last key was updated so parent slotkeys
2199 btree_update_lastkey = 2,
2201 /// Deletion successful, children nodes were merged and the parent
2202 /// needs to remove the empty node.
2206 /// \internal B+ tree recursive deletion has much information which is
2207 /// needs to be passed upward.
2210 /// Merged result flags
2211 result_flags_t flags;
2213 /// The key to be updated at the parent's slot
2216 /// Constructor of a result with a specific flag, this can also be used
2217 /// as for implicit conversion.
2218 inline result_t(result_flags_t f = btree_ok)
2219 : flags(f), lastkey()
2222 /// Constructor with a lastkey value.
2223 inline result_t(result_flags_t f, const key_type &k)
2224 : flags(f), lastkey(k)
2227 /// Test if this result object has a given flag set.
2228 inline bool has(result_flags_t f) const
2230 return (flags & f) != 0;
2233 /// Merge two results OR-ing the result flags and overwriting lastkeys.
2234 inline result_t& operator|= (const result_t &other)
2236 flags = result_flags_t(flags | other.flags);
2238 // we overwrite existing lastkeys on purpose
2239 if (other.has(btree_update_lastkey))
2240 lastkey = other.lastkey;
2247 // *** Public Erase Functions
2249 /// Erases one (the first) of the key/data pairs associated with the given
2251 bool erase_one(const key_type &key)
2253 BTREE_PRINT("btree::erase_one(" << key << ") on btree size " << size() << std::endl);
2255 if (selfverify) verify();
2257 result_t result = erase_one_descend(key, root, NULL, NULL, NULL, NULL, NULL, 0);
2259 if (!result.has(btree_not_found))
2263 if (debug) print(std::cout);
2265 if (selfverify) verify();
2267 return !result.has(btree_not_found);
2270 /// Erases all the key/data pairs associated with the given key. This is
2271 /// implemented using erase_one().
2272 size_type erase(const key_type &key)
2276 while( erase_one(key) )
2279 if (!allow_duplicates) break;
2286 /// Erase the key/data pair referenced by the iterator.
2287 void erase(iterator iter)
2294 /// Erase all key/data pairs in the range [first,last). This function is
2295 /// currently not implemented by the B+ Tree.
2296 void erase(iterator /* first */, iterator /* last */)
2303 // *** Private Erase Functions
2305 /** @brief Erase one (the first) key/data pair in the B+ tree matching key.
2307 * Descends down the tree in search of key. During the descent the parent,
2308 * left and right siblings and their parents are computed and passed
2309 * down. Once the key/data pair is found, it is removed from the leaf. If
2310 * the leaf underflows 6 different cases are handled. These cases resolve
2311 * the underflow by shifting key/data pairs from adjacent sibling nodes,
2312 * merging two sibling nodes or trimming the tree.
2314 result_t erase_one_descend(const key_type& key,
2316 node *left, node *right,
2317 inner_node *leftparent, inner_node *rightparent,
2318 inner_node *parent, unsigned int parentslot)
2320 if (curr->isleafnode())
2322 leaf_node *leaf = static_cast<leaf_node*>(curr);
2323 leaf_node *leftleaf = static_cast<leaf_node*>(left);
2324 leaf_node *rightleaf = static_cast<leaf_node*>(right);
2326 int slot = find_lower(leaf, key);
2328 if (slot >= leaf->slotuse || !key_equal(key, leaf->slotkey[slot]))
2330 BTREE_PRINT("Could not find key " << key << " to erase." << std::endl);
2332 return btree_not_found;
2335 BTREE_PRINT("Found key in leaf " << curr << " at slot " << slot << std::endl);
2337 leaf->weight -= leaf->weights[slot];
2338 for (int i = slot; i < leaf->slotuse - 1; i++)
2340 leaf->slotkey[i] = leaf->slotkey[i + 1];
2341 leaf->slotdata[i] = leaf->slotdata[i + 1];
2342 leaf->weights[i] = leaf->weights[i + 1];
2346 result_t myres = btree_ok;
2348 // if the last key of the leaf was changed, the parent is notified
2349 // and updates the key of this leaf
2350 if (slot == leaf->slotuse && parent)
2352 if (parentslot < parent->slotuse)
2354 BTREE_ASSERT(parent->childid[parentslot] == curr);
2355 parent->slotkey[parentslot] = leaf->slotkey[leaf->slotuse - 1];
2359 BTREE_PRINT("Scheduling lastkeyupdate: key " << leaf->slotkey[leaf->slotuse - 1] << std::endl);
2360 myres |= result_t(btree_update_lastkey, leaf->slotkey[leaf->slotuse - 1]);
2364 if (leaf->isunderflow() && !(leaf == root && leaf->slotuse >= 1))
2366 // determine what to do about the underflow
2368 // case : if this empty leaf is the root, there is no way to
2369 // correct underflow
2370 if (leftleaf == NULL && rightleaf == NULL)
2374 // case : if both left and right leaves would underflow in case of
2375 // a shift, then merging is necessary. choose the more local merger
2377 else if ( (leftleaf == NULL || leftleaf->isfew()) && (rightleaf == NULL || rightleaf->isfew()) )
2379 if (leftparent == parent)
2380 myres |= merge_leaves(leftleaf, leaf, leftparent);
2382 myres |= merge_leaves(leaf, rightleaf, rightparent);
2384 // case : the right leaf has extra data, so balance right with current
2385 else if ( (leftleaf != NULL && leftleaf->isfew()) && (rightleaf != NULL && !rightleaf->isfew()) )
2387 if (rightparent == parent)
2388 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2390 myres |= merge_leaves(leftleaf, leaf, leftparent);
2392 // case : the left leaf has extra data, so balance left with current
2393 else if ( (leftleaf != NULL && !leftleaf->isfew()) && (rightleaf != NULL && rightleaf->isfew()) )
2395 if (leftparent == parent)
2396 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2398 myres |= merge_leaves(leaf, rightleaf, rightparent);
2400 // case : both the leaf and right leaves have extra data and our
2401 // parent, choose the leaf with more data
2402 else if (leftparent == rightparent)
2404 if (leftleaf->slotuse <= rightleaf->slotuse)
2405 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2407 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2411 if (leftparent == parent)
2412 shift_right_leaf(leftleaf, leaf, leftparent, parentslot - 1);
2414 myres |= shift_left_leaf(leaf, rightleaf, rightparent, parentslot);
2420 else // !curr->isleafnode()
2422 inner_node *inner = static_cast<inner_node*>(curr);
2423 inner_node *leftinner = static_cast<inner_node*>(left);
2424 inner_node *rightinner = static_cast<inner_node*>(right);
2426 node *myleft, *myright;
2427 inner_node *myleftparent, *myrightparent;
2429 int slot = find_lower(inner, key);
2432 myleft = (left == NULL) ? NULL : (static_cast<inner_node*>(left))->childid[left->slotuse - 1];
2433 myleftparent = leftparent;
2436 myleft = inner->childid[slot - 1];
2437 myleftparent = inner;
2440 if (slot == inner->slotuse) {
2441 myright = (right == NULL) ? NULL : (static_cast<inner_node*>(right))->childid[0];
2442 myrightparent = rightparent;
2445 myright = inner->childid[slot + 1];
2446 myrightparent = inner;
2449 BTREE_PRINT("erase_one_descend into " << inner->childid[slot] << std::endl);
2451 result_t result = erase_one_descend(key,
2452 inner->childid[slot],
2454 myleftparent, myrightparent,
2457 result_t myres = btree_ok;
2459 if (result.has(btree_not_found))
2464 if (result.has(btree_update_lastkey))
2466 if (parent && parentslot < parent->slotuse)
2468 BTREE_PRINT("Fixing lastkeyupdate: key " << result.lastkey << " into parent " << parent << " at parentslot " << parentslot << std::endl);
2470 BTREE_ASSERT(parent->childid[parentslot] == curr);
2471 parent->slotkey[parentslot] = result.lastkey;
2475 BTREE_PRINT("Forwarding lastkeyupdate: key " << result.lastkey << std::endl);
2476 myres |= result_t(btree_update_lastkey, result.lastkey);
2480 if (result.has(btree_fixmerge))
2482 // either the current node or the next is empty and should be removed
2483 if (inner->childid[slot]->slotuse != 0)
2486 // this is the child slot invalidated by the merge
2487 BTREE_ASSERT(inner->childid[slot]->slotuse == 0);
2489 inner->weight -= inner->childid[slot]->weight;
2490 free_node(inner->childid[slot]);
2492 for(int i = slot; i < inner->slotuse; i++)
2494 inner->slotkey[i - 1] = inner->slotkey[i];
2495 inner->childid[i] = inner->childid[i + 1];
2499 if (inner->level == 1)
2501 // fix split key for children leaves
2503 leaf_node *child = static_cast<leaf_node*>(inner->childid[slot]);
2504 inner->slotkey[slot] = child->slotkey[ child->slotuse-1 ];
2508 if (inner->isunderflow() && !(inner == root && inner->slotuse >= 1))
2510 // case: the inner node is the root and has just one child. that child becomes the new root
2511 if (leftinner == NULL && rightinner == NULL)
2513 BTREE_ASSERT(inner == root);
2514 BTREE_ASSERT(inner->slotuse == 0);
2516 root = inner->childid[0];
2523 // case : if both left and right leaves would underflow in case of
2524 // a shift, then merging is necessary. choose the more local merger
2526 else if ( (leftinner == NULL || leftinner->isfew()) && (rightinner == NULL || rightinner->isfew()) )
2528 if (leftparent == parent)
2529 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1);
2531 myres |= merge_inner(inner, rightinner, rightparent, parentslot);
2533 // case : the right leaf has extra data, so balance right with current
2534 else if ( (leftinner != NULL && leftinner->isfew()) && (rightinner != NULL && !rightinner->isfew()) )
2536 if (rightparent == parent)
2537 shift_left_inner(inner, rightinner, rightparent, parentslot);
2539 myres |= merge_inner(leftinner, inner, leftparent, parentslot - 1);
2541 // case : the left leaf has extra data, so balance left with current
2542 else if ( (leftinner != NULL && !leftinner->isfew()) && (rightinner != NULL && rightinner->isfew()) )
2544 if (leftparent == parent)
2545 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2547 myres |= merge_inner(inner, rightinner, rightparent, parentslot);
2549 // case : both the leaf and right leaves have extra data and our
2550 // parent, choose the leaf with more data
2551 else if (leftparent == rightparent)
2553 if (leftinner->slotuse <= rightinner->slotuse)
2554 shift_left_inner(inner, rightinner, rightparent, parentslot);
2556 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2560 if (leftparent == parent)
2561 shift_right_inner(leftinner, inner, leftparent, parentslot - 1);
2563 shift_left_inner(inner, rightinner, rightparent, parentslot);
2571 /// Merge two leaf nodes. The function moves all key/data pairs from right
2572 /// to left and sets right's slotuse to zero. The right slot is then
2573 /// removed by the calling parent node.
2574 result_t merge_leaves(leaf_node* left, leaf_node* right, inner_node* parent)
2576 BTREE_PRINT("Merge leaf nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl);
2579 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2580 BTREE_ASSERT(parent->level == 1);
2582 BTREE_ASSERT(left->slotuse + right->slotuse < leafslotmax);
2584 for (unsigned int i = 0; i < right->slotuse; i++)
2586 left->slotkey[left->slotuse + i] = right->slotkey[i];
2587 left->slotdata[left->slotuse + i] = right->slotdata[i];
2588 left->weights[left->slotuse + i] = right->weights[i];
2590 left->slotuse += right->slotuse;
2591 left->weight += right->weight;
2593 left->nextleaf = right->nextleaf;
2595 left->nextleaf->prevleaf = left;
2602 return btree_fixmerge;
2605 /// Merge two inner nodes. The function moves all key/childid pairs from
2606 /// right to left and sets right's slotuse to zero. The right slot is then
2607 /// removed by the calling parent node.
2608 static result_t merge_inner(inner_node* left, inner_node* right, inner_node* parent, unsigned int parentslot)
2610 BTREE_PRINT("Merge inner nodes " << left << " and " << right << " with common parent " << parent << "." << std::endl);
2612 BTREE_ASSERT(left->level == right->level);
2613 BTREE_ASSERT(parent->level == left->level + 1);
2615 BTREE_ASSERT(parent->childid[parentslot] == left);
2617 BTREE_ASSERT(left->slotuse + right->slotuse < innerslotmax);
2621 // find the left node's slot in the parent's children
2622 unsigned int leftslot = 0;
2623 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2626 BTREE_ASSERT(leftslot < parent->slotuse);
2627 BTREE_ASSERT(parent->childid[leftslot] == left);
2628 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2630 BTREE_ASSERT(parentslot == leftslot);
2633 // retrieve the decision key from parent
2634 left->slotkey[left->slotuse] = parent->slotkey[parentslot];
2637 // copy over keys and children from right
2638 for (unsigned int i = 0; i < right->slotuse; i++)
2640 left->slotkey[left->slotuse + i] = right->slotkey[i];
2641 left->childid[left->slotuse + i] = right->childid[i];
2643 left->slotuse += right->slotuse;
2644 left->weight += right->weight;
2646 left->childid[left->slotuse] = right->childid[right->slotuse];
2651 return btree_fixmerge;
2654 /// Balance two leaf nodes. The function moves key/data pairs from right to
2655 /// left so that both nodes are equally filled. The parent node is updated
2657 static result_t shift_left_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot)
2659 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2660 BTREE_ASSERT(parent->level == 1);
2662 BTREE_ASSERT(left->nextleaf == right);
2663 BTREE_ASSERT(left == right->prevleaf);
2665 BTREE_ASSERT(left->slotuse < right->slotuse);
2666 BTREE_ASSERT(parent->childid[parentslot] == left);
2668 unsigned int shiftnum = (right->slotuse - left->slotuse) / 2;
2670 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl);
2672 BTREE_ASSERT(left->slotuse + shiftnum < leafslotmax);
2674 // copy the first items from the right node to the last slot in the left node.
2675 for(unsigned int i = 0; i < shiftnum; i++)
2677 left->slotkey[left->slotuse + i] = right->slotkey[i];
2678 left->slotdata[left->slotuse + i] = right->slotdata[i];
2679 left->weights[left->slotuse + i] = right->weights[i];
2680 left->weight += right->weights[i];
2681 right->weight -= right->weights[i];
2683 left->slotuse += shiftnum;
2685 // shift all slots in the right node to the left
2687 right->slotuse -= shiftnum;
2688 for(int i = 0; i < right->slotuse; i++)
2690 right->slotkey[i] = right->slotkey[i + shiftnum];
2691 right->slotdata[i] = right->slotdata[i + shiftnum];
2692 right->weights[i] = right->weights[i + shiftnum];
2696 if (parentslot < parent->slotuse) {
2697 parent->slotkey[parentslot] = left->slotkey[left->slotuse - 1];
2700 else { // the update is further up the tree
2701 return result_t(btree_update_lastkey, left->slotkey[left->slotuse - 1]);
2705 /// Balance two inner nodes. The function moves key/data pairs from right
2706 /// to left so that both nodes are equally filled. The parent node is
2707 /// updated if possible.
2708 static void shift_left_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot)
2710 BTREE_ASSERT(left->level == right->level);
2711 BTREE_ASSERT(parent->level == left->level + 1);
2713 BTREE_ASSERT(left->slotuse < right->slotuse);
2714 BTREE_ASSERT(parent->childid[parentslot] == left);
2716 unsigned int shiftnum = (right->slotuse - left->slotuse) / 2;
2718 BTREE_PRINT("Shifting (inner) " << shiftnum << " entries to left " << left << " from right " << right << " with common parent " << parent << "." << std::endl);
2720 BTREE_ASSERT(left->slotuse + shiftnum < innerslotmax);
2724 // find the left node's slot in the parent's children and compare to parentslot
2726 unsigned int leftslot = 0;
2727 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2730 BTREE_ASSERT(leftslot < parent->slotuse);
2731 BTREE_ASSERT(parent->childid[leftslot] == left);
2732 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2734 BTREE_ASSERT(leftslot == parentslot);
2737 // copy the parent's decision slotkey and childid to the first new key on the left
2738 left->slotkey[left->slotuse] = parent->slotkey[parentslot];
2741 // copy the other items from the right node to the last slots in the left node.
2742 for(unsigned int i = 0; i < shiftnum - 1; i++)
2744 left->slotkey[left->slotuse + i] = right->slotkey[i];
2745 left->childid[left->slotuse + i] = right->childid[i];
2746 left->weight += right->childid[i]->weight;
2747 right->weight -= right->childid[i]->weight;
2749 left->slotuse += shiftnum - 1;
2752 parent->slotkey[parentslot] = right->slotkey[shiftnum - 1];
2753 // last pointer in left
2754 left->childid[left->slotuse] = right->childid[shiftnum - 1];
2755 left->weight += right->childid[shiftnum - 1]->weight;
2756 right->weight -= right->childid[shiftnum - 1]->weight;
2758 // shift all slots in the right node
2760 right->slotuse -= shiftnum;
2761 for(int i = 0; i < right->slotuse; i++)
2763 right->slotkey[i] = right->slotkey[i + shiftnum];
2764 right->childid[i] = right->childid[i + shiftnum];
2766 right->childid[right->slotuse] = right->childid[right->slotuse + shiftnum];
2769 /// Balance two leaf nodes. The function moves key/data pairs from left to
2770 /// right so that both nodes are equally filled. The parent node is updated
2772 static void shift_right_leaf(leaf_node *left, leaf_node *right, inner_node *parent, unsigned int parentslot)
2774 BTREE_ASSERT(left->isleafnode() && right->isleafnode());
2775 BTREE_ASSERT(parent->level == 1);
2777 BTREE_ASSERT(left->nextleaf == right);
2778 BTREE_ASSERT(left == right->prevleaf);
2779 BTREE_ASSERT(parent->childid[parentslot] == left);
2781 BTREE_ASSERT(left->slotuse > right->slotuse);
2783 unsigned int shiftnum = (left->slotuse - right->slotuse) / 2;
2785 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl);
2789 // find the left node's slot in the parent's children
2790 unsigned int leftslot = 0;
2791 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2794 BTREE_ASSERT(leftslot < parent->slotuse);
2795 BTREE_ASSERT(parent->childid[leftslot] == left);
2796 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2798 BTREE_ASSERT(leftslot == parentslot);
2801 // shift all slots in the right node
2803 BTREE_ASSERT(right->slotuse + shiftnum < leafslotmax);
2805 for(int i = right->slotuse; i >= 0; i--)
2807 right->slotkey[i + shiftnum] = right->slotkey[i];
2808 right->slotdata[i + shiftnum] = right->slotdata[i];
2809 right->weights[i + shiftnum] = right->weights[i];
2811 right->slotuse += shiftnum;
2813 // copy the last items from the left node to the first slot in the right node.
2814 for(unsigned int i = 0; i < shiftnum; i++)
2816 right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i];
2817 right->slotdata[i] = left->slotdata[left->slotuse - shiftnum + i];
2818 right->weights[i] = left->weights[left->slotuse - shiftnum + i];
2819 right->weight += left->weights[left->slotuse - shiftnum + i];
2820 left->weight -= left->weights[left->slotuse - shiftnum + i];
2822 left->slotuse -= shiftnum;
2824 parent->slotkey[parentslot] = left->slotkey[left->slotuse-1];
2827 /// Balance two inner nodes. The function moves key/data pairs from left to
2828 /// right so that both nodes are equally filled. The parent node is updated
2830 static void shift_right_inner(inner_node *left, inner_node *right, inner_node *parent, unsigned int parentslot)
2832 BTREE_ASSERT(left->level == right->level);
2833 BTREE_ASSERT(parent->level == left->level + 1);
2835 BTREE_ASSERT(left->slotuse > right->slotuse);
2836 BTREE_ASSERT(parent->childid[parentslot] == left);
2838 unsigned int shiftnum = (left->slotuse - right->slotuse) / 2;
2840 BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to right " << right << " from left " << left << " with common parent " << parent << "." << std::endl);
2844 // find the left node's slot in the parent's children
2845 unsigned int leftslot = 0;
2846 while(leftslot <= parent->slotuse && parent->childid[leftslot] != left)
2849 BTREE_ASSERT(leftslot < parent->slotuse);
2850 BTREE_ASSERT(parent->childid[leftslot] == left);
2851 BTREE_ASSERT(parent->childid[leftslot+1] == right);
2853 BTREE_ASSERT(leftslot == parentslot);
2856 // shift all slots in the right node
2858 BTREE_ASSERT(right->slotuse + shiftnum < innerslotmax);
2860 right->childid[right->slotuse + shiftnum] = right->childid[right->slotuse];
2861 for(int i = right->slotuse-1; i >= 0; i--)
2863 right->slotkey[i + shiftnum] = right->slotkey[i];
2864 right->childid[i + shiftnum] = right->childid[i];
2867 right->slotuse += shiftnum;
2869 // copy the parent's decision slotkey and childid to the last new key on the right
2870 right->slotkey[shiftnum - 1] = parent->slotkey[parentslot];
2871 right->childid[shiftnum - 1] = left->childid[left->slotuse];
2872 right->weight += left->childid[left->slotuse]->weight;
2873 left->weight -= left->childid[left->slotuse]->weight;
2875 // copy the remaining last items from the left node to the first slot in the right node.
2876 for(unsigned int i = 0; i < shiftnum - 1; i++)
2878 right->slotkey[i] = left->slotkey[left->slotuse - shiftnum + i + 1];
2879 right->childid[i] = left->childid[left->slotuse - shiftnum + i + 1];
2880 right->weight += left->childid[left->slotuse - shiftnum + i + 1]->weight;
2881 left->weight -= left->childid[left->slotuse - shiftnum + i + 1]->weight;
2884 // copy the first to-be-removed key from the left node to the parent's decision slot
2885 parent->slotkey[parentslot] = left->slotkey[left->slotuse - shiftnum];
2887 left->slotuse -= shiftnum;
2892 // *** Debug Printing
2894 /// Print out the B+ tree structure with keys onto the given ostream. This
2895 /// function requires that the header is compiled with BTREE_DEBUG and that
2896 /// key_type is printable via std::ostream.
2897 void print(std::ostream &os) const
2900 print_node(os, root, 0, true);
2904 /// Print out only the leaves via the double linked list.
2905 void print_leaves(std::ostream &os) const
2907 os << "leaves:" << std::endl;
2909 const leaf_node *n = headleaf;
2913 os << " " << n << std::endl;
2921 /// Recursively descend down the tree and print out nodes.
2922 static void print_node(std::ostream &os, const node* node, unsigned int depth=0, bool recursive=false)
2924 for(unsigned int i = 0; i < depth; i++) os << " ";
2926 os << "node " << node << " level " << node->level << " weight " << node->weight << " slotuse " << node->slotuse << std::endl;
2928 if (node->isleafnode())
2930 const leaf_node *leafnode = static_cast<const leaf_node*>(node);
2932 for(unsigned int i = 0; i < depth; i++) os << " ";
2933 os << " leaf prev " << leafnode->prevleaf << " next " << leafnode->nextleaf << std::endl;
2935 for(unsigned int i = 0; i < depth; i++) os << " ";
2937 for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot)
2939 os << leafnode->slotkey[slot] << " "; // << "(data: " << leafnode->slotdata[slot] << ") ";
2945 const inner_node *innernode = static_cast<const inner_node*>(node);
2947 for(unsigned int i = 0; i < depth; i++) os << " ";
2949 for (unsigned short slot = 0; slot < innernode->slotuse; ++slot)
2951 os << "(" << innernode->childid[slot] << ") " << innernode->slotkey[slot] << " ";
2953 os << "(" << innernode->childid[innernode->slotuse] << ")" << std::endl;
2957 for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot)
2959 print_node(os, innernode->childid[slot], depth + 1, recursive);
2967 // *** Verification of B+ Tree Invariants
2969 /// Run a thorough verification of all B+ tree invariants. The program
2970 /// aborts via assert() if something is wrong.
2973 key_type minkey, maxkey;
2978 verify_node(root, &minkey, &maxkey, vstats);
2980 assert( vstats.itemcount == stats.itemcount );
2981 assert( vstats.leaves == stats.leaves );
2982 assert( vstats.innernodes == stats.innernodes );
2990 /// Recursively descend down the tree and verify each node
2991 void verify_node(const node* n, key_type* minkey, key_type* maxkey, tree_stats &vstats) const
2993 BTREE_PRINT("verifynode " << n << std::endl);
2995 if (n->isleafnode())
2997 const leaf_node *leaf = static_cast<const leaf_node*>(n);
2999 assert(leaf == root || !leaf->isunderflow());
3001 for(unsigned short slot = 0; slot < leaf->slotuse - 1; ++slot)
3003 assert(key_lessequal(leaf->slotkey[slot], leaf->slotkey[slot + 1]));
3006 *minkey = leaf->slotkey[0];
3007 *maxkey = leaf->slotkey[leaf->slotuse - 1];
3010 vstats.itemcount += leaf->slotuse;
3012 else // !n->isleafnode()
3014 const inner_node *inner = static_cast<const inner_node*>(n);
3015 vstats.innernodes++;
3017 assert(inner == root || !inner->isunderflow());
3019 for(unsigned short slot = 0; slot < inner->slotuse - 1; ++slot)
3021 assert(key_lessequal(inner->slotkey[slot], inner->slotkey[slot + 1]));
3024 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot)
3026 const node *subnode = inner->childid[slot];
3027 key_type subminkey = key_type();
3028 key_type submaxkey = key_type();
3030 assert(subnode->level + 1 == inner->level);
3031 verify_node(subnode, &subminkey, &submaxkey, vstats);
3033 BTREE_PRINT("verify subnode " << subnode << ": " << subminkey << " - " << submaxkey << std::endl);
3036 *minkey = subminkey;
3038 assert(key_greaterequal(subminkey, inner->slotkey[slot-1]));
3040 if (slot == inner->slotuse)
3041 *maxkey = submaxkey;
3043 assert(key_equal(inner->slotkey[slot], submaxkey));
3045 if (inner->level == 1 && slot < inner->slotuse)
3047 // children are leaves and must be linked together in the
3049 const leaf_node *leafa = static_cast<const leaf_node*>(inner->childid[slot]);
3050 const leaf_node *leafb = static_cast<const leaf_node*>(inner->childid[slot + 1]);
3052 assert(leafa->nextleaf == leafb);
3053 assert(leafa == leafb->prevleaf);
3054 (void)leafa; (void)leafb;
3056 if (inner->level == 2 && slot < inner->slotuse)
3058 // verify leaf links between the adjacent inner nodes
3059 const inner_node *parenta = static_cast<const inner_node*>(inner->childid[slot]);
3060 const inner_node *parentb = static_cast<const inner_node*>(inner->childid[slot+1]);
3062 const leaf_node *leafa = static_cast<const leaf_node*>(parenta->childid[parenta->slotuse]);
3063 const leaf_node *leafb = static_cast<const leaf_node*>(parentb->childid[0]);
3065 assert(leafa->nextleaf == leafb);
3066 assert(leafa == leafb->prevleaf);
3067 (void)leafa; (void)leafb;
3073 /// Verify the double linked list of leaves.
3074 void verify_leaflinks() const
3076 const leaf_node *n = headleaf;
3078 assert(n->level == 0);
3079 assert(!n || n->prevleaf == NULL);
3081 unsigned int testcount = 0;
3085 assert(n->level == 0);
3087 for(unsigned short slot = 0; slot < n->slotuse - 1; ++slot)
3089 assert(key_lessequal(n->slotkey[slot], n->slotkey[slot + 1]));
3092 testcount += n->slotuse;
3096 assert(key_lessequal(n->slotkey[n->slotuse-1], n->nextleaf->slotkey[0]));
3098 assert(n == n->nextleaf->prevleaf);
3102 assert(tailleaf == n);
3108 assert(testcount == size());
3112 // *** Dump and Restore of B+ Trees
3114 /// \internal A header for the binary image containing the base properties
3115 /// of the B+ tree. These properties have to match the current template
3119 /// "stx-btree", just to stop the restore() function from loading garbage
3123 unsigned short version;
3125 /// sizeof(key_type)
3126 unsigned short key_type_size;
3128 /// sizeof(data_type)
3129 unsigned short data_type_size;
3131 /// Number of slots in the leaves
3132 unsigned short leafslots;
3134 /// Number of slots in the inner nodes
3135 unsigned short innerslots;
3137 /// Allow duplicates
3138 bool allow_duplicates;
3140 /// The item count of the tree
3141 size_type itemcount;
3143 /// Fill the struct with the current B+ tree's properties, itemcount is
3147 // don't want to include string.h just for this signature
3148 *reinterpret_cast<unsigned int*>(signature+0) = 0x2d787473;
3149 *reinterpret_cast<unsigned int*>(signature+4) = 0x65727462;
3150 *reinterpret_cast<unsigned int*>(signature+8) = 0x00000065;
3153 key_type_size = sizeof(typename btree_self::key_type);
3154 data_type_size = sizeof(typename btree_self::data_type);
3155 leafslots = btree_self::leafslotmax;
3156 innerslots = btree_self::innerslotmax;
3157 allow_duplicates = btree_self::allow_duplicates;
3160 /// Returns true if the headers have the same vital properties
3161 inline bool same(const struct dump_header &o) const
3163 return (*reinterpret_cast<const unsigned int*>(signature+0) ==
3164 *reinterpret_cast<const unsigned int*>(o.signature+0))
3165 && (*reinterpret_cast<const unsigned int*>(signature+4) ==
3166 *reinterpret_cast<const unsigned int*>(o.signature+4))
3167 && (*reinterpret_cast<const unsigned int*>(signature+8) ==
3168 *reinterpret_cast<const unsigned int*>(o.signature+8))
3170 && (version == o.version)
3171 && (key_type_size == o.key_type_size)
3172 && (data_type_size == o.data_type_size)
3173 && (leafslots == o.leafslots)
3174 && (innerslots == o.innerslots)
3175 && (allow_duplicates == o.allow_duplicates);
3181 /// Dump the contents of the B+ tree out onto an ostream as a binary
3182 /// image. The image contains memory pointers which will be fixed when the
3183 /// image is restored. For this to work your key_type and data_type must be
3184 /// integral types and contain no pointers or references.
3185 void dump(std::ostream &os) const
3187 struct dump_header header;
3189 header.itemcount = size();
3191 os.write(reinterpret_cast<char*>(&header), sizeof(header));
3194 dump_node(os, root);
3197 /// Restore a binary image of a dumped B+ tree from an istream. The B+ tree
3198 /// pointers are fixed using the dump order. For dump and restore to work
3199 /// your key_type and data_type must be integral types and contain no
3200 /// pointers or references. Returns true if the restore was successful.
3201 bool restore(std::istream &is)
3203 struct dump_header fileheader;
3204 is.read(reinterpret_cast<char*>(&fileheader), sizeof(fileheader));
3205 if (!is.good()) return false;
3207 struct dump_header myheader;
3209 myheader.itemcount = fileheader.itemcount;
3211 if (!myheader.same(fileheader))
3213 BTREE_PRINT("btree::restore: file header does not match instantiation signature." << std::endl);
3219 if (fileheader.itemcount > 0)
3221 root = restore_node(is);
3222 if (root == NULL) return false;
3224 stats.itemcount = fileheader.itemcount;
3228 if (debug) print(std::cout);
3230 if (selfverify) verify();
3237 /// Recursively descend down the tree and dump each node in a precise order
3238 void dump_node(std::ostream &os, const node* n) const
3240 BTREE_PRINT("dump_node " << n << std::endl);
3242 if (n->isleafnode())
3244 const leaf_node *leaf = static_cast<const leaf_node*>(n);
3246 os.write(reinterpret_cast<const char*>(leaf), sizeof(*leaf));
3248 else // !n->isleafnode()
3250 const inner_node *inner = static_cast<const inner_node*>(n);
3252 os.write(reinterpret_cast<const char*>(inner), sizeof(*inner));
3254 for(unsigned short slot = 0; slot <= inner->slotuse; ++slot)
3256 const node *subnode = inner->childid[slot];
3258 dump_node(os, subnode);
3263 /// Read the dump image and construct a tree from the node order in the
3265 node* restore_node(std::istream &is)
3273 // first read only the top of the node
3274 is.read(reinterpret_cast<char*>(&nu.top), sizeof(nu.top));
3275 if (!is.good()) return NULL;
3277 if (nu.top.isleafnode())
3279 // read remaining data of leaf node
3280 is.read(reinterpret_cast<char*>(&nu.leaf) + sizeof(nu.top), sizeof(nu.leaf) - sizeof(nu.top));
3281 if (!is.good()) return NULL;
3283 leaf_node *newleaf = allocate_leaf();
3285 // copy over all data, the leaf nodes contain only their double linked list pointers
3288 // reconstruct the linked list from the order in the file
3289 if (headleaf == NULL) {
3290 BTREE_ASSERT(newleaf->prevleaf == NULL);
3291 headleaf = tailleaf = newleaf;
3294 newleaf->prevleaf = tailleaf;
3295 tailleaf->nextleaf = newleaf;
3303 // read remaining data of inner node
3304 is.read(reinterpret_cast<char*>(&nu.inner) + sizeof(nu.top), sizeof(nu.inner) - sizeof(nu.top));
3305 if (!is.good()) return NULL;
3307 inner_node *newinner = allocate_inner(0);
3309 // copy over all data, the inner nodes contain only pointers to their children
3310 *newinner = nu.inner;
3312 for(unsigned short slot = 0; slot <= newinner->slotuse; ++slot)
3314 newinner->childid[slot] = restore_node(is);
3324 #endif // _STX_RA_BTREE_H_