Skip to content

B+ Tree

A B+ Tree is an advanced self-balancing tree data structure commonly used in databases and file systems. It is an extension of the B-Tree with improved search performance and better support for range queries. Unlike B-Trees, all the keys in a B+ Tree are stored in the leaf nodes, and internal nodes only serve as an index for navigation.


Properties of B+ Tree

  1. All Keys in Leaf Nodes:
    1. Keys are stored in sorted order in the leaf nodes.
    1. Internal nodes contain only index keys for navigation.
  2. Balanced Structure:
    1. Like B-Trees, all leaf nodes are at the same level.
  3. Linked Leaf Nodes:
    1. Leaf nodes are connected via pointers to allow sequential access for range queries.
  4. Node Capacity:
    1. Each node can have a maximum of M−1M – 1M−1 keys and MMM children (for a tree of order MMM).
  5. Efficient Searching:
    1. Internal nodes direct the search, while all key comparisons occur in the leaf nodes.

Advantages of B+ Tree

  1. Fast Range Queries:
    1. Sequential access through linked leaf nodes.
  2. Efficient Disk I/O:
    1. Used in databases to handle large datasets due to minimal disk access.
  3. Balanced Search Tree:
    1. Ensures O(log⁡n)O(\log n)O(logn) complexity for search, insertion, and deletion.

Node Structure in C

#define MAX 3  // Maximum degree/order of the B+ Tree

#define MIN (MAX + 1) / 2  // Minimum number of keys

struct BPlusNode {

    int keys[MAX + 1];               // Array to store keys

    struct BPlusNode* children[MAX + 2];  // Pointers to child nodes

    struct BPlusNode* next;          // Pointer to the next leaf node (for leaf nodes)

    int isLeaf;                      // 1 if node is a leaf, 0 otherwise

    int numKeys;                     // Number of keys in the node

};


Operations in B+ Tree

1. Search Operation

  • The search begins at the root and navigates down to the appropriate child until a leaf node is reached.
  • The key is located in the leaf node.

struct BPlusNode* search(struct BPlusNode* root, int key) {

    if (root == NULL) return NULL;

    int i = 0;

    while (i < root->numKeys && key > root->keys[i]) {

        i++;

    }

    if (root->isLeaf) {

        if (i < root->numKeys && root->keys[i] == key) {

            return root; // Key found

        } else {

            return NULL; // Key not found

        }

    }

    return search(root->children[i], key);

}


2. Insert Operation

  • Insertions start by finding the appropriate leaf node.
  • If the leaf node is full, it is split into two nodes, and the middle key is promoted to the parent.
  • Splits propagate up the tree as necessary.

Split a Node

void splitChild(struct BPlusNode* parent, int i, struct BPlusNode* child) {

    struct BPlusNode* newChild = createNode(child->isLeaf);

    newChild->numKeys = MIN – 1;

    for (int j = 0; j < MIN – 1; j++) {

        newChild->keys[j] = child->keys[j + MIN];

    }

    if (!child->isLeaf) {

        for (int j = 0; j < MIN; j++) {

            newChild->children[j] = child->children[j + MIN];

        }

    }

    child->numKeys = MIN – 1;

    for (int j = parent->numKeys; j >= i + 1; j–) {

        parent->children[j + 1] = parent->children[j];

    }

    parent->children[i + 1] = newChild;

    for (int j = parent->numKeys – 1; j >= i; j–) {

        parent->keys[j + 1] = parent->keys[j];

    }

    parent->keys[i] = child->keys[MIN – 1];

    parent->numKeys++;

}

Insert a Key

void insertNonFull(struct BPlusNode* node, int key) {

    int i = node->numKeys – 1;

    if (node->isLeaf) {

        while (i >= 0 && key < node->keys[i]) {

            node->keys[i + 1] = node->keys[i];

            i–;

        }

        node->keys[i + 1] = key;

        node->numKeys++;

    } else {

        while (i >= 0 && key < node->keys[i]) {

            i–;

        }

        i++;

        if (node->children[i]->numKeys == MAX) {

            splitChild(node, i, node->children[i]);

            if (key > node->keys[i]) {

                i++;

            }

        }

        insertNonFull(node->children[i], key);

    }

}

struct BPlusNode* insert(struct BPlusNode* root, int key) {

    if (root == NULL) {

        root = createNode(1);

        root->keys[0] = key;

        root->numKeys = 1;

        return root;

    }

    if (root->numKeys == MAX) {

        struct BPlusNode* newRoot = createNode(0);

        newRoot->children[0] = root;

        splitChild(newRoot, 0, root);

        root = newRoot;

    }

    insertNonFull(root, key);

    return root;

}


3. Traversal

Traverse the leaf nodes sequentially for range queries or all keys.

void traverse(struct BPlusNode* root) {

    if (root == NULL) return;

    if (root->isLeaf) {

        for (int i = 0; i < root->numKeys; i++) {

            printf(“%d “, root->keys[i]);

        }

        return;

    }

    for (int i = 0; i < root->numKeys; i++) {

        traverse(root->children[i]);

        printf(“%d “, root->keys[i]);

    }

    traverse(root->children[root->numKeys]);

}


Main Function

#include <stdio.h>

#include <stdlib.h>

#define MAX 3

// Node structure

struct BPlusNode {

    int keys[MAX + 1];

    struct BPlusNode* children[MAX + 2];

    struct BPlusNode* next; // Next pointer for leaf nodes

    int isLeaf;

    int numKeys;

};

// Function prototypes

struct BPlusNode* createNode(int isLeaf);

struct BPlusNode* insert(struct BPlusNode* root, int key);

void traverse(struct BPlusNode* root);

int main() {

    struct BPlusNode* root = NULL;

    root = insert(root, 10);

    root = insert(root, 20);

    root = insert(root, 5);

    root = insert(root, 6);

    root = insert(root, 12);

    root = insert(root, 30);

    root = insert(root, 7);

    root = insert(root, 17);

    printf(“Traversal of B+ Tree:\n”);

    traverse(root);

    return 0;

}


Output

For the input sequence 10,20,5,6,12,30,7,1710, 20, 5, 6, 12, 30, 7, 1710,20,5,6,12,30,7,17, the traversal outputs:

Traversal of B+ Tree:

5 6 7 10 12 17 20 30


Applications of B+ Tree

  1. Databases:
    1. Efficient indexing and range queries.
  2. File Systems:
    1. Directory management and file organization.
  3. Search Engines:
    1. Used for high-performance search and retrieval.