Skip to content

B Tree

A B-Tree is a self-balancing search tree designed to handle large amounts of data and minimize disk I/O operations. It is widely used in databases and file systems due to its efficiency in maintaining sorted data and supporting multi-way search.


Properties of B-Tree

  1. Balanced Structure:
    1. Every leaf node is at the same depth.
  2. Node Structure:
    1. Each node can contain multiple keys.
    1. Each node can have a maximum of MMM children (determined by the order of the B-Tree, MMM).
  3. Key Distribution:
    1. Keys in a node are stored in sorted order.
    1. For a node with nnn keys:
      1. Keys in the left subtree are smaller.
      1. Keys in the right subtree are larger.
  4. Node Capacity:
    1. A node can have at most M−1M – 1M−1 keys and at least ⌈M/2⌉−1\lceil M/2 \rceil – 1⌈M/2⌉−1 keys (except the root).
  5. Search Complexity:
    1. Search, insertion, and deletion are all O(log⁡n)O(\log n)O(logn).

B-Tree Terminology

  • Order (MMM): The maximum number of children a node can have.
  • Internal Node: A node that is not a leaf.
  • Leaf Node: A node without children.
  • Root Node: The topmost node in the tree.

Operations on B-Tree

1. Search

  • Search starts at the root and proceeds by comparing keys in each node.
  • If the key is found, the operation stops.
  • Otherwise, the appropriate subtree is selected based on the key value.

2. Insertion

  • Keys are inserted into a node until the node is full.
  • If a node becomes full, it is split into two nodes, and the middle key is promoted to the parent node.

3. Deletion

  • Keys are deleted while ensuring the tree remains balanced.
  • Rebalancing is performed by borrowing keys from sibling nodes or merging nodes.

Node Structure in C

#define MAX 3 // Maximum degree (order) of the B-Tree

#define MIN (MAX / 2) // Minimum degree

struct BTreeNode {

    int keys[MAX + 1]; // Array to store keys

    struct BTreeNode* children[MAX + 2]; // Array to store child pointers

    int numKeys; // Number of keys currently in the node

    int isLeaf; // 1 if the node is a leaf, 0 otherwise

};


Implementation of B-Tree in C

Below is a simplified implementation of a B-Tree with insertion and search operations:

1. Create a Node

#include <stdio.h>

#include <stdlib.h>

#define MAX 3

#define MIN (MAX / 2)

struct BTreeNode {

    int keys[MAX + 1];

    struct BTreeNode* children[MAX + 2];

    int numKeys;

    int isLeaf;

};

// Create a new node

struct BTreeNode* createNode(int isLeaf) {

    struct BTreeNode* node = (struct BTreeNode*)malloc(sizeof(struct BTreeNode));

    node->numKeys = 0;

    node->isLeaf = isLeaf;

    for (int i = 0; i < MAX + 2; i++) {

        node->children[i] = NULL;

    }

    return node;

}


2. Search in B-Tree

struct BTreeNode* search(struct BTreeNode* root, int key) {

    if (root == NULL) return NULL;

    int i = 0;

    while (i < root->numKeys && key > root->keys[i]) {

        i++;

    }

    if (i < root->numKeys && key == root->keys[i]) {

        return root; // Key found

    }

    if (root->isLeaf) {

        return NULL; // Key not found

    }

    return search(root->children[i], key);

}


3. Insert into B-Tree

void insertNonFull(struct BTreeNode* node, int key) {

    int i = node->numKeys – 1;

    if (node->isLeaf) {

        // Insert key into the correct position

        while (i >= 0 && key < node->keys[i]) {

            node->keys[i + 1] = node->keys[i];

            i–;

        }

        node->keys[i + 1] = key;

        node->numKeys++;

    } else {

        // Find the correct child for the key

        while (i >= 0 && key < node->keys[i]) {

            i–;

        }

        i++;

        // Split child if it is full

        if (node->children[i]->numKeys == MAX) {

            splitChild(node, i);

            if (key > node->keys[i]) {

                i++;

            }

        }

        insertNonFull(node->children[i], key);

    }

}

void splitChild(struct BTreeNode* parent, int i) {

    struct BTreeNode* fullChild = parent->children[i];

    struct BTreeNode* newChild = createNode(fullChild->isLeaf);

    newChild->numKeys = MIN;

    // Transfer keys to the new child

    for (int j = 0; j < MIN; j++) {

        newChild->keys[j] = fullChild->keys[j + MIN + 1];

    }

    // Transfer children if not a leaf

    if (!fullChild->isLeaf) {

        for (int j = 0; j < MIN + 1; j++) {

            newChild->children[j] = fullChild->children[j + MIN + 1];

        }

    }

    fullChild->numKeys = MIN;

    // Insert new child into parent

    for (int j = parent->numKeys; j >= i + 1; j–) {

        parent->children[j + 1] = parent->children[j];

    }

    parent->children[i + 1] = newChild;

    for (int j = parent->numKeys – 1; j >= i; j–) {

        parent->keys[j + 1] = parent->keys[j];

    }

    parent->keys[i] = fullChild->keys[MIN];

    parent->numKeys++;

}

struct BTreeNode* insert(struct BTreeNode* root, int key) {

    if (root == NULL) {

        root = createNode(1);

        root->keys[0] = key;

        root->numKeys = 1;

        return root;

    }

    if (root->numKeys == MAX) {

        struct BTreeNode* newRoot = createNode(0);

        newRoot->children[0] = root;

        splitChild(newRoot, 0);

        root = newRoot;

        insertNonFull(root, key);

    } else {

        insertNonFull(root, key);

    }

    return root;

}


4. Traverse the B-Tree

void traverse(struct BTreeNode* root) {

    if (root == NULL) return;

    for (int i = 0; i < root->numKeys; i++) {

        if (!root->isLeaf) {

            traverse(root->children[i]);

        }

        printf(“%d “, root->keys[i]);

    }

    if (!root->isLeaf) {

        traverse(root->children[root->numKeys]);

    }

}


Main Function

int main() {

    struct BTreeNode* root = NULL;

    root = insert(root, 10);

    root = insert(root, 20);

    root = insert(root, 5);

    root = insert(root, 6);

    root = insert(root, 12);

    root = insert(root, 30);

    root = insert(root, 7);

    root = insert(root, 17);

    printf(“Traversal of B-Tree:\n”);

    traverse(root);

    return 0;

}


Output

For the sequence 10,20,5,6,12,30,7,1710, 20, 5, 6, 12, 30, 7, 1710,20,5,6,12,30,7,17:

Traversal of B-Tree:

5 6 7 10 12 17 20 30


Advantages of B-Tree

  1. Efficient disk usage due to fewer I/O operations.
  2. Balanced structure ensures O(log⁡n)O(\log n)O(logn) operations.
  3. Supports multiple keys per node, making it suitable for databases.

Disadvantages

  1. Complex implementation compared to binary trees.
  2. May involve frequent rebalancing during insertions and deletions.