The data structures that separate Div 2 solvers from Div 1 solvers. Fenwick trees, segment trees with lazy propagation, sparse tables, HLD, centroid decomposition, treaps, and more -- with full implementations in Python and C++.
A Fenwick tree (BIT) supports point updates and prefix queries in O(log n) with minimal memory. The key insight: every index i is responsible for a range of elements determined by the lowest set bit of i.
lowbit(x) = x & (-x) -- isolates the lowest set bit
Update(i, delta): add delta to index i, then i += lowbit(i) until i > n
Query(i): sum from [1..i], subtract lowbit(i) from i until i == 0
Both operations: O(log n) time, O(n) space
C++
// Fenwick tree (binary indexed tree) over 1-indexed positions 1..n.
// Point add and prefix/range sums, each in O(log n); O(n) memory.
struct BIT {
    int n;                   // number of elements
    vector<long long> tree;  // tree[i] covers a[i - lowbit(i) + 1 .. i]
    BIT(int n) : n(n), tree(n + 1, 0) {}
    // Add delta to position i (1-indexed).
    void update(int i, long long delta) {
        while (i <= n) {
            tree[i] += delta;
            i += i & (-i);   // hop to the next node responsible for i
        }
    }
    // Sum of a[1..i].
    long long query(int i) {
        long long total = 0;
        while (i > 0) {
            total += tree[i];
            i -= i & (-i);   // strip the lowest set bit
        }
        return total;
    }
    // Sum of a[l..r], inclusive.
    long long query(int l, int r) { return query(r) - query(l - 1); }
};
Python
class BIT:
    """Fenwick tree (binary indexed tree) over 1-indexed positions 1..n.

    Supports point add and prefix/range sums, each in O(log n).
    """

    def __init__(self, n):
        self.n = n
        self.tree = [0] * (n + 1)  # tree[i] covers a[i - lowbit(i) + 1 .. i]

    def update(self, i, delta):
        """Add delta to position i (1-indexed)."""
        while i <= self.n:
            self.tree[i] += delta
            i += i & (-i)  # hop to the next node responsible for i

    def query(self, i):
        """Return the sum of a[1..i]."""
        total = 0
        while i > 0:
            total += self.tree[i]
            i -= i & (-i)  # strip the lowest set bit
        return total

    def range_query(self, l, r):
        """Return the sum of a[l..r], inclusive."""
        return self.query(r) - self.query(l - 1)
Fenwick trees use a quarter of the memory (n+1 entries vs 4n), have smaller constants, and the code is much shorter. Use them whenever you only need prefix sums + point updates. Switch to a segment tree when you need lazy propagation or non-invertible operations (like min/max).
To support range updates [l, r] += val and point queries, use a difference array approach: update(l, val) and update(r+1, -val). Then query(i) gives the value at index i.
C++
// Range update [l, r] += val, point query at i.
// Difference-array trick on top of a plain BIT: only deltas are stored,
// so the prefix sum up to i reconstructs the current value of a[i].
void range_update(BIT& bit, int l, int r, long long val) {
    bit.update(l, val);       // every prefix sum from l onward gains +val
    bit.update(r + 1, -val);  // ...and loses it past r (no-op when r + 1 > n)
}
// query(i) now returns the actual value at position i (NOT a prefix sum).
// Do not mix this mode with ordinary point-update / prefix-sum usage on
// the same BIT -- the two interpretations are incompatible.
Maintain two BITs: B1 and B2. For range update [l, r] += val:
B1.update(l, val), B1.update(r+1, -val)
B2.update(l, val*(l-1)), B2.update(r+1, -val*r)
prefix_sum(i) = B1.query(i) * i - B2.query(i)
C++
// 2D Fenwick tree over a 1-indexed N x M grid.
// Point add and rectangle sum, each in O(log N * log M); O(N * M) memory.
struct BIT2D {
    int N, M;
    vector<vector<long long>> tree;
    BIT2D(int n, int m) : N(n), M(m), tree(n + 1, vector<long long>(m + 1, 0)) {}
    // Add val at cell (x, y): walk the Fenwick "responsibility" indices
    // independently along each dimension.
    void update(int x, int y, long long val) {
        for (int i = x; i <= N; i += i & (-i))
            for (int j = y; j <= M; j += j & (-j))
                tree[i][j] += val;
    }
    // Sum of the prefix rectangle (1, 1)..(x, y).
    long long query(int x, int y) {
        long long s = 0;
        for (int i = x; i > 0; i -= i & (-i))
            for (int j = y; j > 0; j -= j & (-j))
                s += tree[i][j];
        return s;
    }
    // Sum of the rectangle (x1, y1)..(x2, y2) by 2D inclusion-exclusion.
    long long query(int x1, int y1, int x2, int y2) {
        return query(x2, y2) - query(x1 - 1, y2)
             - query(x2, y1 - 1) + query(x1 - 1, y1 - 1);
    }
};
Python
class BIT2D:
    """2D Fenwick tree on a 1-indexed N x M grid.

    update(x, y, val): point add in O(log N * log M).
    query(x, y): sum of the prefix rectangle (1, 1)..(x, y).
    """

    def __init__(self, n, m):
        self.N, self.M = n, m
        self.tree = [[0] * (m + 1) for _ in range(n + 1)]

    @staticmethod
    def _ascending(i, limit):
        # Indices reached by repeatedly adding the lowest set bit.
        while i <= limit:
            yield i
            i += i & (-i)

    @staticmethod
    def _descending(i):
        # Indices reached by repeatedly stripping the lowest set bit.
        while i > 0:
            yield i
            i -= i & (-i)

    def update(self, x, y, val):
        """Add val at cell (x, y), both coordinates 1-indexed."""
        for i in self._ascending(x, self.N):
            row = self.tree[i]
            for j in self._ascending(y, self.M):
                row[j] += val

    def query(self, x, y):
        """Return the sum over the rectangle (1, 1)..(x, y)."""
        total = 0
        for i in self._descending(x):
            row = self.tree[i]
            for j in self._descending(y):
                total += row[j]
        return total
Index: 1 2 3 4 5 6 7 8
lowbit: 1 2 1 4 1 2 1 8
Responsibility ranges:
tree[1] = a[1]
tree[2] = a[1..2]
tree[3] = a[3]
tree[4] = a[1..4]
tree[5] = a[5]
tree[6] = a[5..6]
tree[7] = a[7]
tree[8] = a[1..8]
query(7) = tree[7] + tree[6] + tree[4]
= a[7] + a[5..6] + a[1..4]
= a[1..7] (remove lowest bit each step: 7->6->4->0)
The most versatile range query data structure in competitive programming. Supports arbitrary associative operations, range updates with lazy propagation, and can be made persistent.
Build: O(n) | Update: O(log n) | Query: O(log n) | Space: O(4n)
C++
// Sum segment tree with point assignment and range-sum queries.
// Flat-array layout: root is node 1 covering [0, n-1]; node v has children
// 2v (left half) and 2v+1 (right half). 4n slots always suffice.
struct SegTree {
    int n;
    vector<long long> tree;
    SegTree(int n) : n(n), tree(4 * n, 0) {}
    // Initialize node v (covering a[tl..tr]) from the source array.
    // Call as build(a, 1, 0, n-1).
    void build(vector<int>& a, int v, int tl, int tr) {
        if (tl == tr) {
            tree[v] = a[tl];
            return;
        }
        int tm = (tl + tr) / 2;
        build(a, 2*v, tl, tm);
        build(a, 2*v+1, tm+1, tr);
        tree[v] = tree[2*v] + tree[2*v+1];  // pull children's sums up
    }
    // Point assignment a[pos] = val; refresh sums on the root-leaf path.
    void update(int v, int tl, int tr, int pos, int val) {
        if (tl == tr) {
            tree[v] = val;
            return;
        }
        int tm = (tl + tr) / 2;
        if (pos <= tm)
            update(2*v, tl, tm, pos, val);
        else
            update(2*v+1, tm+1, tr, pos, val);
        tree[v] = tree[2*v] + tree[2*v+1];
    }
    // Sum over a[l..r]; call as query(1, 0, n-1, l, r).
    long long query(int v, int tl, int tr, int l, int r) {
        if (l > r) return 0;                     // empty range
        if (l == tl && r == tr) return tree[v];  // node exactly covers [l, r]
        int tm = (tl + tr) / 2;
        return query(2*v, tl, tm, l, min(r, tm))
             + query(2*v+1, tm+1, tr, max(l, tm+1), r);
    }
};
Python
class SegTree:
    """Sum segment tree with point assignment and range-sum queries.

    Flat-array layout: node v has children 2v and 2v+1; node 1 is the
    root and covers [0, n-1]. Every public method takes the current node
    v plus its segment [tl, tr] explicitly (start calls use 1, 0, n-1).
    """

    def __init__(self, n):
        self.n = n
        self.tree = [0] * (4 * n)  # 4n slots always suffice

    def build(self, a, v, tl, tr):
        """Initialize node v, covering a[tl..tr], from the list a."""
        if tl == tr:
            self.tree[v] = a[tl]
        else:
            mid = (tl + tr) // 2
            self.build(a, 2 * v, tl, mid)
            self.build(a, 2 * v + 1, mid + 1, tr)
            self.tree[v] = self.tree[2 * v] + self.tree[2 * v + 1]

    def update(self, v, tl, tr, pos, val):
        """Assign a[pos] = val, refreshing sums along the root-leaf path."""
        if tl == tr:
            self.tree[v] = val
            return
        mid = (tl + tr) // 2
        if pos <= mid:
            self.update(2 * v, tl, mid, pos, val)
        else:
            self.update(2 * v + 1, mid + 1, tr, pos, val)
        self.tree[v] = self.tree[2 * v] + self.tree[2 * v + 1]

    def query(self, v, tl, tr, l, r):
        """Return the sum of a[l..r]; call as query(1, 0, n-1, l, r)."""
        if l > r:
            return 0  # empty range
        if (l, r) == (tl, tr):
            return self.tree[v]  # node exactly covers the query
        mid = (tl + tr) // 2
        left = self.query(2 * v, tl, mid, l, min(r, mid))
        right = self.query(2 * v + 1, mid + 1, tr, max(l, mid + 1), r)
        return left + right
[11] node 1: sum(0..3)
/ \
[3] [8] node 2: sum(0..1), node 3: sum(2..3)
/ \ / \
[2] [1] [5] [3] leaves: a[0], a[1], a[2], a[3]
query(1, 2) = go left child right + go right child left
= tree[5]=1 + tree[6]=5 = 6
C++
// Sum segment tree with lazy range-add: range update and range-sum query
// in O(log n). lazy[v] holds a pending "+= value" that has already been
// applied to tree[v] itself but not yet to v's children.
struct LazySegTree {
    int n;
    vector<long long> tree, lazy;
    LazySegTree(int n) : n(n), tree(4*n, 0), lazy(4*n, 0) {}
    // Propagate v's pending add to both children, then clear it.
    // Must run before descending into the children.
    void push(int v, int tl, int tr) {
        if (lazy[v]) {
            int tm = (tl + tr) / 2;
            apply(2*v, tl, tm, lazy[v]);
            apply(2*v+1, tm+1, tr, lazy[v]);
            lazy[v] = 0;
        }
    }
    // Add val to every element of v's segment: update BOTH the node's sum
    // and the lazy tag that will later reach v's children.
    void apply(int v, int tl, int tr, long long val) {
        tree[v] += val * (tr - tl + 1);
        lazy[v] += val;
    }
    // Range update a[l..r] += val; call as update(1, 0, n-1, l, r, val).
    void update(int v, int tl, int tr, int l, int r, long long val) {
        if (l > r) return;           // empty range
        if (l == tl && r == tr) {    // exact cover: defer via the lazy tag
            apply(v, tl, tr, val);
            return;
        }
        push(v, tl, tr);             // push BEFORE recursing
        int tm = (tl + tr) / 2;
        update(2*v, tl, tm, l, min(r, tm), val);
        update(2*v+1, tm+1, tr, max(l, tm+1), r, val);
        tree[v] = tree[2*v] + tree[2*v+1];
    }
    // Range sum over a[l..r]; call as query(1, 0, n-1, l, r).
    long long query(int v, int tl, int tr, int l, int r) {
        if (l > r) return 0;
        if (l == tl && r == tr) return tree[v];
        push(v, tl, tr);             // push BEFORE recursing
        int tm = (tl + tr) / 2;
        return query(2*v, tl, tm, l, min(r, tm))
             + query(2*v+1, tm+1, tr, max(l, tm+1), r);
    }
};
Always push lazy values BEFORE recursing into children during both update and query. Forgetting to push is the #1 bug. Also make sure your apply function updates BOTH the tree node AND the lazy tag.
Create a new version of the tree on each update by only copying the O(log n) nodes on the path from root to the updated leaf. All other nodes are shared between versions.
C++
// Persistent sum segment tree via path copying: each update returns the
// root of a new version, copying only the O(log n) nodes on the updated
// path. Node links are indices into a static pool, not pointers.
struct Node {
    int left, right;   // child indices into nodes[]
    long long val;     // sum over this node's segment
};
// NOTE(review): 20M nodes * 16 bytes is ~320 MB of static storage, which
// exceeds common 256 MB memory limits -- size this per problem as roughly
// 2*n (initial build) + updates * ceil(log2 n).
const int MAXNODES = 20000000;
Node nodes[MAXNODES];
int cnt = 0;   // bump-allocator cursor into nodes[]
// Allocate a node with value v and children l, r; returns its index.
int newNode(long long v = 0, int l = 0, int r = 0) {
    nodes[cnt] = {l, r, v};
    return cnt++;
}
// Build an all-zero tree over [tl, tr]; returns the version-0 root index.
int build(int tl, int tr) {
    if (tl == tr) return newNode(0);
    int tm = (tl + tr) / 2;
    int l = build(tl, tm);
    int r = build(tm + 1, tr);
    return newNode(nodes[l].val + nodes[r].val, l, r);
}
// a[pos] += val on the version rooted at v; returns the new version's
// root. Subtrees off the path to pos are shared with version v.
int update(int v, int tl, int tr, int pos, long long val) {
    if (tl == tr) return newNode(nodes[v].val + val);
    int tm = (tl + tr) / 2;
    int l = nodes[v].left, r = nodes[v].right;
    if (pos <= tm)
        l = update(l, tl, tm, pos, val);
    else
        r = update(r, tm + 1, tr, pos, val);
    return newNode(nodes[l].val + nodes[r].val, l, r);
}
// Range sum over [l, r] in the version rooted at v.
long long query(int v, int tl, int tr, int l, int r) {
    if (l > r) return 0;
    if (l == tl && r == tr) return nodes[v].val;
    int tm = (tl + tr) / 2;
    return query(nodes[v].left, tl, tm, l, min(r, tm))
         + query(nodes[v].right, tm+1, tr, max(l, tm+1), r);
}
A static data structure for answering range minimum/maximum/GCD queries in O(1) after O(n log n) preprocessing. Works for any idempotent operation (where f(a, a) = a).
sparse[k][i] = answer for the range [i, i + 2^k - 1]
sparse[k][i] = op(sparse[k-1][i], sparse[k-1][i + 2^(k-1)])
Query [l, r]: k = floor(log2(r - l + 1)), answer = op(sparse[k][l], sparse[k][r - 2^k + 1])
The two ranges overlap, but for idempotent ops that is fine.
C++
// Static range-minimum structure: O(n log n) build, O(1) query.
// Works for any idempotent associative op (min here); no updates.
struct SparseTable {
    int n, LOG;
    vector<vector<int>> table;  // table[k][i] = min of a[i .. i + 2^k - 1]
    vector<int> lg;             // lg[x] = floor(log2(x)), precomputed
    SparseTable(vector<int>& a) {
        n = (int)a.size();
        lg.assign(n + 1, 0);
        for (int x = 2; x <= n; x++)
            lg[x] = lg[x / 2] + 1;
        // Portable replacement for the GCC-only `__lg(n) + 1`: the lg[]
        // table we need anyway already gives floor(log2(n)).
        LOG = lg[n] + 1;
        table.assign(LOG, vector<int>(n));
        for (int i = 0; i < n; i++)
            table[0][i] = a[i];     // level 0: single elements
        for (int k = 1; k < LOG; k++)
            for (int i = 0; i + (1 << k) <= n; i++)
                table[k][i] = min(table[k-1][i],
                                  table[k-1][i + (1 << (k-1))]);
    }
    // Minimum over a[l..r], inclusive, 0-indexed. The two length-2^k
    // windows overlap, which is fine because min is idempotent.
    int query(int l, int r) {
        int k = lg[r - l + 1];
        return min(table[k][l], table[k][r - (1 << k) + 1]);
    }
};
Python
import math
class SparseTable:
    """Static range-minimum structure: O(n log n) build, O(1) query.

    table[k][i] holds the minimum of a[i .. i + 2^k - 1]; level k is
    built by combining two overlapping halves of level k-1.
    """

    def __init__(self, a):
        self.n = len(a)
        self.LOG = self.n.bit_length()
        self.table = [list(a)]  # level 0: the elements themselves
        for k in range(1, self.LOG):
            below = self.table[-1]
            span = 1 << k
            half = span >> 1
            self.table.append(
                [min(below[i], below[i + half])
                 for i in range(self.n - span + 1)]
            )

    def query(self, l, r):
        """Minimum of a[l..r] (inclusive, 0-indexed).

        Uses two overlapping power-of-two windows; overlap is harmless
        because min is idempotent.
        """
        k = (r - l + 1).bit_length() - 1
        return min(self.table[k][l], self.table[k][r - (1 << k) + 1])
k=0: [3] [1] [4] [1] [5] [9] [2] [6] (individual elements) k=1: [1] [1] [1] [1] [5] [2] [2] -- (min of pairs, 7 windows) k=2: [1] [1] [1] [1] [2] -- -- -- (min of quads, 5 windows) k=3: [1] -- -- -- -- -- -- -- (min of octets, 1 window) query(2, 6): k = floor(log2(5)) = 2 = min(table[2][2], table[2][6 - 4 + 1]) = min(table[2][2], table[2][3]) = min(1, 1) = 1
Sparse table only works for static arrays (no updates). If you need updates, use a segment tree instead. Also, for non-idempotent operations like sum, the overlapping ranges trick does not work -- you would need a different query strategy (Disjoint Sparse Table) or just use a segment tree.
Union-Find with path compression + union by rank gives near O(1) amortized per operation (technically O(alpha(n)) where alpha is the inverse Ackermann function). Advanced variants add rollback and weighted edges.
Union by rank + path compression: O(alpha(n)) per operation (effectively O(1))
Union by rank only (no path compression): O(log n) per operation -- but supports rollback
C++
// Disjoint-set union with union by rank and path halving.
// find/unite run in O(alpha(n)) amortized -- effectively constant.
struct DSU {
    vector<int> parent, rank_;
    DSU(int n) : parent(n), rank_(n, 0) {
        iota(parent.begin(), parent.end(), 0);  // each node starts as its own root
    }
    // Representative of x's component, halving the path on the way up.
    int find(int x) {
        while (parent[x] != x) {
            parent[x] = parent[parent[x]];  // skip one level (path halving)
            x = parent[x];
        }
        return x;
    }
    // Merge the components of a and b; returns false if already joined.
    bool unite(int a, int b) {
        int ra = find(a), rb = find(b);
        if (ra == rb) return false;
        if (rank_[ra] < rank_[rb]) swap(ra, rb);
        parent[rb] = ra;                        // attach shallower under deeper
        if (rank_[ra] == rank_[rb]) ++rank_[ra];
        return true;
    }
};
Python
class DSU:
    """Disjoint-set union with union by rank and path halving.

    find/unite run in O(alpha(n)) amortized -- effectively constant.
    """

    def __init__(self, n):
        self.parent = list(range(n))  # each node starts as its own root
        self.rank = [0] * n

    def find(self, x):
        """Representative of x's component, halving the path on the way up."""
        root = x
        while self.parent[root] != root:
            self.parent[root] = self.parent[self.parent[root]]  # skip a level
            root = self.parent[root]
        return root

    def unite(self, a, b):
        """Merge the components of a and b; False if already joined."""
        ra, rb = self.find(a), self.find(b)
        if ra == rb:
            return False
        if self.rank[ra] < self.rank[rb]:
            ra, rb = rb, ra
        self.parent[rb] = ra  # attach the shallower tree under the deeper
        if self.rank[ra] == self.rank[rb]:
            self.rank[ra] += 1
        return True
For problems that require undoing unions (e.g., offline divide and conquer), use union by rank WITHOUT path compression and maintain a stack of changes.
C++
// DSU with rollback: union by rank but NO path compression, so each unite
// mutates at most two array slots. Those old values are pushed on a
// history stack and can be undone, which is what offline divide-and-conquer
// techniques need. find is O(log n) here instead of O(alpha(n)).
struct RollbackDSU {
    vector<int> parent, rank_;
    vector<pair<int,int>> history; // (node, old_parent_or_rank)
    RollbackDSU(int n) : parent(n), rank_(n, 0) {
        iota(parent.begin(), parent.end(), 0);
    }
    // Root of x's component. Must not mutate parent[], or rollback breaks.
    int find(int x) {
        while (x != parent[x]) x = parent[x]; // NO path compression
        return x;
    }
    // Marker for the current history position; pass it to rollback() later.
    int snapshot() { return history.size(); }
    bool unite(int a, int b) {
        a = find(a); b = find(b);
        if (a == b) return false;
        if (rank_[a] < rank_[b]) swap(a, b);
        history.push_back({b, parent[b]});   // record old parent of b
        history.push_back({~a, rank_[a]});   // ~a marks rank entry (negative for a >= 0)
        parent[b] = a;
        if (rank_[a] == rank_[b]) rank_[a]++;
        return true;
    }
    // Undo every unite performed since the matching snapshot().
    void rollback(int snap) {
        while ((int)history.size() > snap) {
            auto [node, val] = history.back();
            history.pop_back();
            if (node < 0) rank_[~node] = val;  // negative tag => rank record
            else parent[node] = val;           // otherwise => parent record
        }
    }
};
Each node stores a weight/distance relative to its parent. Useful for problems like "is the distance between a and b consistent with given constraints?"
C++
// Weighted (potential) DSU: each node stores its weight difference to its
// parent, so for nodes in one component we can answer weight(a) - weight(b)
// and detect constraints that contradict earlier ones.
struct WeightedDSU {
    vector<int> parent, rank_;
    vector<long long> diff; // diff[x] = weight(x) - weight(parent[x])
    WeightedDSU(int n) : parent(n), rank_(n, 0), diff(n, 0) {
        iota(parent.begin(), parent.end(), 0);
    }
    // Returns (root of x, weight(x) - weight(root)). Compresses the path,
    // accumulating diffs so diff[x] becomes relative to the root itself.
    pair<int, long long> find(int x) {
        if (x == parent[x]) return {x, 0};
        auto [root, d] = find(parent[x]);
        parent[x] = root;
        diff[x] += d;
        return {root, diff[x]};
    }
    // unite with constraint: weight(a) - weight(b) = w
    // Returns false only when a and b were already connected AND their
    // existing potentials contradict w.
    bool unite(int a, int b, long long w) {
        auto [ra, da] = find(a);
        auto [rb, db] = find(b);
        if (ra == rb) return (da - db) == w; // check consistency
        if (rank_[ra] < rank_[rb]) {
            // Attach the other way around; flip the constraint to match.
            swap(ra, rb); swap(da, db); w = -w;
        }
        parent[rb] = ra;
        // Chosen so that find() reproduces weight(a) - weight(b) = w:
        // weight(rb) - weight(ra) = da - db - w.
        diff[rb] = da - db - w;
        if (rank_[ra] == rank_[rb]) rank_[ra]++;
        return true;
    }
};
Decomposes a tree into chains so that any root-to-leaf path crosses at most O(log n) chains. Combined with a segment tree on the flattened array, this gives O(log^2 n) path queries and updates.
For each node, the heavy child is the child with the largest subtree. The edge to the heavy child is a heavy edge; all others are light edges.
Any path from root to leaf has at most O(log n) light edges, so at most O(log n) chain switches.
Path query: O(log^2 n) -- O(log n) chains, each queried in O(log n) on the segment tree.
Tree: 1 Heavy edges: 1-2, 2-4, 4-7
/ \ Light edges: 1-3, 2-5, 3-6
2 3
/ \ \ Chains (heavy paths):
4 5 6 Chain 0: [1, 2, 4, 7]
| Chain 1: [3, 6]
7 Chain 2: [5]
Euler tour (HLD order): 1 2 4 7 5 3 6
positions: 0 1 2 3 4 5 6
path_query(5, 6):
5 is in chain [5], head=5 -> query pos[5]=4, climb to parent(5)=2
6 is in chain [3,6], head=3 -> query pos[3..6]=[5,6], climb to parent(3)=1
Now 2 and 1 are in same chain [1,2,4,7] -> query pos[1..2]=[0,1]
Merge all three segment tree queries.
C++
// Heavy-light decomposition. After init(), pos[] maps vertices to indices
// of an array in which every heavy chain is contiguous, so any u-v path
// decomposes into O(log n) contiguous segments; feed those to a segment
// tree via path() for O(log^2 n) path queries.
struct HLD {
    int n, timer = 0;
    vector<vector<int>> adj;
    vector<int> parent, depth, heavy, head, pos, sz;
    HLD(int n) : n(n), adj(n), parent(n), depth(n),
    heavy(n, -1), head(n), pos(n), sz(n) {}
    void addEdge(int u, int v) {
        adj[u].push_back(v);
        adj[v].push_back(u);
    }
    // First DFS: parents, depths, subtree sizes, and each vertex's heavy
    // child (the child with the largest subtree; -1 for leaves).
    void dfs_sz(int v, int p, int d) {
        parent[v] = p; depth[v] = d; sz[v] = 1;
        int maxSz = 0;
        for (int u : adj[v]) {
            if (u == p) continue;
            dfs_sz(u, v, d + 1);
            sz[v] += sz[u];
            if (sz[u] > maxSz) { maxSz = sz[u]; heavy[v] = u; }
        }
    }
    // Second DFS: assign positions. Visiting the heavy child first keeps
    // each heavy chain contiguous in pos-order; h is the chain's head.
    void dfs_hld(int v, int h) {
        head[v] = h; pos[v] = timer++;
        if (heavy[v] != -1) dfs_hld(heavy[v], h);
        for (int u : adj[v]) {
            if (u == parent[v] || u == heavy[v]) continue;
            dfs_hld(u, u); // new chain starts
        }
    }
    void init(int root = 0) {
        dfs_sz(root, root, 0);
        dfs_hld(root, root);
    }
    // Query path (u, v) using a segment tree
    // Invokes f(lo, hi) once per chain segment on the u-v path (O(log n)
    // times); f should aggregate [lo, hi] on the underlying segment tree.
    template<typename F>
    void path(int u, int v, F&& f) {
        while (head[u] != head[v]) {
            // Always climb out of the chain whose head is deeper.
            if (depth[head[u]] < depth[head[v]]) swap(u, v);
            f(pos[head[u]], pos[u]); // query segment [pos[head[u]], pos[u]]
            u = parent[head[u]];
        }
        // Same chain now; the remaining piece is one contiguous segment.
        if (depth[u] > depth[v]) swap(u, v);
        f(pos[u], pos[v]); // query segment within same chain
    }
};
For trees with up to 2*10^5 nodes, recursive DFS may cause a stack overflow (especially on Codeforces with its default stack size). Use iterative DFS or increase the stack size with a pragma/thread trick in C++.
Recursively find the centroid of the tree, remove it, and recurse on each remaining subtree. This creates a "centroid tree" of depth O(log n). Any path in the original tree passes through the LCA in the centroid tree.
The centroid of a tree is a node whose removal splits the tree into components each of size at most n/2.
The centroid tree has depth O(log n).
Every path u-v in the original tree passes through either u's or v's ancestor in the centroid tree.
C++
// Centroid decomposition: repeatedly pick the centroid (node whose removal
// leaves components of size <= n/2), cut it, and recurse on the pieces.
// After init(), par[c] is c's parent in the centroid tree (-1 at the top);
// the centroid tree has depth O(log n).
struct CentroidDecomp {
    int n;
    vector<vector<int>> adj;
    vector<int> sz, par;    // scratch subtree sizes; centroid-tree parent
    vector<bool> removed;   // nodes already chosen as centroids
    CentroidDecomp(int n) : n(n), adj(n), sz(n), par(n, -1), removed(n, false) {}
    void addEdge(int u, int v) {
        adj[u].push_back(v);
        adj[v].push_back(u);
    }
    // Subtree sizes within the current (not-yet-removed) component.
    void calc_sz(int v, int p) {
        sz[v] = 1;
        for (int u : adj[v])
            if (u != p && !removed[u]) {
                calc_sz(u, v);
                sz[v] += sz[u];
            }
    }
    // Walk toward any neighbor subtree larger than half until none is.
    int centroid(int v, int p, int tree_sz) {
        for (int u : adj[v])
            if (u != p && !removed[u] && sz[u] > tree_sz / 2)
                return centroid(u, v, tree_sz);
        return v;
    }
    // Decompose the component containing v; p is its centroid-tree parent.
    void build(int v, int p) {
        calc_sz(v, -1);
        int c = centroid(v, -1, sz[v]);
        par[c] = p;
        removed[c] = true;   // cut c, then recurse on each remaining piece
        for (int u : adj[c])
            if (!removed[u])
                build(u, c);
    }
    void init() { build(0, -1); }
};
Python
import sys
sys.setrecursionlimit(300000)
class CentroidDecomp:
    """Centroid decomposition of a tree.

    After init(), self.par[c] is c's parent in the centroid tree (-1 for
    the top centroid). The centroid tree has depth O(log n).
    """

    def __init__(self, n):
        self.n = n
        self.adj = [[] for _ in range(n)]
        self.sz = [0] * n           # scratch: subtree sizes per component
        self.par = [-1] * n         # centroid-tree parent
        self.removed = [False] * n  # nodes already chosen as centroids

    def add_edge(self, u, v):
        self.adj[u].append(v)
        self.adj[v].append(u)

    def calc_sz(self, v, p):
        """Compute subtree sizes within the current (not-removed) component."""
        self.sz[v] = 1
        for u in self.adj[v]:
            if u != p and not self.removed[u]:
                self.calc_sz(u, v)
                self.sz[v] += self.sz[u]

    def centroid(self, v, p, tree_sz):
        """Walk toward any neighbor subtree larger than half until none is."""
        for u in self.adj[v]:
            if u != p and not self.removed[u] and self.sz[u] > tree_sz // 2:
                return self.centroid(u, v, tree_sz)
        return v

    def build(self, v, p):
        """Decompose the component containing v; p is its centroid-tree parent."""
        self.calc_sz(v, -1)
        c = self.centroid(v, -1, self.sz[v])
        self.par[c] = p
        self.removed[c] = True  # cut c, then recurse on each remaining piece
        for u in self.adj[c]:
            if not self.removed[u]:
                self.build(u, c)

    def init(self):
        self.build(0, -1)
Use centroid decomposition for path queries on trees where you need to aggregate over all paths through a node: closest marked node, count of paths with given property, distance queries. For path updates/queries on edges (like "update all edges on path u-v"), HLD is usually a better fit.
A treap is a BST where each node also has a random priority, maintaining heap order on priorities. This ensures O(log n) expected height. An implicit treap uses the position (subtree size) as the implicit key, enabling array-like operations (insert at index, delete at index, reverse a subarray) in O(log n).
Split(t, k): split treap t into two treaps: one with first k elements, one with the rest.
Merge(l, r): merge two treaps where all keys in l < all keys in r.
All operations (insert, delete, reverse, query) are built from split + merge.
C++
// Implicit treap: an array-like structure with O(log n) expected
// split/merge, positional insert/erase, and lazy subarray reversal.
// (Nodes are heap-allocated and never freed -- fine for contest use.)
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
struct Node {
    int val, pri, sz;   // value, random heap priority, subtree size
    bool rev;           // lazy "reverse this subtree" flag
    Node *l, *r;
    Node(int v) : val(v), pri(rng()), sz(1), rev(false), l(nullptr), r(nullptr) {}
};
int sz(Node* t) { return t ? t->sz : 0; }
// Recompute t's size from its children; call after children change.
void pull(Node* t) {
    if (t) t->sz = 1 + sz(t->l) + sz(t->r);
}
// Push t's pending reversal one level down; call before descending.
void push(Node* t) {
    if (t && t->rev) {
        swap(t->l, t->r);
        if (t->l) t->l->rev ^= 1;
        if (t->r) t->r->rev ^= 1;
        t->rev = false;
    }
}
// Split t into l (the first k elements) and r (the rest).
void split(Node* t, int k, Node*& l, Node*& r) {
    if (!t) { l = r = nullptr; return; }
    push(t);
    int cur = sz(t->l) + 1;   // 1-based position of t's root element
    if (k < cur) {
        split(t->l, k, l, t->l);
        r = t;
    } else {
        split(t->r, k - cur, t->r, r);
        l = t;
    }
    pull(t);
}
// Merge l and r (all positions of l precede r) into t.
void merge(Node*& t, Node* l, Node* r) {
    if (!l || !r) { t = l ? l : r; return; }
    push(l); push(r);
    if (l->pri > r->pri) {
        merge(l->r, l->r, r);
        t = l;
    } else {
        merge(r->l, l, r->l);
        t = r;
    }
    pull(t);
}
// Reverse subarray [l, r] (0-indexed, inclusive)
void reverse(Node*& root, int l, int r) {
    Node *a, *b, *c;
    split(root, l, a, b);
    split(b, r - l + 1, b, c);
    // Guard: for an empty range (r < l, or l at/past the end) the middle
    // piece is null -- the unguarded `b->rev ^= 1` would dereference null.
    if (b) b->rev ^= 1;
    merge(root, a, b);
    merge(root, root, c);
}
Python
import random
class TreapNode:
    """Node of an implicit treap: random heap priority, subtree size,
    and a lazy 'reverse this subtree' flag."""
    __slots__ = ['val', 'pri', 'sz', 'rev', 'l', 'r']

    def __init__(self, val):
        self.val = val
        self.pri = random.randint(0, (1 << 62) - 1)  # heap priority
        self.sz = 1
        self.rev = False
        self.l = self.r = None


def sz(t):
    """Size of subtree t (0 for None)."""
    return t.sz if t else 0


def pull(t):
    """Recompute t.sz from its children; call after children change."""
    if t:
        t.sz = 1 + sz(t.l) + sz(t.r)


def push(t):
    """Push t's pending reversal one level down; call before descending."""
    if t and t.rev:
        t.l, t.r = t.r, t.l
        if t.l:
            t.l.rev ^= True
        if t.r:
            t.r.rev ^= True
        t.rev = False


def split(t, k):
    """Split t into (first k elements, the rest); returns two treaps."""
    if t is None:
        return None, None
    push(t)
    pos = sz(t.l) + 1  # 1-based position of t's root element
    if k < pos:
        left, t.l = split(t.l, k)
        pull(t)
        return left, t
    t.r, right = split(t.r, k - pos)
    pull(t)
    return t, right


def merge(l, r):
    """Concatenate treaps l and r (l's elements first); returns the root."""
    if l is None or r is None:
        return l if l is not None else r
    push(l)
    push(r)
    if l.pri > r.pri:
        l.r = merge(l.r, r)
        pull(l)
        return l
    r.l = merge(l, r.l)
    pull(r)
    return r
A segment tree specialized for the "convex hull trick" problem: maintain a set of lines y = mx + b and answer "which line gives the minimum (or maximum) y at a given x?" in O(log C) per operation, where C is the coordinate range.
Insert line y = mx + b: O(log C)
Query min/max at x: O(log C)
Works for both online and offline. Does not require lines to be inserted in sorted order (unlike CHT with deque).
C++
// Li Chao tree for the MINIMUM of a set of lines y = m*x + b over integer
// x in [lo, hi]; insert and query are O(log(hi - lo)).
struct Line {
    long long m, b;
    // NOTE(review): m * x can overflow long long; keep |m| * max|x| + |b|
    // within ~9.2e18 for the problem's bounds.
    long long eval(long long x) { return m * x + b; }
};
struct LiChaoTree {
    int lo, hi;
    // Sentinel Line{0, LLONG_MAX} evaluates to LLONG_MAX everywhere
    // (slope 0, so no overflow), i.e. "no line stored yet".
    vector<Line> tree;
    LiChaoTree(int lo, int hi) : lo(lo), hi(hi),
    tree(4 * (hi - lo + 1), Line{0, LLONG_MAX}) {}
    // Insert seg into node v covering [tl, tr]. The line that is better at
    // the midpoint stays at this node; the loser can still win on at most
    // one half (lines cross once), so we recurse into exactly one child.
    void insert(Line seg, int v, int tl, int tr) {
        int tm = (tl + tr) / 2;
        bool leftBetter = seg.eval(tl) < tree[v].eval(tl);
        bool midBetter = seg.eval(tm) < tree[v].eval(tm);
        if (midBetter) swap(tree[v], seg);  // keep the midpoint winner here
        if (tl == tr) return;
        if (leftBetter != midBetter)   // loser may still win on the left half
            insert(seg, 2*v, tl, tm);
        else                           // otherwise only on the right half
            insert(seg, 2*v+1, tm+1, tr);
    }
    void insert(Line seg) { insert(seg, 1, lo, hi); }
    // Minimum at x over all inserted lines: best of the lines stored along
    // the root-to-leaf path for x.
    long long query(int x, int v, int tl, int tr) {
        long long best = tree[v].eval(x);
        if (tl == tr) return best;
        int tm = (tl + tr) / 2;
        if (x <= tm)
            return min(best, query(x, 2*v, tl, tm));
        else
            return min(best, query(x, 2*v+1, tm+1, tr));
    }
    long long query(int x) { return query(x, 1, lo, hi); }
};
Python
class LiChaoTree:
    """Li Chao tree for the MINIMUM of lines y = m*x + b over x in [lo, hi].

    Nodes are stored sparsely in a dict (node 1 is the root, children of v
    are 2v and 2v+1), so memory is proportional to lines inserted rather
    than to the coordinate range.
    """

    def __init__(self, lo, hi):
        self.lo, self.hi = lo, hi
        self.INF = float('inf')
        self.tree = {} # sparse: node_id -> (m, b)

    def _eval(self, line, x):
        # A missing line acts as +infinity, so any real line beats it.
        if line is None:
            return self.INF
        return line[0] * x + line[1]

    def insert(self, m, b, v=1, tl=None, tr=None):
        """Insert line y = m*x + b in O(log(hi - lo)).

        The line that wins at the segment midpoint stays at this node; the
        loser can still win on at most one half, so exactly one child is
        visited per level.
        """
        if tl is None: tl, tr = self.lo, self.hi
        seg = (m, b)
        cur = self.tree.get(v)
        tm = (tl + tr) // 2
        left_better = self._eval(seg, tl) < self._eval(cur, tl)
        mid_better = self._eval(seg, tm) < self._eval(cur, tm)
        if mid_better:
            # Midpoint winner stays here; the old line keeps descending.
            self.tree[v] = seg
            seg = cur
        if tl == tr:
            return
        if left_better != mid_better:
            # Loser can still win somewhere in the left half.
            self.insert(seg[0], seg[1], 2*v, tl, tm)
        elif seg is not None:
            # Otherwise only the right half; skip when there is no loser
            # line at all (this node was previously empty).
            self.insert(seg[0], seg[1], 2*v+1, tm+1, tr)

    def query(self, x, v=1, tl=None, tr=None):
        """Minimum over all inserted lines at x, in O(log(hi - lo))."""
        if tl is None: tl, tr = self.lo, self.hi
        best = self._eval(self.tree.get(v), x)
        if tl == tr:
            return best
        tm = (tl + tr) // 2
        if x <= tm:
            return min(best, self.query(x, 2*v, tl, tm))
        else:
            return min(best, self.query(x, 2*v+1, tm+1, tr))
Li Chao tree is more general: it handles arbitrary insertion order and supports line segments (not just full lines). The classic CHT with a deque is faster (O(1) amortized) but requires queries in sorted order of x. Use Li Chao when queries come in arbitrary order or when you need to insert line segments.
Divide the array into blocks of size approximately sqrt(n). This enables O(sqrt(n)) per query for many problems. Mo's algorithm is an offline technique that orders range queries to minimize the total number of add/remove operations.
Block size B = sqrt(n). Sort queries by (l/B, r) with alternating direction for even/odd blocks.
Total pointer movements: O((n + q) * sqrt(n))
Works for any problem where you can add/remove an element from the current range in O(1) or O(log n).
Array: [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8]
|---------| |---------| |---------|
Block 0 Block 1 Block 2
B = 4 B = 4 B = 4
Block sums: 9 22 21
Point update a[5] += 3:
-> Block 1 sum += 3 (O(1) block update)
Range query sum(2, 9):
-> Partial block 0: a[2]+a[3] = 4+1 = 5
-> Full block 1: 22
-> Partial block 2: a[8]+a[9] = 5+3 = 8
-> Total = 35 (O(sqrt(n)) query)
C++
// Mo's algorithm: count distinct elements in range [l, r]
struct Query {
    int l, r, idx;   // idx = original input position, to answer in order
};
int block;           // block size for the sort below; set in solve()
// Order queries by left-endpoint block, with the right endpoint direction
// alternating on odd blocks ("odd-even" trick) to reduce pointer travel.
bool mo_cmp(const Query& a, const Query& b) {
    int ba = a.l / block, bb = b.l / block;
    if (ba != bb) return ba < bb;
    return (ba & 1) ? (a.r > b.r) : (a.r < b.r);
}
void solve() {
    int n, q;
    cin >> n >> q;
    vector<int> a(n);
    for (auto& x : a) cin >> x;
    vector<Query> queries(q);
    for (int i = 0; i < q; i++) {
        // NOTE(review): l and r are used 0-indexed below; if the input is
        // 1-indexed, subtract 1 from each here -- confirm per problem.
        cin >> queries[i].l >> queries[i].r;
        queries[i].idx = i;
    }
    block = max(1, (int)sqrt(n));
    sort(queries.begin(), queries.end(), mo_cmp);
    vector<int> cnt(1000001, 0);  // assumes values fit in [0, 1e6]
    vector<int> ans(q);
    int cur_l = 0, cur_r = -1, distinct = 0;  // current window is empty
    auto add = [&](int idx) {
        if (cnt[a[idx]]++ == 0) distinct++;   // first occurrence in window
    };
    auto rem = [&](int idx) {
        if (--cnt[a[idx]] == 0) distinct--;   // last occurrence left window
    };
    for (auto& [l, r, qi] : queries) {
        // Expand before shrinking so the window never becomes invalid.
        while (cur_r < r) add(++cur_r);
        while (cur_l > l) add(--cur_l);
        while (cur_r > r) rem(cur_r--);
        while (cur_l < l) rem(cur_l++);
        ans[qi] = distinct;
    }
    for (int x : ans) cout << x << "\n";
}
Python
import math
from collections import defaultdict
def mo_algorithm(a, queries):
    """Answer "how many distinct values in a[l..r]?" offline via Mo's ordering.

    queries: list of (l, r, original_index), 0-indexed inclusive ranges.
    Returns ans with ans[original_index] = distinct count for that range.
    Total pointer movement is O((n + q) * sqrt(n)).
    """
    n = len(a)
    block = max(1, int(math.sqrt(n)))

    def order_key(q):
        # Left-endpoint block first; alternate the right endpoint's
        # direction on odd blocks ("odd-even" trick) to halve travel.
        b = q[0] // block
        return (b, q[1] if b % 2 == 0 else -q[1])

    queries.sort(key=order_key)
    freq = defaultdict(int)
    distinct = 0
    lo, hi = 0, -1  # current window [lo, hi], initially empty
    ans = [0] * len(queries)

    def add(i):
        nonlocal distinct
        freq[a[i]] += 1
        if freq[a[i]] == 1:
            distinct += 1

    def rem(i):
        nonlocal distinct
        freq[a[i]] -= 1
        if freq[a[i]] == 0:
            distinct -= 1

    for l, r, qi in queries:
        # Expand first, then shrink, so the window never goes invalid.
        while hi < r:
            hi += 1
            add(hi)
        while lo > l:
            lo -= 1
            add(lo)
        while hi > r:
            rem(hi)
            hi -= 1
        while lo < l:
            rem(lo)
            lo += 1
        ans[qi] = distinct
    return ans
Always expand the window before shrinking. If you shrink first, you might access an invalid state (e.g., removing an element that is not in the current window). The order in the loop matters: expand cur_r, expand cur_l, shrink cur_r, shrink cur_l.
A persistent data structure preserves all previous versions after each modification. The key technique is path copying: when updating, only create new copies of the O(log n) nodes on the root-to-leaf path.
The classic application: build a persistent segment tree over the value range. Version i includes the first i elements. To query kth smallest in [l, r], subtract version l-1 from version r and walk down the tree.
1. Coordinate compress the values
2. Build version 0 (empty tree)
3. For each element a[i], create version i by inserting a[i] into version i-1
4. To find kth in [l, r]: compare left child counts of version[r] vs version[l-1]
Time: O(n log n) build, O(log n) per query. Space: O(n log n).
C++
// Persistent segment tree for kth smallest in range.
// Flat-array node pool; sum[p] = count of inserted values in p's segment.
// NOTE(review): three 20M-int arrays are ~240 MB of static storage; size
// as ~(2*M + n*ceil(log2 M)) for the actual problem limits instead.
const int MAXN = 20000000;
int ls[MAXN], rs[MAXN], sum[MAXN], tot = 0;
// Build an empty tree over the compressed value range [l, r]; returns the
// root index of version 0.
int build(int l, int r) {
    int p = ++tot;
    sum[p] = 0;
    if (l == r) return p;
    int m = (l + r) / 2;
    ls[p] = build(l, m);
    rs[p] = build(m + 1, r);
    return p;
}
// New version of `prev` with one more occurrence of value pos. Only the
// O(log M) nodes on the path to pos are copied; the rest is shared.
int update(int prev, int l, int r, int pos) {
    int p = ++tot;
    ls[p] = ls[prev]; rs[p] = rs[prev]; sum[p] = sum[prev] + 1;
    if (l == r) return p;
    int m = (l + r) / 2;
    if (pos <= m) ls[p] = update(ls[prev], l, m, pos);
    else rs[p] = update(rs[prev], m + 1, r, pos);
    return p;
}
// kth smallest (1-based k) among values inserted after version u up to
// version v; for array positions [l, r] pass u = root[l-1], v = root[r].
int kth(int u, int v, int l, int r, int k) {
    if (l == r) return l;
    int m = (l + r) / 2;
    int leftCnt = sum[ls[v]] - sum[ls[u]];  // occurrences in the left half
    if (k <= leftCnt) return kth(ls[u], ls[v], l, m, k);
    else return kth(rs[u], rs[v], m + 1, r, k - leftCnt);
}
// Usage:
// 1. Coordinate compress a[] into values 0..M-1
// 2. root[0] = build(0, M-1)
// 3. for i in 1..n: root[i] = update(root[i-1], 0, M-1, compressed[i])
// 4. query kth in [l,r]: kth(root[l-1], root[r], 0, M-1, k)
Python
# Persistent segment tree for kth smallest
class PersistentSegTree:
    """Persistent sum segment tree over the value range 0..maxval-1.

    Each update returns the root of a new version that shares all
    untouched nodes with the previous one, so n updates allocate
    O(n log maxval) nodes. kth() walks two version roots in lockstep to
    answer "kth smallest value inserted between two versions".

    Node storage grows on demand. The original fixed pre-allocation of
    20 * maxval entries was sized by the VALUE range, not by the number
    of updates, so many updates over a small range raised IndexError;
    dynamic lists remove that capacity limit while keeping the interface.
    """

    def __init__(self, maxval):
        # Kept for backward compatibility with callers that read it; no
        # longer a hard capacity limit.
        self.MAXN = 20 * maxval
        # Index 0 is the shared "null" node: children 0/0, count 0.
        self.ls = [0]
        self.rs = [0]
        self.sm = [0]
        self.tot = 0

    def _new(self):
        """Allocate a fresh node (children 0/0, count 0); return its id."""
        self.ls.append(0)
        self.rs.append(0)
        self.sm.append(0)
        self.tot += 1
        return self.tot

    def build(self, l, r):
        """Build an empty tree over value range [l, r]; returns its root id."""
        p = self._new()
        if l == r:
            return p
        m = (l + r) // 2
        self.ls[p] = self.build(l, m)
        self.rs[p] = self.build(m + 1, r)
        return p

    def update(self, prev, l, r, pos):
        """Return a new root: version `prev` plus one occurrence of pos.

        Only the O(log range) nodes on the path to pos are copied; the
        remaining subtrees are shared with `prev`.
        """
        p = self._new()
        self.ls[p] = self.ls[prev]
        self.rs[p] = self.rs[prev]
        self.sm[p] = self.sm[prev] + 1
        if l == r:
            return p
        m = (l + r) // 2
        if pos <= m:
            self.ls[p] = self.update(self.ls[prev], l, m, pos)
        else:
            self.rs[p] = self.update(self.rs[prev], m + 1, r, pos)
        return p

    def kth(self, u, v, l, r, k):
        """kth smallest (1-based) among values in version v but not in u.

        u must be an earlier version than v (u's multiset is a subset of
        v's); [l, r] is the current value segment, initially the full range.
        """
        if l == r:
            return l
        m = (l + r) // 2
        in_left = self.sm[self.ls[v]] - self.sm[self.ls[u]]
        if k <= in_left:
            return self.kth(self.ls[u], self.ls[v], l, m, k)
        return self.kth(self.rs[u], self.rs[v], m + 1, r, k - in_left)
Array: [3, 1, 4, 1] Compressed: [1, 0, 2, 0] Values: {0:1, 1:3, 2:4}
Version 0 (empty): Version 1 (+val 1): Version 2 (+val 0):
[0] [1] [2]
/ \ / \ / \
[0] [0] [0] [1]* [1]* [1]
/ \ / \ / \ / \ / \ / \
[0] [0][0][0] [0][0][0][1]* [1]*[0][0][1]
* = newly created node (others shared with previous version)
kth(root[1], root[4], 0, 2, 2):
Find 2nd smallest in a[2..4] (1-indexed) = {1, 4, 1}
Left child count = 2 (two 1's mapped to val 0)
k=2 <= 2, go left -> return value 0 -> original = 1
Each update creates O(log n) new nodes. For n updates, total nodes = O(n log n). Pre-allocate arrays of size ~20*n to be safe. Using dynamic allocation (new/malloc) per node is much slower -- always use flat arrays in competitive programming.
Start with Fenwick tree and basic segment tree -- you will use these constantly. Then learn lazy propagation (it appears in probably 30% of Div 2 D/E problems). DSU you likely already know; learn rollback when you hit offline divide and conquer problems. Sparse table is quick to learn and very useful for LCA and RMQ. After that, tackle HLD and centroid decomposition for tree path queries. Save treaps, Li Chao trees, and persistent structures for when you are pushing into Div 1 C+ territory.
Keep pre-written templates for: BIT (point update / prefix sum), segment tree with lazy propagation, DSU with union by rank, and sparse table. These four cover the vast majority of contest problems. For the more exotic structures (HLD, centroid decomp, treap), have tested code ready but expect to adapt it per-problem.