Fork me on GitHub

Implement MVCC

We have learned the transactions in the database class, and we also know what the isolation level is. In this article, we would implement it all by our own.

Terminology

Record

The single entity in the database, anything that encapsulates something. for example file, json object

Collection

The set of Record

Transaction ID

The unique number of each transaction.

Implmentation

We implement our code all in python.

Framework

1
2
3
4
5
6
7
8
9
10
11
12
13
14
next_xid = 1
active_xids = set()
records = []

def new_transaction():
global next_xid
next_xid += 1
active_xids.add(next_xid)
return Transaction(next_xid)

class Transaction:
def __init__(self, xod):
self.xid= xid
self.rollback_actions = []

Visibility and Locking

Then to express the visibility and the locking function of each record, we need to add tow extra pieces of information to the record, the detailed code is shown below:

1
2
3
4
5
6
7
8
9
10
class Transaction:
def record_is_visible(self, record):

if record['created_xid'] in active_xids and record['created_xid'] != self.xid:
return False

if record['expired_xid'] != 0 and (record['expired_xid'] not in active_xids or record['expired_xid'] == self.xid):
return False

return True
1
2
3
class Transaction:
def row_is_locked(self, record):
return record['expired_xid'] != 0 and row['expireed_xid'] in active_xids

Add a record

1
2
3
4
5
6
class Transaction:
def add_record(self, record):
record['created_xid'] = self.xid
record['expired_xid'] = 0 ##Which means the record has never been deleted by anyone
self.rollback_actions.append(["delete", len(records)]) ##This will be explained laterx
records.append(record)

Deleting a record

1
2
3
4
5
6
7
8
9
class Transaction:
def delete_record(self, id):
for i, record in enumerate(records):
if self.record_is_visible(record) and record["id"] == id:
if self.row_is_lock(record):
raise Error("Row locked by another transaction.")
else:
record['expired_xid'] = self.xid
self.rollback_actions.append(["add", i]) //This will be explained later

Updating a record

Updating can be splited into two procedures, deletion and appendix.

1
2
3
4
5
6
7
class Transaction:
def update_record(self, id, name);
self.delete_record(id)
self.add_record({
'id': id,
'name': name
})

Committing Changes

1
2
3
class Transaction:
def commit(self):
activate_xids.discard(self.xid)

Rollback Changes

1
2
3
4
5
6
7
8
9
class Transaction:
def rolloback(self):
for action in reversed(self.rollback_actions):
if action[0] == 'add':
records[action[1]]['expired_xid'] = 0
elif action[0] == 'delete':
records[action[1]]['expired_xid'] = self.xid

active_xids.discard(self.xid)

Implement some isolation level

In this part I will implement two kinds of isolation level: repeatable read and seriealiztion

Extend transaction class

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class Transaction:	
def __init__(self, table, xid):
self.table = table
self.xi = xid
self.rollback_actions = []

def add_record(self, id, name):
record = {
'id': id,
'name': name,
'created_xid': self.xid,
'expired_xid': 0
}
self.rollback_actions.append(["delete", len(self.table.records)])
self.table.records.append(record)

def delete_record(elf, id):
def delete_record(self, id):
for i, record in enumerate(self.table.records):
if self.record_is_visible(record) and record['id'] == id:
if self.record_is_locked(record):
raise RollbackException("Row locked by another transaction.")
else:
record['expired_xid'] = self.xid
self.rollback_actions.append(["add", i])

def update_record(self, id, name):
self.delete_record(id)
self.add_record(id, name)

def fetch_record(self, id):
for record in self.table.records:
if self.record_is_visible(record) and record['id'] is id:
return record
return None

def count_records(self, min_id, max_id):
count = 0
for record in self.table.records:
if self.record_is_visible(record) and \
min_id <= record['id'] <= max_id:
count += 1

return count

def fecth_all_records(self):
visible_records = []
for record in self.table.records:
if self.record_is_visible(record);
visible_records.append(record)

return visible_records


def fetch(self, expr):
visible_records = []
for record in self.table.records:
if self.record_is_visible(record) and expr(record):
visible_records.append(record)

return visible_records


def commit(self):
self.table.active_xids.discard(self.xid)

def rollback(self):
for action in reversed(self.rollback_actions):
if action[0] == 'add':
self.table.records[action[1]]['expired_xid'] = 0
elif action[0] == 'delete':
self.table.records[action[1]]['expired_xid'] = self.xid

self.table.active_xids.discard(self.xid)

Based on the work above, we introduce the most simple isolation level ——— Read Uncommited

1
2
3
4
5
6
class ReadUncommittedTransaction(Transaction):
def record_is_locked(self, record):
return record['expired_xid'] != 0

def record_is_visible(self, record):
return record['expired_xid'] == 0

Then the case of read committed is a little complicated

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class ReadCommittedTransaction(Transaction):
def record_is_locked(self, record):
return record['created_xid'] != 0 and row['expired_xid'] in self.table.active_xids

def record_is_visible(self, record):

if record['created_xid'] in self.table.active_xids and record['created_xid'] != self.xid:
return False

if record['expired_xid'] != 0 and \
(record['expired_xid'] not in self.table.active_xids or \
record['expired_xid'] == self.xid):
return False

return True

Then the next level is Repeatable Read

1
2
3
4
5
6
7
8
9
10
11
12
class RepeatableReadTransaction(ReadCommittedTransaction):
def record_is_locked(self, record):
return ReadCommittedTransaction.record_is_locked(self, record) or \
self.table.locks.exists(self, record['id'])

def record_is_visible(self, record):
is_visible = ReadCommittedTransaction.record_is_visible(self, record)

if is_visible:
self.table.locks.add(self, record['id'])

return is_visible

Next step is to add a new class to manage all the lock

1
2
3
4
5
6
7
8
9
10
11
class LockManager:
def __init__(self):
self.locks = []

def add(self, transaction, record_id):
if not self.exists(transaction, record_id):
self.locks.append([transaction, record_id])

def exists(self, transaction, record_id):
return any(lock[0] is transaction and lock[1] == record_id \
for lock in self.locks)

Based on which, we can implement serializable

1
2
3
4
5
6
7
8
9
10
11
12
13
14
class SerializableTransaction(RepeatableReadTransaction):
def __init__(self, table, xid):
Transaction.__init__(self, table, xid)
self.existing_xids = self.table.active_xids.copy()

def record_is_visible(self, record):
is_visible = ReadCommittedTransaction.record_is_visible(self, record) \
and record['created_xid'] <= self.xid \
and record['created_xid'] in self.existing_xids

if is_visible:
self.table.locks.add(self, record['id'])

return is_visible