Ringbuffer in SQL
On a student platform we show on every page a random user in the upper right corner. Since we have over 11.000 users, the database has to select all matching users and sort them by random(). Thats very expensive and one of the longest running query in the current version.
Since we are currently redeveloping the whole platform, i decided to try something different for the random user. The user must not be really random, it should be enough to just display a different user every time the page is displayed. So my idea was to implement a ringbuffer in SQL and forward the pointer to the next user in the buffer if we need to search the next random user.
First some preparations:
CREATE SCHEMA ringbuffer;
-- create a dummy user table just for this example
CREATE TABLE ringbuffer.users (
id INT NOT NULL
UNIQUE
PRIMARY KEY,
username VARCHAR(20) NOT NULL
UNIQUE
) WITHOUT OIDS;
-- insert some dummy users into our dummy table
INSERT INTO ringbuffer.users (id, username) VALUES (6, 'user 6');
INSERT INTO ringbuffer.users (id, username) VALUES (1, 'user 1');
INSERT INTO ringbuffer.users (id, username) VALUES (2, 'user 2');
INSERT INTO ringbuffer.users (id, username) VALUES (3, 'user 3');
INSERT INTO ringbuffer.users (id, username) VALUES (4, 'user 4');
INSERT INTO ringbuffer.users (id, username) VALUES (5, 'user 5');
After this, create the ringbuffer:
-- now create the ringbuffer
-- we use a foreign key to the user table to assure we only insert
-- existing userIDs here
CREATE TABLE ringbuffer.ringbuffer (
id INT NOT NULL
UNIQUE
PRIMARY KEY,
value_id INT NOT NULL
REFERENCES ringbuffer.users(id),
next_id INT REFERENCES ringbuffer.ringbuffer(id)
) WITHOUT OIDS;
-- and an additional status table
CREATE TABLE ringbuffer.ringbuffer_data (
id INT NOT NULL
UNIQUE
PRIMARY KEY
CHECK (id = 1),
ringbuffer_id INT REFERENCES ringbuffer.ringbuffer(id),
max_id INT NOT NULL
DEFAULT 0
) WITHOUT OIDS;
INSERT INTO ringbuffer.ringbuffer_data
(id, ringbuffer_id, max_id)
VALUES (1, NULL, 0);
-- protect against deletes, nobody should delete your status entry
-- or the have more overhead in the functions
CREATE RULE ringbuffer_data_del
AS ON DELETE TO ringbuffer.ringbuffer_data
DO INSTEAD nothing;
And some supporting functions:
-- function for insert an user (respective the ID) into the buffer
CREATE OR REPLACE FUNCTION ringbuffer.insert_into_ringbuffer(insert_id INT)
RETURNS INTEGER
AS $$
DECLARE
query TEXT;
loop_record RECORD;
loop_record2 RECORD;
BEGIN
-- check that this entry is not yet in the ringbuffer table
query := 'SELECT COUNT(id) AS number_count
FROM ringbuffer.ringbuffer
WHERE value_id = ' || quote_literal(insert_id) || '';
EXECUTE query INTO loop_record;
IF loop_record.number_count > 0
THEN
RAISE NOTICE 'user is already in ringbuffer: %', insert_id;
ELSE
-- now check, if the data table is empty
query := 'SELECT *
FROM ringbuffer.ringbuffer_data
WHERE id = 1';
EXECUTE query INTO loop_record;
RAISE NOTICE 'max id: %', loop_record.max_id;
IF loop_record.max_id > 0
THEN
-- need the current last record
query := 'SELECT *
FROM ringbuffer.ringbuffer
WHERE id = ' || quote_literal(loop_record.max_id) || '';
EXECUTE query INTO loop_record2;
IF loop_record2.next_id < 1
THEN
RAISE EXCEPTION 'should have a next_id, but missing it';
END IF;
-- insert new record at end of table
RAISE NOTICE 'insert at end of table with id: %', loop_record.max_id + 1;
query := 'INSERT INTO ringbuffer.ringbuffer
(id, value_id, next_id)
VALUES (' || quote_literal(loop_record.max_id + 1) || ',
' || quote_literal(insert_id) || ',
' || quote_literal(loop_record2.next_id) || ')';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
-- update last record
query := 'UPDATE ringbuffer.ringbuffer
SET next_id=' || quote_literal(loop_record.max_id + 1) || '
WHERE id=' || quote_literal(loop_record.max_id) || '';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
-- update pointer table
query := 'UPDATE ringbuffer.ringbuffer_data
SET max_id = ' || quote_literal(loop_record.max_id + 1) || '
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
ELSE
-- just insert as first value
RAISE NOTICE 'insert as first value into ringbuffer ...';
query := 'INSERT INTO ringbuffer.ringbuffer
(id, value_id, next_id)
VALUES (1, ' || quote_literal(insert_id) || ', 1)';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
-- update pointer table
query := 'UPDATE ringbuffer.ringbuffer_data
SET max_id = ' || quote_literal(loop_record.max_id + 1) || ',
ringbuffer_id = 1
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
END IF;
END IF;
RETURN 1;
END;
$$
LANGUAGE 'plpgsql';
-- function for get next user from rungbuffer (the semi-random function)
CREATE OR REPLACE FUNCTION ringbuffer.get_next_ringbuffer()
RETURNS INTEGER
AS $$
DECLARE
query TEXT;
loop_record RECORD;
BEGIN
-- check, if the table is empty
query :='SELECT *
FROM ringbuffer.ringbuffer_data
WHERE id = 1';
EXECUTE query INTO loop_record;
-- RAISE NOTICE 'max id: %', loop_record.max_id;
IF loop_record.max_id IS NULL
THEN
RAISE NOTICE 'ringbuffer is empty!';
RETURN NULL;
ELSE
-- get the value the pointer is currently on
RAISE NOTICE 'get value from ringbuffer ...';
query := 'SELECT *
FROM ringbuffer.ringbuffer
WHERE id = (SELECT ringbuffer_id
FROM ringbuffer.ringbuffer_data
WHERE id = 1)';
-- RAISE NOTICE 'query: %', query;
EXECUTE query INTO loop_record;
RAISE NOTICE 'current value id: %', loop_record.value_id;
RAISE NOTICE 'next id: %', loop_record.next_id;
-- update pointer table
query := 'UPDATE ringbuffer.ringbuffer_data
SET ringbuffer_id=' || quote_literal(loop_record.next_id) || '
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
-- return result
RETURN loop_record.value_id;
END IF;
RETURN NULL;
END;
$$
LANGUAGE 'plpgsql';
-- function for delete an user from ringbuffer
CREATE OR REPLACE FUNCTION ringbuffer.delete_from_ringbuffer(delete_id INT)
RETURNS INTEGER
AS $$
DECLARE
query TEXT;
loop_record_data RECORD;
loop_record RECORD;
loop_record2 RECORD;
BEGIN
-- check, if the table is empty
query :='SELECT *
FROM ringbuffer.ringbuffer_data
WHERE id = 1';
EXECUTE query INTO loop_record_data;
-- RAISE NOTICE 'max id: %', loop_record.max_id;
IF loop_record_data.max_id IS NULL
THEN
RAISE NOTICE 'ringbuffer is empty!';
RETURN NULL;
ELSE
-- get the value which should be deleted
-- RAISE NOTICE 'delete value from ringbuffer ...';
query := 'SELECT *
FROM ringbuffer.ringbuffer
WHERE value_id = ' || quote_literal(delete_id) || '';
-- RAISE NOTICE 'query: %', query;
EXECUTE query INTO loop_record;
IF loop_record.id IS NULL
THEN
-- nothing to delete
RETURN NULL;
END IF;
IF loop_record.id = loop_record.next_id
THEN
-- we only have this value, update pointer table
-- (next_id == id)
query := 'UPDATE ringbuffer.ringbuffer_data
SET ringbuffer_id = NULL,
max_id = 0
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
ELSE
-- get the value which is referring us
query := 'SELECT *
FROM ringbuffer.ringbuffer
WHERE next_id = ' || quote_literal(loop_record.id) || '';
-- RAISE NOTICE 'query: %', query;
EXECUTE query INTO loop_record2;
-- now twist the pointer from last to the next record
query := 'UPDATE ringbuffer.ringbuffer
SET next_id = ' || quote_literal(loop_record.next_id) || '
WHERE id= ' || quote_literal(loop_record2.id) || '';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
IF loop_record_data.ringbuffer_id = loop_record.id
THEN
-- we are deleting the next entry
query := 'UPDATE ringbuffer.ringbuffer_data
SET ringbuffer_id = ' || quote_literal(loop_record.next_id) || ',
max_id = (SELECT MAX(next_id)
FROM ringbuffer.ringbuffer)
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
ELSE
-- we are deleting another entry
query := 'UPDATE ringbuffer.ringbuffer_data
SET max_id = (SELECT MAX(next_id)
FROM ringbuffer.ringbuffer)
WHERE id = 1';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
END IF;
END IF;
-- delete the dataset
query := 'DELETE FROM ringbuffer.ringbuffer
WHERE id = ' || quote_literal(loop_record.id) || '';
-- RAISE NOTICE 'query: %', query;
EXECUTE query;
-- return deleted result
RETURN loop_record.value_id;
END IF;
RETURN NULL;
END;
$$
LANGUAGE 'plpgsql';
Now lets test this stuff:
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 1'));
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 2'));
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 5'));
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 4'));
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 3'));
SELECT ringbuffer.insert_into_ringbuffer((SELECT id
FROM ringbuffer.users
WHERE username='user 6'));
SELECT * FROM ringbuffer.ringbuffer ORDER BY id;
SELECT * FROM ringbuffer.ringbuffer_data ORDER BY id;
SELECT ringbuffer.get_next_ringbuffer();
SELECT ringbuffer.get_next_ringbuffer();
SELECT ringbuffer.get_next_ringbuffer();
SELECT ringbuffer.get_next_ringbuffer();
SELECT ringbuffer.get_next_ringbuffer();
SELECT ringbuffer.get_next_ringbuffer();
As you can see, every time you call ringbuffer.get_next_ringbuffer(), you get another userID back, which will perfectly fit in a subselect to retrieve the actual userdata.
Comments
Display comments as Linear | Threaded
Nikolay Samokhvalov on :
Andreas on :
Andreas Scherbaum on :