Upload
vanlien
View
219
Download
4
Embed Size (px)
Citation preview
●●●
●
2
3
PostgreSQL 9.5
CitusDB 5
pg_shard
4
CitusDB 4
pg_shard
Events
…
… … …
…
… … …
…
… … ……
5
$ ./configure && make && sudo make install
$ cat > /db/pg_worker_list.conf
ip-10-192-0-113.eu-central-1.compute.internal 5432
ip-10-192-0-134.eu-central-1.compute.internal 5432
$ vim /db/postgresql.conf
…
shared_preload_libraries = 'citusdb'
$ pg_ctl -D /db -l /db/logfile restart
$ psql -c "CREATE EXTENSION citusdb"
6
7
SELECT time::date AS day, count(*) FROM events WHERE data->>'type' = 'signup' GROUP BY day ORDER BY day ASC;
SELECT sum(price) FROM orders, nation WHERE orders.nation = nation.name AND orders.date >= '2016-01-01' AND nation.region = 'Asia';
8
●●●●
9
10
Events
…
… … …
…
… … …
…
… … ……
11
$ psql -h master-node-1
# \d
Schema | Name | Type | Owner
--------+---------------+---------------+-------
public | events | table | marco
12
$ psql -h worker-node-1
# \d
Schema | Name | Type | Owner
--------+---------------+---------------+-------
public | events_10018 | table | marco
public | events_10020 | table | marco
...
13
●
●
14
shard shardminvalue shardmaxvalue
events_10011 2015-06-09 10:00:00 2015-06-09 10:59:59
events_10012 2015-06-09 11:00:00 2015-06-09 11:59:59
events_10023 2015-06-09 12:00:00 2015-06-09 12:59:59
events_10024 2015-06-09 13:00:00 2015-06-09 13:59:59
15
CREATE TABLE events ( time timestamp, data jsonb
);
SELECT master_create_distributed_table('events', 'time', 'append');
CREATE INDEX ON events (time);
CREATE INDEX ON events USING GIN (data);
\STAGE events FROM '/logs/2015-06-09_10:00:00.log'
\STAGE events FROM '/logs/2015-06-09_11:00:00.log'
16
Partition column
→
shard shardminvalue shardmaxvalue
events_10018 -2147483648 -1073741826
events_10019 -1073741825 -3
events_10020 -2 1073741820
events_10021 1073741821 2147483647
17
CREATE TABLE events ( userid int PRIMARY KEY, time timestamp, data jsonb, …);
SELECT master_create_distributed_table('events', 'userid', 'hash');
SELECT master_create_worker_shards('events', 128, 2);
INSERT INTO events VALUES(6, '2015-06-09 10:30:16', '{type:"click", ...}');
18
19
Events
…
… … …
…
… … …
…
… … ……
20
Orders
21
Orders
22
SELECT sum(price) FROM orders, nation WHERE orders.nation = nation.name AND orders.date >= '2016-01-01' AND nation.region = 'Asia';
…
23
24
SELECT sum(intermediate_0) FROM merge_job_1;
SELECT sum(price) FROM orders_109, nation_101 WHERE orders.date >= '2016-01-01' AND nation.region = 'Asia' AND orders_109.nation = nation_101.name;
25
●●●●
26
●
●
●
●
●●
27
28
29
30
31
32
CREATE TEMPORARY TABLE input (data jsonb);
COPY input FROM '$FILE' csv quote e'\x01' delimiter e'\x02';
CREATE UNLOGGED TABLE $stage_table AS SELECT (data->>'id'), (data->>'created_at'), (data->>'type'), data->'actor', data->'repo', data->'payload' as payload FROM input;
SELECT master_append_table_to_shard($shard, $stage_table, $node, $port);
33
34
35
using distributed table
~950ms
36
using local table
~2 minutes
37
~270ms
using GIN index
38
scan >100m PushEvents
2.9 seconds
39
1 month -> 31 cores, 1.5 seconds
3 months -> 80 cores, 1.7 seconds
5 months -> 80 cores, 2.3 seconds
40
●●
●
●
41
42
43
Citus Data team outing :)