Result Visualization Notebook

The following assembles the data and produces the charts presented in:

Beyond Macrobenchmarks: Microbenchmark-based Graph Database Evaluation, by Lissandrini, Matteo; Brugnara, Martin; and Velegrakis, Yannis. In PVLDB, 12(4):390-403, 2018.

This notebook can process experiments incrementally, so you can use it while experiments are still running; re-running the notebook produces updated charts.

Note: in the following we assume that a missing experiment means the system reached the time-out.

Quick References

Data Preprocessing

In [1]:
# Starting fresh each time.
import os
if os.path.exists('results.db'):
    os.remove('results.db')
In [2]:
%matplotlib inline
import os
import re
import csv
import sqlite3
import matplotlib.pyplot as plt
from os import path
from collections import OrderedDict
from IPython.core.display import display, HTML


def get_default_db():
    return DbSession('results.db')


class DbSession(object):
    def __init__(self, conn_str):
        self.conn = sqlite3.connect(conn_str)
        self.cursor = self.conn.cursor()
        
    def __enter__(self):
        return self.cursor
    
    def __exit__(self, t, value, traceback):
        if t is not None:
            log(traceback)
        self.cursor.close()
        self.conn.commit()
        self.conn.close()


def log(msg):
    import datetime
    print('%s - %s' % (datetime.datetime.now().isoformat(), msg))

    
def gby(lst, key=0):
    """ Group by that respect keys order.
    """
    from collections import OrderedDict
    res = OrderedDict()
    for item in lst:
        k = item[key]
        if k in res: 
            res[k].append(item)
        else: 
            res[k] = [item]
    return res
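
A quick, hypothetical illustration of gby (the values below are made up): it behaves like a group-by that keeps keys in first-appearance order, which the plotting code later relies on when cycling through line styles.

rows = [('neo4j', 10), ('titan', 7), ('neo4j', 12)]
grouped = gby(rows)  # keys keep their first-appearance order
assert list(grouped.keys()) == ['neo4j', 'titan']
assert grouped['neo4j'] == [('neo4j', 10), ('neo4j', 12)]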

Bootstrap

Let's first create and initialize the database, and then load the data from the *.csv files.

Schema

In [3]:
tables_list = ['results', 'queries', 'datasets', 'images', 'dborder']

with get_default_db() as c:
    # Reset database
    stmt = 'DROP TABLE IF EXISTS '
    for tbl in tables_list:
        c.execute(stmt + tbl)

# Setup database
create_tables = [
    '''
    -- Only common fields
    CREATE TABLE IF NOT EXISTS results (
            -- _rowid_ (https://www.sqlite.org/lang_createtable.html#rowid)
            date       INTEGER NOT NULL, 
            dbengine   TEXT NOT NULL, 
            dataset    TEXT NOT NULL,     -- please store just name, not path
            query      TEXT NOT NULL, 
            sid        INTEGER,
            int_order  INTEGER,
            exec_time  INTEGER NOT NULL
    )

    ''',
    '''
    CREATE TABLE IF NOT EXISTS queries (
            -- _rowid_ (https://www.sqlite.org/lang_createtable.html#rowid)
            query      TEXT, 
            cnt        INTEGER, -- expected number of queries
            cls        TEXT,
            qord       int,
            paper_id   TEXT
    );
    ''',
    '''
    CREATE TABLE IF NOT EXISTS datasets (
            -- _rowid_ (https://www.sqlite.org/lang_createtable.html#rowid)
            dataset TEXT, 
            json2   REAL,
            dord    INT
    );
    ''',
    '''
    -- Images size on disk
    CREATE TABLE IF NOT EXISTS images (
            -- _rowid_ (https://www.sqlite.org/lang_createtable.html#rowid)
            dbengine      TEXT, 
            dataset       TEXT, -- Special value 'empty'
            space         REAL
    );
    ''',
    '''
    -- Defined ordering
    CREATE TABLE IF NOT EXISTS dborder (
            -- _rowid_ (https://www.sqlite.org/lang_createtable.html#rowid)
            dbengine      TEXT, 
            dbalias       TEXT,
            ord           int 
    );
    ''',
]

assert(len(tables_list) == len(create_tables))
    
with get_default_db() as c:
    for tbl in create_tables:
        c.execute(tbl)
    

log('Database initialized')
2019-09-12T13:26:52.122231 - Database initialized

Fixtures

While the following metadata could be derived from the queries themselves, hardcoding it simplifies the presentation.
See, for example, how the BFS length parameters are handled in the insert_result() function.
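
For instance, the fixtures below list BFS2 through BFS5 as separate queries, while a raw result row only carries the name BFS plus the traversal depth in its last column; insert_result() concatenates the two so the row lines up with these fixtures. A minimal, purely illustrative sketch:

query = 'BFS'   # derived from the script file name in the result row
depth = '3'     # last column of the result row (hypothetical value)
assert query + depth == 'BFS3'  # matches the 'BFS3' fixture below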

Queries

Comment out any of the following tuples to exclude the corresponding queries from the analysis. Remember the implicit assumption that a missing result entry is treated as a timeout.

In [4]:
"""
Meta array format:
(QueryClass, [
  ('query file name wo .rb', <expected # of results for run>, '<id_on_paper>')
])
"""
meta = [
    ('L', [('loader', 1, '1')]),
    ('C', [
        ('insert-node', 10, '2'),
        ('insert-edge', 10, '3'),
        ('insert-edge-with-property', 10, '4'),
        ('insert-node-property', 10, '5'),
        ('insert-edge-property', 10, '6'),
        ('insert-node-with-edges', 10, '7'),
    ]),
    ('R', [
        ('count-nodes', 1, '8'),
        ('count-edges', 1, '9'),
        ('find-unique-labels', 1, '10'),
        ('node-property-search', 10, '11'),
        ('edge-specific-property-search', 10, '12'),
        ('label-search', 10, '13'),
        ('id-search-node', 10, '14'),
        ('id-search-edge', 10, '15'),
    ]),
    ('U', [
        ('update-node-property', 10, '16'),
        ('update-edge-property', 10, '17'),
    ]),
    ('D', [
        ('delete-nodes', 10, '18'),
        ('delete-edges', 10, '19'),
        ('delete-node-property', 10, '20'),
        ('delete-edge-property', 10, '21'),        
    ]),
    ('T', [
        ('NN-incoming', 10, '22'),
        ('NN-outgoing', 10, '23'),        
        ('NN-both-filtered', 10, '24'),   
        ('NN-incoming-unique-label', 10, '25'),
        ('NN-outgoing-unique-label', 10, '26'),
        ('NN-both-unique-label', 10, '27'),
        ('k-degree-in', 1, '28'),
        ('k-degree-out', 1, '29'),
        ('k-degree-both', 1, '30'),
        ('find-non-root-nodes', 1, '31'),
        # BFS, and BFS-labelled would go here
        ('shortest-path', 10, '34'),
        ('shortest-path-labelled', 10, '35'),
    ]),  
    ('B', [
        # 32
        ('BFS2', 10, ''),
        ('BFS3', 10, ''),
        ('BFS4', 10, ''),
        ('BFS5', 10, ''),
        # 33
        ('BFS-labelled2', 10, ''),
        ('BFS-labelled3', 10, ''),
        ('BFS-labelled4', 10, ''),
        ('BFS-labelled5', 10, ''),
    ]),
    
    ('I', [
        # ("X-create-index", 1, ''),
        ("X-insert-node", 10,          '2'),
        ("X-insert-node-property", 10, '5'),
        ("X-node-property-search", 10, '11'),
        ("X-update-node-property", 10, '16'),
        ("X-delete-nodes", 10,         '18'),
        ("X-delete-node-property", 10, '20'),
    ]),
    
    ('S', [
        # ("ldbc-create-index", 1, ''),
        ("ldbc-macrobench-max-iid", 10, ''),
        ("ldbc-macrobench-max-oid", 10, ''),
        ("ldbc-macrobench-create", 10, ''),
        ("ldbc-macrobench-city", 10, ''),
        ("ldbc-macrobench-company", 10, ''),
        ("ldbc-macrobench-university", 10, ''),
        ("ldbc-macrobench-friend1", 10, ''),
        ("ldbc-macrobench-friend2", 10, ''),
        ("ldbc-macrobench-friend-tags", 10, ''),
        ("ldbc-macrobench-add-tags", 10, ''),
        ("ldbc-macrobench-friend-of-friend", 10, ''),
        ("ldbc-macrobench-triangle-closure", 10, ''),
        ("ldbc-macrobench-places", 10, ''),
    ]),
]

# Add queries in order 
insert = 'INSERT INTO queries VALUES (?, ?, ?, ?, ?);'
with get_default_db() as c:
    qord = 0
    for (cls, queries) in  meta:
        for (q, cnt, paper_id) in  queries:
            c.execute(insert, [q, cnt, cls, qord, paper_id])
            qord += 1
            
log('Queries meta have been loaded')
2019-09-12T13:26:52.256371 - Queries meta have been loaded
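If needed, the loaded fixtures can be sanity-checked with a small query (illustrative, not part of the original analysis):

with get_default_db() as c:
    for cls, n in c.execute('SELECT cls, COUNT(*) FROM queries GROUP BY cls ORDER BY MIN(qord)'):
        log('class %s: %d queries' % (cls, n))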
In [5]:
# Databases
# NOTE: index-capable systems first, then the others.
dborder = [
    ("neo4j",      "Neo4j 1.9"),
    ("neo4j-tp3",  "Neo4j 3.0"),
    ("orientdb",   "OrientDB"),
    ("titan",      "Titan 0.5"),
    ("titan-tp3",  "Titan 1.0"),
    ("sparksee",   "Sparksee"),
    ("arangodb",   "ArangoDB"), 
    ("pg",         "Sqlg"),
    ("blazegraph", "Blazegraph"),
]

with get_default_db() as c:
    for (i,d) in enumerate(dborder):
        c.execute('INSERT INTO dborder VALUES (?,?,?)', (*d, i))

log('Fixture loadings completed')
2019-09-12T13:26:52.283086 - Fixture loadings completed

Data

Datasets

In [6]:
q = 'INSERT INTO datasets VALUES (?,?,?)'
with get_default_db() as c:
    with open('datasets.tsv') as f:
        # Each line of datasets.tsv: <size> <dataset name> (whitespace separated).
        for i, l in enumerate(f):
            size, ds = l.split()
            c.execute(q, [ds, float(size)/1024, i])

Images

In [7]:
q = 'INSERT INTO images VALUES (?,?,?)'
with get_default_db() as c:
    with open('images.csv') as f:
        for l in f:
            name, size = l.split(',')
            if '_' in name:
                db, ds = name.strip()[8:-6].split('_',1)
            else:
                db, ds = name.strip()[8:], 'empty'
            size = size.strip()
            size = float(size[:-2]) * (1024 if size[-2] == 'G' else 1)
            c.execute(q, [db, ds, size]) 
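
The size normalization above assumes a two-character unit suffix on the reported sizes; under that assumption (e.g. 'GB'/'MB') everything ends up in MB. A minimal, hypothetical restatement of the conversion:

def parse_size(raw):
    # strip a two-character unit suffix; scale values whose unit starts with 'G' by 1024
    raw = raw.strip()
    return float(raw[:-2]) * (1024 if raw[-2] == 'G' else 1)

assert parse_size('1.5GB') == 1536.0
assert parse_size('300MB') == 300.0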

Results

The following import supports results of experiments carried out at different times or on different machines. It is up to the researcher to guarantee the consistency of the external environment; e.g., when using multiple machines, they must make sure the machines are identical in hardware, software, and configuration.

See collect.sh for more detail about results harvesting.
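
From the import code below, result files are expected to be named '<timestamp>_..._results.csv'; a minimal sketch of how such a name is interpreted (the file name itself is hypothetical):

fn = '1567548898_neo4j_results.csv'   # hypothetical file name
timestamp = fn.split('_', 1)[0]       # experiment id used to group files
assert timestamp == '1567548898'
assert fn.endswith('results.csv')     # only '*results.csv' files are parsed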

In [8]:
# Path to results directory
RES_DIR='/results/'

# Get the list of the experiment results available on disk.
exps = set(fn.split('_',1)[0]
           for fn in os.listdir(RES_DIR)
           if os.path.isfile(path.join(RES_DIR, fn))
               and fn[0] != '.')
log('Importing: {}'.format(exps))

# Get list of experiments executed with indexes.
indexed_exps = []
if os.path.exists('indexed.csv'):
    with open('indexed.csv') as f:
        indexed_exps = set(t.strip() for t in f)
log('Experiments with indexes: {}'.format(indexed_exps))


# Support function
def insert_result(c, timestamp, row, indexed_exps):
    """Map one raw result row into the `results` table.

    Fields used below: row[0] = 'gremlin-<dbengine>', row[1] = dataset path,
    row[2] = query script path, row[3] = sid, row[5] = int_order,
    row[6] = exec_time, row[-1] = BFS depth (BFS queries only).
    """
    insert_res = 'INSERT INTO results VALUES (?, ?, ?, ?, ?, ?, ?);'

    # NOTE: we use replace instead of substr so it works also for LDBC.
    query = row[2].split('/')[-1].replace('.groovy', '').replace('bulk', '')
    if query.startswith('BFS'):
        # Append the traversal depth so the name matches the BFS<depth>
        # entries in the queries fixture.
        query += row[-1]
    exec_time = int(row[6])

    if timestamp in indexed_exps:
        query = 'X-' + query

    if query.startswith('loader'):
        # Scale the loader time so it is stored in the same time unit
        # as the other queries.
        exec_time *= 1000000.0
    
    param = [
        timestamp,
        row[0][len('gremlin-'):],
        row[1].split('/')[-1].replace('_hashed', '').replace('_noslash.json', '').rstrip('2')[:-len('.json')],
        query,
        row[3] or '-1', # sid
        row[5], # int_order
        exec_time,
    ]
    c.execute(insert_res, param)
    

with get_default_db() as c:
    for res in [fn for fn in os.listdir(RES_DIR)
                if os.path.isfile(path.join(RES_DIR, fn))
                and fn[0] != '.' and fn.split('_',2)[0] in exps]:
        if not res.endswith('results.csv'):
            continue
        with open(path.join(RES_DIR, res), 'r') as csvfile:
            reader = (row for row in csv.reader(csvfile) if row[0].startswith('gremlin-'))
            for row in reader:
                insert_result(c, res.split('_',2)[0], row, indexed_exps)
log('Results import process completed')
2019-09-12T13:26:52.376506 - Importing: {'1567548898', '1551761122', '1565805219'}
2019-09-12T13:26:52.377275 - Experiments with indexes: {'1567548898'}
2019-09-12T13:26:55.210225 - Results import process completed

Support Views

Define several views that make the analytics SQL for the charts less obscure.

In [9]:
views_names = [
    'one_per_row', 'single', 'bulk', 'dbs',
    'full_queries_set', 'single_means_wtimeout',
    'bulk_sum_wtimeout', 'health', 'health_bulk',
]

views = [
    # Results normalization:
    # sum the queries that report their exec_time split across multiple records.
    # NOTE, here we make the assumption (even if not 100% valid)
    #       that all "pieces" are in the same result file ('date'),
    #       and that a query is not run more than once
    #       in the same experiment ('date')
    # NOTE, "-tp3" may have been collected both in `$DB` and `${DB}-tp3`
    #       results. Thus use DISTINCT.
    '''
      CREATE VIEW one_per_row AS 
        SELECT dbengine,dataset,query,sid,int_order,
            CASE 
                -- Trimmed mean: with at least 5 samples,
                -- drop the fastest and the slowest run.
                WHEN COUNT(*) >= 5 THEN
                    (SUM(exec_time) - MIN(exec_time) - MAX(exec_time)) / (COUNT(*)-2)
                ELSE
                    AVG(exec_time)
            END AS exec_time
        FROM (
            -- already ok
            SELECT DISTINCT * FROM results
            WHERE 
                (query <> 'insert-node-with-edges') AND
                (dbengine <> "arangodb" OR query <> 'loader')

            UNION

            -- insert-node-with-edges.groovy & arango loader        
            SELECT date,dbengine,dataset,query,sid,int_order,SUM(exec_time)
            FROM results
            WHERE 
                query = 'insert-node-with-edges' OR
                (dbengine = "arangodb" AND query = 'loader')
            GROUP BY date,dbengine,dataset,query,sid,int_order  
            
            UNION
            
            SELECT date,dbengine,dataset,'ldbc-create-user',sid,int_order,SUM(exec_time)
            FROM results
            WHERE query LIKE 'ldbc-create-user%'
            GROUP BY date,dbengine,dataset,sid,int_order
                
        )
        GROUP BY dbengine,dataset,query,sid,int_order  
    ''',
    
    # Runs that are NOT in bulk mode
    '''
        CREATE VIEW single AS
            SELECT DISTINCT * FROM one_per_row WHERE int_order = 0 OR int_order = ''
    ''',
    
    # ONLY runs that are in bulk mode
    '''
        CREATE VIEW bulk AS
            SELECT DISTINCT * FROM one_per_row WHERE NOT (int_order = 0 OR int_order = '')
    ''',
    
    # dbengines view (based on images size)
    '''
        CREATE VIEW dbs AS
            SELECT DISTINCT dbengine 
            FROM images
    ''',
    
    # Expected query set (w/o int_order)
    '''
        CREATE VIEW full_queries_set AS
            SELECT dbengine,dataset,query,cnt
            FROM dbs, queries, datasets
    ''',
       
    # single_means_wtimeout
    # Assumption: if COUNT() < expected --> timedout
    # SUM(query) + timeout (:= 2h) * (Expected query count - COUNT(query))
    '''
        CREATE VIEW single_means_wtimeout AS
            SELECT dbengine,dataset,q.query,
                (SUM(exec_time/1000000) + (7200000 * max(0,q.cnt - SUM(done))))/q.cnt AS exec_time,
                q.cnt, SUM(done) AS done
            FROM (
                SELECT dbengine,dataset,query,exec_time,1 AS done
                FROM single
                
                UNION
                
                SELECT dbengine,dataset,query,0,0
                FROM full_queries_set
            ) AS r
            NATURAL JOIN queries AS q
            GROUP BY dbengine,dataset,query
    ''',
    
    # bulk_sum_wtimeout
    # Assumption: if COUNT() < expected --> timedout
    '''
        CREATE VIEW bulk_sum_wtimeout AS
            SELECT dbengine,dataset,r.query,
                CASE
                    WHEN SUM(done) <> q.cnt THEN 7200000
                    ELSE SUM(exec_time) / 1000000
                END AS exec_time,
                q.cnt, SUM(done) AS done
            FROM (
                SELECT dbengine,dataset,query,exec_time,1 AS done
                FROM bulk
                
                UNION
                
                SELECT dbengine,dataset,query,0,0
                FROM full_queries_set
                WHERE cnt > 1
            ) AS r
            NATURAL JOIN queries AS q
            GROUP BY dbengine,dataset,query
    ''',
    
    # health
    # Checks how many queries have been completed and how many timed out.
    '''
    CREATE VIEW health AS
        SELECT dbengine, dataset, query,
            -- NOTE, count on any "single" field, but not on *.
            -- Remember that we are using a left natural join.
            -- Assumption: NOT NULL
            COUNT(single.query) AS cnt,

            -- NOTE, assumption: SID is always present (NOT NULL, DEFAULT '-1')
            COUNT(DISTINCT sid) AS sid_cnt,
            cnt AS expected_cnt,
            CASE      
                WHEN COUNT(DISTINCT sid) < cnt
                    THEN "missing"
                WHEN COUNT(DISTINCT sid) > cnt
                    THEN "wat!"
                WHEN COUNT(DISTINCT sid) <>  COUNT(*)
                    THEN "sid inconsistence"
                ELSE "ok"
            END AS status                   
        FROM full_queries_set
        LEFT NATURAL JOIN single
        GROUP BY dbengine, dataset, query
    ''',
    
    # health_bulk
    # Checks how many queries have been completed and how many timed out.
    '''
    CREATE VIEW health_bulk AS
        SELECT dbengine, dataset, query,
            COUNT(bulk.query) AS cnt,
            COUNT(DISTINCT int_order) AS sid_cnt,
            cnt AS expected_cnt,
            CASE      
                WHEN COUNT(DISTINCT int_order) < cnt
                    THEN "missing"
                WHEN COUNT(DISTINCT int_order) > cnt
                    THEN "wat!"
                WHEN COUNT(DISTINCT int_order) <>  COUNT(*)
                    THEN "sid inconsistence"
                ELSE "ok"
            END AS status                   
        FROM full_queries_set
        LEFT NATURAL JOIN bulk
        -- queries with cnt == 1 do not have a bulk version
        WHERE cnt > 1 
        GROUP BY dbengine, dataset, query        
    ''',
]

assert len(views_names) == len(views)

with get_default_db() as c:
    stmt = 'DROP VIEW IF EXISTS %s'
    for v in views_names:
        c.execute(stmt % v)
    log("All views have been dropped")

    for v in views:
        c.execute(v)   
log("All views have been recreated")
2019-09-12T13:26:55.231511 - All views have been dropped
2019-09-12T13:26:55.351274 - All views have been recreated
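
With the views in place, a quick and purely illustrative way to check the state of the import is to aggregate the health views:

with get_default_db() as c:
    for status, n in c.execute('SELECT status, COUNT(*) FROM health GROUP BY status'):
        log('single runs - %s: %d' % (status, n))
    for status, n in c.execute('SELECT status, COUNT(*) FROM health_bulk GROUP BY status'):
        log('bulk runs - %s: %d' % (status, n))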

Charts

Below are all the plots of the running-time comparison experiments. In all the following charts, lower values are better.

In [10]:
import numpy as np

# Style from paper.
styles = [
    {'color':'blue',      'linestyle':':',  'marker': 'x'}, 
    {'color':'cyan',      'linestyle':':',  'marker': 'o'},
    {'color':'gold',      'linestyle':'-',  'marker': 'D'},
    {'color':'olive',     'linestyle':'-',  'marker': 'x'},
    {'color':'darkgreen', 'linestyle':':',  'marker': '+'},
    {'color':'black',     'linestyle':'-.', 'marker': '*'},
    {'color':'red',       'linestyle':'--', 'marker': '^'},
    {'color':'grey',      'linestyle':':',  'marker': 'v'},
    {'color':'green',     'linestyle':'-',  'marker': 's'},
]
markers = {'markersize': 8, 'fillstyle': 'none'}

# Make it readable
plt.rcParams.update({'figure.figsize': (20,10), 'font.size': 20})


def ds_short(ds_name):
    if ds_name[:len('freebase')] == 'freebase':
        return 'FBR-' + ds_name.split('_')[1][0].upper()
    return ' '.join(ds_name.upper().split())    


re_qpff = re.compile('[^a-zA-Z0-9]')
def q_prettify(q):
    return re_qpff.sub(' ', q)


def get_queries(not_cls=list()):
    """ Queries with at least one experiment,
        excluding the classes listed in not_cls.
    """
    with get_default_db() as c:
        q = '''
        SELECT DISTINCT cls, s.query, paper_id || ' ' || s.query
        FROM single s
        JOIN queries as q ON q.query = s.query AND cls NOT IN ({})
        ORDER BY qord
        '''
        qf = q.format(','.join(map(lambda x: "'" + x + "'", not_cls)))
        queries = [(r[0], r[1]) for r in c.execute(qf)]
        queries_labels = [q_prettify(r[2].strip()) for r in c.execute(qf)]
        in_queries = ','.join(map(lambda x: "'" + x[1] + "'", queries))
        return queries, queries_labels, in_queries

        
# Datasets with at least one experiment
with get_default_db() as c:
    q= '''
    SELECT DISTINCT dataset
    FROM single
    NATURAL JOIN datasets
    ORDER BY dord
    '''
    datasets = [r[0] for r in c.execute(q)]
in_datasets = ','.join(map(lambda x: "'" + x + "'", datasets))
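
For reference, a couple of hypothetical calls to the helpers above (the dataset name is made up):

assert ds_short('freebase_small') == 'FBR-S'   # hypothetical dataset name
assert q_prettify('NN-incoming') == 'NN incoming'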

Microbenchmark Queries

The plots show the mean execution time for each query, where a timeout is counted as the timeout value itself, 2h.
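
For example (purely illustrative numbers): if only 7 of the 10 expected runs of a query completed, with a total measured time of 30 s, the plotted mean is (30,000 + 3 * 7,200,000) / 10 = 2,163,000 ms, i.e. each missing run is charged the full 2 h timeout.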

In [11]:
# Plot all queries except those with specialized plots
q = '''
    SELECT dbalias, dataset, ROUND(exec_time, 2)        
    FROM single_means_wtimeout as t
    JOIN queries as q ON q.query = t.query
    NATURAL JOIN dborder
    NATURAL JOIN datasets AS ds
    WHERE q.query='{}' AND dataset IN ({}) 
        -- Exclude classes with specialized plots
        AND q.cls NOT IN ('B', 'S', 'I')
    ORDER BY ord, dord
'''

queries, queries_labels, in_queries = get_queries(['B', 'S', 'I'])
    
xlabels = list(map(ds_short, datasets))
current_cls = ''
with get_default_db() as c:
    for i, x in enumerate(queries):
        cls, query = x
        if cls != current_cls:
            current_cls = cls
            display(HTML('<h1 id="current_cls">{}</h1>'.format(current_cls)))
            
        plt.title('#'+queries_labels[i].upper())
        plt.xlabel('Dataset')
        plt.ylabel('Time (ms)')
        plt.yscale('log')
        plt.grid(True)
    
        rs = [r for r in c.execute(q.format(query, in_datasets))]
        for j, (db, items) in enumerate(gby(rs).items()):
            ts = [x[2] for x in items]
            plt.plot(ts, label=db, **styles[j%len(styles)], **markers)

        plt.xticks(np.arange(len(xlabels)), xlabels, rotation=90)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.show()

L

C