import os
import pandas as pd
import numpy as np
import re
import IPython.display as ipd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
Load first dataset with scores:
# Ratings table; the 'Distorted' column is used as the row index.
ratings_csv_path = 'voip-ratings.csv'
df_ratings = pd.read_csv(ratings_csv_path, index_col='Distorted')
# df_ratings.describe()
Calculate Tx (outgoing) and Rx (incoming) bitrates for both caller and callee:
# The number embedded in the sample name ("sampleNN", optional leading zero
# dropped) is used as the call duration by the bitrate formula below.
# Raw string: '\d' in a plain literal is an invalid escape sequence.
df_ratings['duration'] = (
    df_ratings['Sample'].str.extract(r'sample0?(\d+)', expand=False).astype(int)
)

# Average bitrate per direction (Rx/Tx) and call side (Caller/Callee):
# bytes / duration / 1024 * 8.
bitrate_cols = []
for role in ['Caller', 'Callee']:
    for xx in ['Rx', 'Tx']:
        bitrate_col = xx + 'Bitrate' + role
        df_ratings[bitrate_col] = (
            df_ratings[xx + 'Bytes' + role] / df_ratings['duration'] / 1024.0 * 8
        )
        bitrate_cols.append(bitrate_col)
# df_ratings[bitrate_cols].describe()
Fill empty scores with 1.0 and clamp out-of-range scores to the lower/upper bound (1.0 / 5.0).
The final score (ScoreFinal column) is a weighted sum of the different rater scores from the first contest stage.
ScoreColumns = ['ScoreCombined', 'ScoreOutput', 'Score1010', 'Score1012', 'Score1002', 'Score1007', 'Score997']

# Normalise every rater column to a float within [1.0, 5.0].
for col in ScoreColumns:
    if df_ratings.dtypes[col] != 'float64':
        # Cells carrying an "ERROR:" message are replaced by the lowest score.
        # regex=True must be explicit: pandas >= 2.0 treats the pattern as a
        # literal string by default, which would leave the messages in place
        # and make the float conversion below fail.
        df_ratings[col] = df_ratings[col].astype('object').str.replace(r'^.*ERROR:.*$', '1', regex=True)
        df_ratings[col] = df_ratings[col].astype('float64')
    # Clamp to the valid range, then treat missing scores as 1.0. The result is
    # assigned back to the frame: fillna(inplace=True) on a column selection
    # does not reliably update the parent frame (no-op under copy-on-write).
    df_ratings[col] = df_ratings[col].clip(lower=1.0, upper=5.0).fillna(1.0)

# ScoreOutput is raised to Score997 wherever Score997 is higher.
df_ratings['ScoreOutput'] = df_ratings[['Score997', 'ScoreOutput']].max(axis=1)
# 1.0 for calls whose combined score is at the lower bound, 0.0 otherwise.
df_ratings['ScoreFail'] = (df_ratings['ScoreCombined'] <= 1.0) * 1.0
# NOTE(review): the weights below add up to 0.98 and Score1002 is not part of
# the sum -- confirm this matches the contest formula.
df_ratings['ScoreFinal'] = (
    df_ratings['ScoreCombined'] * 0.3
    + df_ratings['ScoreOutput'] * 0.2
    + df_ratings['Score1010'] * 0.16
    + df_ratings['Score1012'] * 0.16
    + df_ratings['Score1007'] * 0.16
)
ScoreColumns.append('ScoreFinal')

# Failed calls only; used for the failed-call statistics.
df_empty = df_ratings.loc[(df_ratings['ScoreFail'] > 0)]
#df_noempty = df_ratings.loc[(df_ratings['ScoreFail'] <= 0.0)]
# Failed calls are currently kept in the averages (full copy, no filtering).
df_noempty = df_ratings.copy()
df_noempty.describe()
f, axs = plt.subplots(3, 2, figsize=(25, 35))


def heatmapOnAx(score_col, ax):
    """Draw an Entry x Network heatmap of the mean of ``score_col``.

    Reads the module-level ``df_noempty`` frame. Rows are entries sorted by
    their mean over all networks (best first); columns are networks plus an
    'Overall' column, sorted by their mean over all entries (best first).
    A final 'Overall' row holds the per-column means.

    Args:
        score_col: name of the score column in ``df_noempty`` to average.
        ax: matplotlib axes to draw on.

    Returns:
        The list of column labels (networks and 'Overall') in plotted order.
    """
    entries_net_ratings = df_noempty.groupby(['Network', 'Entry'])[score_col].mean().unstack()
    # Per-entry mean over networks, added while entries are still columns.
    entries_net_ratings.loc['Overall'] = entries_net_ratings.mean()
    entries_net_ratings = entries_net_ratings.transpose()
    entries_net_ratings = entries_net_ratings.sort_values('Overall', ascending=False)
    # Per-network mean over entries becomes the bottom row.
    entries_net_ratings.loc['Overall'] = entries_net_ratings.mean()
    networkorder = entries_net_ratings.loc['Overall'].sort_values(ascending=False).index.tolist()
    entries_net_ratings = entries_net_ratings[networkorder]

    cmap = sns.diverging_palette(10, 133, as_cmap=True)
    ax = sns.heatmap(entries_net_ratings, cmap=cmap, square=True, annot=True, fmt='.3g', linewidths=.5, ax=ax)
    # Extend the y-limits by half a cell at both ends.
    # NOTE(review): looks like the workaround for heatmap rows being cut off
    # in matplotlib 3.1.1 -- confirm it is still wanted with the installed version.
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=8)
    ax.set_title('Average ratings by ' + score_col)
    return networkorder


# One panel per score column; the 3x2 grid only has room for the first six.
for score_col, ax in zip(ScoreColumns, axs.flat):
    heatmapOnAx(score_col, ax)

# Separate, larger figure for the final score. Its column order is kept so
# that later plots can present networks in the same order.
f, ax = plt.subplots(figsize=(15, 15))
RateRatingOrder = heatmapOnAx('ScoreFinal', ax)
You can listen to the audio files: choose 2 entries to compare under a given network condition and run this cell.
First you will need to download the audio files from https://data-static.usercontent.dev/VoIP-Stage2-Audios.tar.gz (2.7GB) and extract them into the audios folder.
# Folder the audio archive is expected to be extracted into. The same path is
# used for the existence check and for loading the files (the original checked
# "./audios" but then read from 'audio5/').
audio_dir = "./audios"
if os.path.isdir(audio_dir):
    entry1 = 'stable'
    entry2 = 'unstable'
    net = '3G2'
    scoreType = 'Score1007'
    number = 3  # how many samples to play for each of the two entries
    print(scoreType, 'of', entry1, 'vs', entry2, 'over', net)
    print('==========================')
    df_check_audios = df_ratings[
        (df_ratings['Network'] == net) & df_ratings['Entry'].isin([entry1, entry2])
    ]
    df_groups = df_check_audios.groupby(['Sample', 'Entry'], as_index=True)
    # There is one group per (sample, entry) pair, so `number` samples for two
    # entries means 2 * number groups. (The previous countdown loop stopped
    # one group early and left the last sample without its counterpart.)
    df_group_keys = list(df_groups.groups.keys())[:number * 2]
    for k in df_group_keys:
        df_group = df_groups.get_group(k).sort_values(scoreType)
        # Pick the recording with the median score of the group.
        row = df_group.iloc[len(df_group) // 2]
        # The row index holds the distorted recording's .pcm file name.
        filename_pcm = row.name
        file_name_wav = re.sub(r"\.pcm$", ".wav", filename_pcm)
        file_path_wav = os.path.join(audio_dir, file_name_wav)
        print(scoreType, "%0.2f" % row[scoreType], ', ScoreFinal', "%0.2f" % row['ScoreFinal'], file_name_wav)
        ipd.display(ipd.Audio(file_path_wav))
Plot number of failed calls (score <= 1.0):
# Number of failed calls per (network, entry); combinations with no failures count as 0.
fail_counts = df_empty.groupby(['Network', 'Entry'])['ScoreFinal'].count()
entries_net_fails = fail_counts.unstack().fillna(0)
# Total per entry, added while entries are still columns.
entries_net_fails.loc['Overall'] = entries_net_fails.sum()
# Entries become rows, ordered by their total number of failures (most first).
entries_net_fails = entries_net_fails.T.sort_values('Overall', ascending=False)
# Total per column becomes the bottom row and drives the column order.
entries_net_fails.loc['Overall'] = entries_net_fails.sum()
overall_row = entries_net_fails.loc['Overall']
networkorder = overall_row.sort_values(ascending=False).index.tolist()
entries_net_fails = entries_net_fails[networkorder]

f, ax = plt.subplots(figsize=(15, 11))
cmap = sns.diverging_palette(133, 10, as_cmap=True)
ax = sns.heatmap(
    entries_net_fails,
    cmap=cmap,
    square=True,
    vmax=50,
    annot=True,
    fmt='.3g',
    linewidths=.5,
    ax=ax,
)
# Extend the y-limits by half a cell at both ends.
y_bottom, y_top = ax.get_ylim()
ax.set_ylim(y_bottom + 0.5, y_top - 0.5)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=8)
_ = ax.set_title('Failed calls')
Plot bitrates:
# 2x2 grid: one row per direction (Tx, Rx), one column per call side.
f, axs = plt.subplots(2, 2, figsize=(20, 20))
plt.tight_layout()
cmap = sns.diverging_palette(10, 133, as_cmap=True)
for i, xx in enumerate(['Tx', 'Rx']):
    for j, role in enumerate(['Caller', 'Callee']):
        bitrate_col = xx + 'Bitrate' + role
        per_network = df_noempty.groupby(['Network', 'Entry'])[bitrate_col].mean().unstack()
        # Mean per entry over all networks, added while entries are still columns.
        per_network.loc['Overall'] = per_network.mean()
        # Entries become rows, lowest average bitrate first.
        per_entry = per_network.T.sort_values('Overall', ascending=True)
        per_entry.loc['Overall'] = per_entry.mean()
        # Columns follow the order stored in RateRatingOrder.
        entries_net_bitrate = per_entry[RateRatingOrder]

        ax = axs[i, j]
        ax.set_title(f'{role} {xx} Bitrate, KBit/sec')
        ax = sns.heatmap(
            entries_net_bitrate,
            cmap=cmap,
            square=True,
            annot=True,
            fmt='.3g',
            vmax=80,
            linewidths=.5,
            ax=ax,
        )
        # Extend the y-limits by half a cell at both ends.
        y_bottom, y_top = ax.get_ylim()
        ax.set_ylim(y_bottom + 0.5, y_top - 0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=8)
Load a separate dataset with timings. This dataset is smaller than the first one and was collected on a single host for both caller and callee to minimize the time difference between them.
df_latency = pd.read_csv('voip-latency.csv')
# Exclude 1281, as it used custom tgvoipcall binary, without timestamps
# Also keep only calls whose combined score is above 1.0.
usable_calls = (df_latency['ScoreCombined'] > 1.0) & (df_latency['Entry'] != 'entry1281')
# .copy() makes the filtered frame independent, so the derived columns below
# are not assigned onto a slice of the raw frame (SettingWithCopyWarning).
df_latency = df_latency.loc[usable_calls].copy()

# Divisor that converts raw timestamp differences into the plotted unit.
# NOTE(review): presumably timestamps are in microseconds and results in seconds -- confirm.
time_scale = 1000000.0

# Initialisation is complete once the later of the two sides has initialised.
df_latency['TimeInit'] = df_latency[['TimeInitCaller', 'TimeInitCallee']].max(axis=1)
# Connection time is the earliest first read/write seen on either side.
df_latency['TimeConnect'] = df_latency[['TimeFirstWriteCaller', 'TimeFirstReadCaller', 'TimeFirstWriteCallee', 'TimeFirstReadCallee']].min(axis=1)
# Each side ends at its last read or write, whichever is later.
df_latency['TimeEndCaller'] = df_latency[['TimeLastWriteCaller', 'TimeLastReadCaller']].max(axis=1)
df_latency['TimeEndCallee'] = df_latency[['TimeLastWriteCallee', 'TimeLastReadCallee']].max(axis=1)

# Time from connection until a side has both read and written for the first time.
df_latency['CallerFirstLatency'] = (df_latency[['TimeFirstWriteCaller', 'TimeFirstReadCaller']].max(axis=1) - df_latency['TimeConnect']) / time_scale
df_latency['CalleeFirstLatency'] = (df_latency[['TimeFirstWriteCallee', 'TimeFirstReadCallee']].max(axis=1) - df_latency['TimeConnect']) / time_scale
# Time from connection until each side's last read/write.
df_latency['CallerDuration'] = (df_latency['TimeEndCaller'] - df_latency['TimeConnect']) / time_scale
df_latency['CalleeDuration'] = (df_latency['TimeEndCallee'] - df_latency['TimeConnect']) / time_scale
# Time from initialisation until connection.
df_latency['ConnectDuration'] = (df_latency['TimeConnect'] - df_latency['TimeInit']) / time_scale

LatencyColumns = ['CallerFirstLatency', 'CalleeFirstLatency', 'CallerDuration', 'CalleeDuration', 'ConnectDuration']
df_latency = df_latency.set_index('Distorted')
df_latency.describe()
f, axs = plt.subplots(2, 2, figsize=(25, 25))


def heatmapLatencyOnAx(score_col, ax):
    """Draw an Entry x Network heatmap of the mean of a latency column.

    Reads the module-level ``df_latency`` frame. Rows are entries sorted by
    their mean over all networks (largest first); columns are networks plus
    an 'Overall' column, sorted by their mean over all entries (largest
    first). A final 'Overall' row holds the per-column means.

    Args:
        score_col: name of the column in ``df_latency`` to average.
        ax: matplotlib axes to draw on.

    Returns:
        The list of column labels (networks and 'Overall') in plotted order.
    """
    entries_net_ratings = df_latency.groupby(['Network', 'Entry'])[score_col].mean().unstack()
    # Per-entry mean over networks, added while entries are still columns.
    entries_net_ratings.loc['Overall'] = entries_net_ratings.mean()
    entries_net_ratings = entries_net_ratings.transpose()
    entries_net_ratings = entries_net_ratings.sort_values('Overall', ascending=False)
    # Per-network mean over entries becomes the bottom row.
    entries_net_ratings.loc['Overall'] = entries_net_ratings.mean()
    networkorder = entries_net_ratings.loc['Overall'].sort_values(ascending=False).index.tolist()
    entries_net_ratings = entries_net_ratings[networkorder]

    cmap = sns.diverging_palette(133, 10, as_cmap=True)
    # Cap the colour scale for columns where outliers would otherwise wash
    # out the differences; other columns use the data maximum.
    vmax = None
    if score_col.endswith('FirstLatency'):
        vmax = 0.5
    elif score_col == 'ConnectDuration':
        vmax = 1
    ax = sns.heatmap(entries_net_ratings, cmap=cmap, square=True, annot=True, fmt='.3g', linewidths=.5, ax=ax, vmax=vmax)
    # Extend the y-limits by half a cell at both ends.
    # NOTE(review): looks like the workaround for heatmap rows being cut off
    # in matplotlib 3.1.1 -- confirm it is still wanted with the installed version.
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=8)
    ax.set_title(score_col)
    return networkorder


# One panel per latency column; the 2x2 grid only has room for the first four.
for score_col, ax in zip(LatencyColumns, axs.flat):
    heatmapLatencyOnAx(score_col, ax)
Time taken to establish connection (from tgvoipcall init until first byte read/sent):
# ConnectDuration gets its own, larger figure; the plotted column order is kept in `c`.
f, ax = plt.subplots(figsize=(15, 15))
c = heatmapLatencyOnAx(score_col='ConnectDuration', ax=ax)