In [62]:
%matplotlib inline
import glob
from collections import defaultdict
from datetime import datetime

import flowtools
import matplotlib.pyplot as plt
import numpy as np
In [63]:
# Collect every path matching ft-* in the 2011-10-26 directory.
# glob.glob() returns matches in arbitrary order, so sort them to keep the
# preview below (and the processing order) reproducible between runs.
files = sorted(glob.glob("hpcf-flows/2011-10-26/ft-*"))
files[:10]
Out[63]:
['hpcf-flows/2011-10-26/ft-v05.2011-10-26.001000-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.002000-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.003000-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.004500-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.005500-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.010000-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.011500-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.012000-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.013500-0400', 'hpcf-flows/2011-10-26/ft-v05.2011-10-26.015000-0400']
In [64]:
#flowset = (flowfile for flowfile in files[0])
In [65]:
# Accumulate octet counts per (source address, destination address) pair:
#   total  -- summed over every flow in every file listed in `files`
#   hourly -- list of 24 mappings, indexed by the hour of the flow timestamp
total = defaultdict(int)
hourly = [defaultdict(int) for _ in range(24)]

for flowfile in files:
    for flow in flowtools.FlowSet(flowfile):
        srcIP = flow.srcaddr
        dstIP = flow.dstaddr
        octets = flow.dOctets
        # NOTE(review): fromtimestamp() without a tz argument converts to the
        # local timezone of the machine running the notebook, so the hour
        # buckets shift if this is executed elsewhere -- confirm that local
        # time is what is wanted here.
        timestamp = datetime.fromtimestamp(flow.unix_secs)

        # Build the key once and credit both the hourly and the overall tally.
        pair = (srcIP, dstIP)
        hourly[timestamp.hour][pair] += octets
        total[pair] += octets
In [66]:
# Pick the address pairs with the largest overall totals and build a
# (pairs x hours) matrix of their per-hour totals for plotting.
TOP_N = 10  # number of heaviest pairs to keep

# Sort as (amount, pair) tuples so the largest totals come first; ties fall
# back to comparing the pair itself.
pairs = [(amount, pair) for (pair, amount) in total.items()]
pairs.sort(reverse=True)

# The selected pairs, heaviest first; used later as legend labels.
flows = [pair for (_amount, pair) in pairs[:TOP_N]]

# Rows follow the order of `flows`, columns follow the order of `hourly`.
# Rows remain zero when fewer than TOP_N pairs exist.
data = np.zeros((TOP_N, len(hourly)))
for row, pair in enumerate(flows):
    for hour, per_hour in enumerate(hourly):
        # Use .get() instead of [] so that looking up a pair with no traffic
        # in this hour does not insert a new zero entry into the defaultdict.
        data[row, hour] = per_hour.get(pair, 0)
In [67]:
# Inspect the dimensions of the matrix built above before plotting it.
data.shape
Out[67]:
(10, 24)
In [68]:
# Stacked bar chart: one bar per column of `data`, one coloured segment per row.
ind = np.arange(data.shape[1])
# Running column sums; row j-1 is the height at which segment j has to start.
bottom = np.cumsum(data, axis=0)
d_colors = ['#2166ac',
            '#fee090',
            '#fdbb84',
            '#fc8d59',
            '#e34a33',
            '#b30000',
            '#777777', '#ff3333', '#33ff33', '#3333ff', '#33ffff']

handles = []
# range() instead of xrange() so the cell runs on both Python 2 and Python 3.
for j in range(data.shape[0]):
    # The first segment sits on the axis; later ones sit on the stack so far.
    base = bottom[j - 1] if j > 0 else np.zeros(data.shape[1])
    # Cycle through the palette so extra rows cannot raise an IndexError.
    segment = plt.bar(ind, data[j], bottom=base,
                      color=d_colors[j % len(d_colors)])
    handles.append(segment)

plt.xlabel("Hour of day")
plt.ylabel("Octets")
# Pass the bar handles explicitly so each label is tied to its own segment
# rather than relying on the implicit order of artists on the axes.
plt.legend(handles, flows, loc=(1.1, 0.1))
Out[68]:
<matplotlib.legend.Legend at 0x1052f4050>
In [68]:
In [68]: