Lico Stats¶

This document shows the preparation and exploration phase that precedes the bubble chart representation shown on the main page. The work was done in Python with the help of Jupyter.

import pandas as pd
import numpy as np
import seaborn
import matplotlib.pyplot as plt
import json

%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib

# Relative path of the input CSV file loaded during data preparation.
csv_path = "./va-project/data/lico.csv"

Data Preparation¶

# Read dataset from csv
df = pd.read_csv(csv_path, skipinitialspace=True, delimiter=";")

# Drop the attributes that are not needed for the charts. The columns are
# passed by keyword because the positional ``axis`` argument of
# ``DataFrame.drop`` was deprecated in pandas 1.3 and removed in pandas 2.0.
df = df.drop(columns=[
    "diskSpace",
    "swap",
    "memory",
    "distroVersion",
    "userId",
    "kernel",
    "cpu",
    "country",
])

# Normalise missing data: the literal "\N" marker and real null values are
# both replaced with "unknown".
df = df.replace("\\N", "unknown")
df = df.fillna("unknown")

# Discard machines that report zero cores.
# NOTE(review): this compares against the string "0", so it only filters when
# numCores is loaded as text -- confirm the column dtype.
df = df[df["numCores"] != "0"]

# Keep only the rows in which no remaining attribute is "unknown".
df = df[df.ne("unknown").all(axis=1)]

Simple data exploration¶

# Show ten randomly chosen rows of the cleaned dataset.
df.sample(n=10)

Distribution¶

Top 30 distributions ordered by number of machines

# Bar chart of the 30 most frequent distributions.
pylab.rcParams['figure.figsize'] = (15, 6)  # Image size
ax = (
    df["distribution"]
    .value_counts()
    .head(30)
    .plot(kind='bar', color='#7CB518', fontsize=15)
)
ax.set_ylabel("Machines", fontsize=15)

<matplotlib.text.Text at 0x7f5341e8a550>

Class¶

# Bar chart of the number of machines per class.
pylab.rcParams['figure.figsize'] = (15, 6)  # Image size
ax = (
    df["class"]
    .value_counts()
    .plot(kind='bar', color='#F7B32B', fontsize=15)
)
ax.set_ylabel("Machines", fontsize=15)

<matplotlib.text.Text at 0x7f5341f02860>

Architecture¶

# Bar chart of the number of machines per architecture.
pylab.rcParams['figure.figsize'] = (15, 6)  # Image size
ax = (
    df["architecture"]
    .value_counts()
    .plot(kind='bar', color='#5BC0EB', fontsize=15)
)
ax.set_ylabel("Machines", fontsize=15)

<matplotlib.text.Text at 0x7f53429ae780>

Cores¶

# Bar chart of the number of machines per core count.
pylab.rcParams['figure.figsize'] = (15, 6)  # Image size
ax = (
    df["numCores"]
    .value_counts()
    .plot(kind='bar', color='#EF5B5B', fontsize=15)
)
ax.set_ylabel("Machines", fontsize=15)

<matplotlib.text.Text at 0x7f53429487b8>

CSV to Hierarchical JSON¶

The CSV file is converted to JSON format in order to follow the hierarchy Distribution->Class->Architecture->Cores

def hierarchical_json(df):
    """Build a nested dict following Distribution -> Class -> Architecture -> Cores.

    Args:
        df: DataFrame with the columns "distribution", "class",
            "architecture" and "numCores".

    Returns:
        A dict for the root node with the keys "name" ("machine"), "size"
        (total number of rows), "children" and "depth" (0). Every nested node
        has "name" (the column value), "size" (number of rows in that group)
        and "depth" (1 to 4). Nodes at depth 1 to 3 also carry a "children"
        list; depth-4 nodes (cores) are leaves. Children are listed in order
        of first appearance in ``df``.

    Rows with a missing value in a level's column are counted in the size of
    their ancestors but do not produce a node at that level.
    """
    levels = ("distribution", "class", "architecture", "numCores")
    last_level = len(levels) - 1

    def to_native(value):
        # Values from numeric columns are numpy scalars, which the json
        # module cannot serialise; convert them to plain Python objects.
        return value.item() if hasattr(value, "item") else value

    def build_children(frame, level):
        nodes = []
        # sort=False keeps the groups in order of first appearance, and
        # groupby skips missing keys, so no empty node is ever produced.
        for value, group in frame.groupby(levels[level], sort=False):
            node = {"name": to_native(value)}
            if level < last_level:
                node["children"] = build_children(group, level + 1)
            node["size"] = len(group)
            node["depth"] = level + 1
            nodes.append(node)
        return nodes

    return {
        "name": "machine",
        "size": len(df),
        "children": build_children(df, 0),
        "depth": 0,
    }

# Convert the cleaned dataset to its hierarchical representation.
data_out = hierarchical_json(df)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened explicitly as UTF-8 instead of relying on the platform default
# encoding.
with open('data.json', 'w', encoding='utf-8') as outfile:
    json.dump(data_out, outfile, ensure_ascii=False)

Back to main page.

	numCores	class	architecture	distribution
5116	4	other	x86_64	Slackware
9607	2	smartphone	arm	Android
7684	1	server	i686	ClearOS
7289	2	server	i686	Debian
13697	4	workstation	x86_64	Ubuntu
4221	2	laptop	amd64	Debian
4490	8	laptop	x86_64	Ubuntu
12300	1	workstation	i686	Debian
4063	2	laptop	x86_64	Mageia
15275	2	workstation	x86_64	Kubuntu