from __future__ import division, print_function  # Makes division and printing work like python 3 (we're using 2)

import csv
import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.spatial import distance
from tabulate import tabulate

# Notebook equivalent of ``%matplotlib inline``.  ``get_ipython`` only exists
# under IPython/Jupyter, so the magic is skipped when run as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Number of share columns that follow the per-row total (column 0) in the
# numeric part of each yearly table.
N_FIELDS = 44

# Tick labels for the medal plots, applied positionally.
# NOTE(review): this assumes the Medal categories are drawn in
# Gold/Silver/Bronze/None order -- confirm against the coding in the CSVs.
MEDAL_LABELS = ["Gold", "Silver", "Bronze", "None"]


def _load_dataset(filename):
    """Read ``datasets/<filename>`` under the current working directory.

    Returns a ``(path, DataFrame)`` pair.
    """
    path = os.path.join(os.getcwd(), 'datasets', filename)
    return path, pd.read_csv(path)


def _normalize_rows(table, n_fields=N_FIELDS):
    """Divide columns 1..n_fields of every row by that row's column 0.

    The array is modified in place and also returned.  Every row of ``table``
    is processed, so no row count has to be hard-coded.
    """
    for row in table:
        total = float(row[0])
        for col in range(1, n_fields + 1):
            row[col] /= total
    return table


def _pairwise_diversity(shares, dist, n_fields=N_FIELDS):
    """Return one diversity score per row of ``shares``.

    A row's score is the sum of ``dist[j][k] * row[j] * row[k]`` over all
    column pairs ``1 <= j < k <= n_fields`` where both shares are positive.

    NOTE(review): share column ``j`` is paired with row ``j`` of the distance
    matrix, i.e. index 0 of the coordinates is never used -- confirm that the
    coordinates file is laid out to match.
    """
    scores = []
    for row in shares:
        score = 0
        for j in range(1, n_fields):
            if row[j] > 0:
                for k in range(j + 1, n_fields + 1):
                    if row[k] > 0:
                        score += dist[j][k] * row[j] * row[k]
        scores.append(score)
    return scores


def _draw_diversity_plot(groups, scores, title, xlabel, figsize,
                         tick_labels=None, **box_style):
    """Draw a box plot of ``scores`` per group with the raw points on top.

    ``box_style`` is forwarded to ``sns.boxplot`` (e.g. ``color``, ``palette``,
    ``width``).  When ``tick_labels`` is given, the x ticks are relabelled
    positionally.  The figure is neither saved nor shown here; the caller
    decides.  Returns ``(figure, axes)``.
    """
    sns.set(style="whitegrid")
    fig, axes = plt.subplots(figsize=figsize)

    # Box plot of the diversity scores for each group; whiskers span the
    # whole range (whis=np.inf).
    sns.boxplot(x=groups, y=scores, whis=np.inf, **box_style)

    # Add in points to show each observation
    sns.swarmplot(x=groups, y=scores, size=4, color=".3", linewidth=0)

    sns.despine(trim=True, left=True)
    if tick_labels is not None:
        plt.xticks(range(len(tick_labels)), tick_labels)
    plt.title(title, size=20)
    plt.ylabel("Diversity", size=20)
    plt.xlabel(xlabel, size=20)
    plt.tick_params(labelsize=18)
    return fig, axes


# Please note that we have changed excel file names to better put the files
# onto the wiki, but the data is intact.
data_2013, data2013 = _load_dataset('2013N.csv')
data_2014, data2014 = _load_dataset('2014N.csv')
data_2015, data2015 = _load_dataset('2015N.csv')
coor, coordinates = _load_dataset('coordinates.csv')

# Euclidean distance between every pair of the first 46 coordinate rows.
field_xyz = coordinates[0:46][['x', 'y', 'z']].values
distance_matrix = distance.cdist(field_xyz, field_xyz, 'euclidean')

# Drop the four leading columns; what remains starts with the per-row total.
p_2013 = data2013.values[:, 4:]
p_2014 = data2014.values[:, 4:]
p_2015 = data2015.values[:, 4:]

print(p_2013.shape)
print(p_2014.shape)
print(p_2015.shape)

# Turn the per-row values into shares of the row total (in place).
_normalize_rows(p_2013)
_normalize_rows(p_2014)
_normalize_rows(p_2015)

diversity_2013 = _pairwise_diversity(p_2013, distance_matrix)
diversity_2014 = _pairwise_diversity(p_2014, distance_matrix)
diversity_2015 = _pairwise_diversity(p_2015, distance_matrix)

# One medal plot per year, each saved as "<year> diversity box plot.png".
for year, frame, scores in ((2013, data2013, diversity_2013),
                            (2014, data2014, diversity_2014),
                            (2015, data2015, diversity_2015)):
    f, ax = _draw_diversity_plot(
        frame.Medal, scores,
        title="Teams of {0}".format(year),
        xlabel="Medal",
        figsize=(12, 12),
        tick_labels=MEDAL_LABELS,
        color="powderblue")
    plt.savefig("{0} diversity box plot.png".format(year), dpi=300)
    plt.show()

# 2014 diversity broken down by track (shown only, not saved to disk).
f, ax = _draw_diversity_plot(
    data2014.Track, diversity_2014,
    title="Teams of 2014",
    xlabel="Track",
    figsize=(40, 10),
    palette="Set3",
    width=0.3)
# NOTE(review): nothing plotted above is given an explicit label, so this
# legend call probably has nothing to show -- see the note below.
plt.legend(loc='upper left')
plt.tick_params(axis='x', length=0.1, width=0.1)
plt.show()

# The labels argument in ax.boxplot() sets the xticklabels of the axis, not the labels of the boxes objects (which would be used in e.g. ax.legend()).
# So instead, you can access the labels you want from ax.get_xticklabels.
# The relevant snippet to replace in your code is:
#
# for b in boxplot_dict['boxes']:
#     lab = ax.get_xticklabels()[i].get_text()
#     print("Label property of box {0} is {1}".format(i, lab))
#     b.set_facecolor(color_dict[lab])
#     i += 1
#
# The full code would be:
#
# from matplotlib import pyplot as plt
# import numpy as np
# data = {}
# data['a'] = np.arange(12)+1
# data['b'] = np.arange(14)+1
# data['c'] = np.arange(8)+1
# color_dict = {'trt_a':'orange', 'trt_b':'blue', 'trt_c':'green'}
# controls = ['trt_a', 'trt_b', 'trt_c']
# fig, ax = plt.subplots()
# boxplot_dict = ax.boxplot(
#     [data[x] for x in ['a', 'b', 'c']],
#     positions = [1, 1.5, 2],
#     labels = controls,
#     patch_artist = True,
#     widths = 0.25)
# i=0
# for b in boxplot_dict['boxes']:
#     lab = ax.get_xticklabels()[i].get_text()
#     print("Label property of box {0} is {1}".format(i, lab))
#     b.set_facecolor(color_dict[lab])
#     i += 1
# ax.set_ylim([0,16])
# plt.show()
#
# Draft adaptation of the snippet above to the track plot (not working yet):
#
# color_dict = {'Measurement':'orange', 'Health & Medicine':'blue',
#               'Information Processing':'black',
#               'Foundational Advantage':'green', 'Software':'skyblue',
#               'Energy':'grey', 'Art & Design Track': 'white',
#               'New Application':'grey', 'Environment':'white',
#               'Manufacturing':'grey', 'Policy & Practices': 'grey',
#               'Food & Nutrition':'grey', 'Microfluidics':'grey'}
# controls = ['Measurement', 'Health & Medicine', 'Information Processing', 'Foundational Advantage',
#             'Software', 'Energy', 'Art & Design Track', 'New Application', 'Environment',
#             'Manufacturing', 'Policy & Practices', 'Food & Nutrition', 'Microfluidics']
# boxplot_dict = ax.boxplot(
#     [data[x] for x in ['a', 'b', 'c']],
#     positions = [1, 1.5, 2],
#     labels = controls,
#     # patch_artist = True, # Legend isn't working with patch objects..
#     widths = 0.25)
# i=0
# for b in boxplot_dict['boxes']:
#     lab = ax.get_xticklabels()[i].get_text()
#     print("Label property of box {0} is {1}".format(i, lab))
#     b.set_color(color_dict[lab])
#     b.set_label(lab)
#     i += 1
# ax.set_ylim([0,16])
# ax.legend()
# plt.show()