Python - matplotlib is very slow at plotting
I have multiple functions that take an array or dict plus a path argument as input; each function saves a figure to the given path.
Trying to keep the example as minimal as possible, here are two of the functions:
def _save_bar_chart(counts, title, xlabel, path):
    """Draw (label, count) pairs as a bar chart and save it to ``path``.

    Args:
        counts: Sequence of ``(label, count)`` pairs, in plotting order.
        title: Chart title.
        xlabel: Label for the x axis.
        path: Destination passed straight to ``plt.savefig``.
    """
    labels = [label for label, _ in counts]
    heights = [count for _, count in counts]
    positions = list(range(len(counts)))

    fig = plt.figure()
    try:
        plt.bar(positions, heights, width=0.9, align='center')
        plt.xticks(positions, labels)
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel('occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # clf() only empties the figure; close() releases it, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)


def valuechartpatterns(dict, path, top_n=None):
    """Save a bar chart of how often each value occurs in the dataset.

    Args:
        dict: Mapping whose values are themselves mappings; the inner
            values are what gets counted. (The parameter name shadows the
            builtin but is kept so existing callers keep working.)
        path: File path the figure is saved to.
        top_n: If given, chart only the ``top_n`` most common values.
            ``None`` (the default) charts every distinct value, which
            draws one bar and one tick label per value and is likely to be
            slow when there are tens of thousands of them.
    """
    seen_values = Counter()
    for data in dict.values():
        # update() counts in place. `seen_values += Counter(...)` builds a
        # temporary Counter per entry and, on Python 2 (which has no
        # Counter.__iadd__), also re-copies the whole running total each
        # time, which is quadratic when there are many distinct values.
        seen_values.update(data.values())
    _save_bar_chart(seen_values.most_common(top_n),
                    'values in pattern dataset', 'values in data', path)


def countrychartpatterns(dict, path, top_n=None):
    """Save a bar chart of how often each country occurs in the dataset.

    Args:
        dict: Mapping whose values are themselves mappings; the inner keys
            (countries in the example data) are what gets counted. (The
            parameter name shadows the builtin but is kept so existing
            callers keep working.)
        path: File path the figure is saved to.
        top_n: If given, chart only the ``top_n`` most common countries.
            ``None`` (the default) charts all of them.
    """
    seen_countries = Counter()
    for data in dict.values():
        # In-place update instead of `+= Counter(...)`; see
        # valuechartpatterns for why this matters on large inputs.
        seen_countries.update(data.keys())
    _save_bar_chart(seen_countries.most_common(top_n),
                    'countries in pattern dataset', 'countries in data', path)
A minimal example dict is shown below; the actual dict contains 56,000 values:
# Minimal stand-in for the real dataset: each top-level key maps to a
# {country: value} mapping.
# NOTE: the name shadows the builtin `dict`; it is kept because the calling
# script refers to the data by this name.
dict = {
    "a": {"germany": 20006.0, "united kingdom": 20016.571428571428},
    "b": {"chad": 13000.0, "south africa": 3000000.0},
    "c": {"chad": 200061.0, "south africa": 3000000.0},
}
And in the script, I call:
if __name__ == "__main__":
    # Close every figure pyplot still has open before charting.
    plt.close('all')
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("starting pattern charting...\n")
    # Each call saves one PNG; the stray extra ")" after each call has been
    # removed.
    countrychartpatterns(dict, 'newpatterncountries.png')
    valuechartpatterns(dict, 'newpatternvalues.png')
Note: I load matplotlib with `import matplotlib.pyplot as plt`.
When running the script in PyCharm, I get `starting pattern charting...` in the console, but the functions take super long to plot.
What am I doing wrong? Should I be using a histogram instead of a bar plot, since that should achieve the same aim of giving the number of occurrences of countries/values? Can I change the GUI backend somehow? Any advice is welcome.
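This is the test I mentioned in the comments above. It results in the output below, which shows that almost all of the time is spent in the pre-processing (counting) step rather than in matplotlib: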
elapsed pre-processing = 13.79 s elapsed plotting = 0.17 s pre-processing / plotting = 83.3654562565
test script:
import random as rd
import time
from collections import Counter
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np


def countrychartpatterns(dict, path):
    """Chart country occurrences and report where the time is spent.

    Times the counting ("pre-processing") phase and the plotting phase
    separately and prints both durations and their ratio.

    Args:
        dict: Mapping whose values are mappings keyed by country name.
            (The parameter name shadows the builtin; kept to match the
            function being benchmarked.)
        path: File path the figure is saved to.
    """
    # pre-processing -------------------
    t0 = time.time()
    seen_countries = Counter()
    for data in dict.values():
        # Deliberately the same accumulation as the code under test, so the
        # timing reflects it. `seen_countries.update(data.keys())` is the
        # faster in-place alternative.
        seen_countries += Counter(data.keys())
    seen_countries = seen_countries.most_common()
    # list(...) so the values are real sequences on Python 3 as well, where
    # map() returns an iterator.
    yvals = list(map(itemgetter(1), seen_countries))
    xvals = list(map(itemgetter(0), seen_countries))
    dt1 = time.time() - t0
    print("elapsed pre-processing = {0:.2f} s".format(dt1))

    # plotting -------------------
    t0 = time.time()
    fig = plt.figure()
    positions = list(range(len(seen_countries)))
    plt.bar(positions, yvals, width=0.9, align='center')
    plt.xticks(positions, xvals)
    plt.title('countries in pattern dataset')
    plt.xlabel('countries in data')
    plt.ylabel('occurrences')
    plt.tick_params(axis='both', which='major', labelsize=6)
    plt.tick_params(axis='both', which='minor', labelsize=6)
    plt.tight_layout()
    plt.savefig(path)
    plt.clf()
    dt2 = time.time() - t0
    print("elapsed plotting = {0:.2f} s".format(dt2))
    print("pre-processing / plotting = {}".format(dt1/dt2))

    # Release the figure once the timings are taken; clf() alone leaves it
    # open.
    plt.close(fig)


if __name__ == "__main__":
    countries = ["united states of america", "afghanistan", "albania",
                 "algeria", "andorra", "angola", "antigua & deps",
                 "argentina", "armenia", "australia", "austria",
                 "azerbaijan"]

    def item():
        """Return one random record with up to two country entries."""
        # If both random choices pick the same country, the second value
        # overwrites the first and the record has a single entry.
        return {rd.choice(countries): np.random.randint(1000),
                rd.choice(countries): np.random.randint(1000)}

    # One million synthetic records keyed by their index.
    patterns = {i: item() for i in range(1000000)}

    print("starting pattern charting...")
    countrychartpatterns(patterns, 'newpatterncountries.png')
Comments
Post a Comment