import os, sys, os.path, shutil
import matplotlib as mpl
import xlrd, openpyxl
import hashlib
import pweave
def parse_serv_flag(args):
    """Return whether a ``serv=<value>`` argument requests server mode.

    Args:
        args: Iterable of command-line argument strings, typically
            ``sys.argv[1:]``.

    Returns:
        ``False`` when no ``serv=`` argument is present. Otherwise ``True``
        if the value of the *last* ``serv=`` argument is exactly one of
        ``1``, ``y``, ``Y``, ``yes``, ``Yes`` or ``YES``, else ``False``.
    """
    truthy_values = {"1", "y", "Y", "yes", "Yes", "YES"}
    enabled = False
    for param in args:
        # startswith() copes with a bare "serv" argument; indexing the
        # character after the prefix would raise IndexError for it.
        if param.startswith("serv="):
            # Exact membership on purpose: a substring test against the
            # joined string would also accept "", " ", "es", "Y yes", ...
            enabled = param[len("serv="):] in truthy_values
    return enabled


mode_serv = parse_serv_flag(sys.argv[1:])

# In server mode select the non-interactive Agg backend; this happens before
# matplotlib.pyplot is imported further down. Otherwise matplotlib's default
# backend is left untouched.
if mode_serv:
    mpl.use('Agg')
# appnope is an optional dependency: when it is not installed the script
# simply carries on without it.
try:
    import appnope
except ImportError:
    pass
else:
    # Only the import is guarded, so an ImportError raised from inside
    # nope() itself is no longer silently swallowed.
    appnope.nope()  # stop the apple power nap
import pandas
from math import pi
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection
import numpy as np
from numpy.random import rand
from pylab import pcolor, show, colorbar, xticks, yticks, pcolormesh, imshow
from random import gauss
from scipy.interpolate import interp2d
import threading, multiprocessing, time
from decimal import Decimal
from scipy.optimize import curve_fit
# Render figure text with LaTeX and set the LaTeX preamble, which loads the
# amsmath, graphicx, nicefrac and xcolor packages.
mpl.rcParams.update({
    "text.usetex": True,
    "text.latex.preamble": r"\usepackage{amsmath} \usepackage{graphicx} \usepackage{nicefrac} \usepackage{xcolor}",
})
import scipy as sp
import subprocess
import datetime
from matplotlib.ticker import PercentFormatter
# Read the three worksheets used below from the workbook. The context manager
# closes the underlying file once the sheets are loaded.
with pandas.ExcelFile("data_publication.xlsx") as Excel_wb:
    Data = pandas.read_excel(Excel_wb, sheet_name="Data")
    country = pandas.read_excel(Excel_wb, sheet_name="Metadata - Countries")
    # header=3: the column names of this sheet are taken from its fourth row.
    population = pandas.read_excel(Excel_wb, sheet_name="population", header=3)

# Discard the first two data rows of the population sheet.
population = population[2:]

print(Data)
print(len(Data))
# NOTE(review): population was already sliced above, so this prints it with
# two further rows skipped; probably meant print(population) -- confirm.
print(population[2:])

# Left join on "Country": every population row is kept, and rows without a
# match in Data get NaN in the columns coming from Data.
Merged_file = pandas.merge(population, Data, how="left", on="Country")
print(Merged_file)

# NOTE: earlier revisions called .astype("category") / .astype("string") on
# the "Code", "Region" and "Level" columns without assigning the result.
# Series.astype returns a new object, so those calls had no effect and were
# removed; the column dtypes are left exactly as loaded.
print(Merged_file["Region"])
# Marker-only plot of the 2018 publication counts against "Level", with a
# logarithmic y axis.
ax = Merged_file.plot(x="Level", y="publication 2018", marker=".", linestyle="")
plt.yscale("log")

# Per-region totals. The reduction is restricted to numeric columns
# explicitly instead of relying on it to discard the text columns.
Table2 = Merged_file.groupby("Region").sum(numeric_only=True).reset_index()
print(Table2)

# Each region's 2018 publication total relative to the smallest regional
# total; used (times 10) as the scatter marker size.
region_pubs_2018 = Table2["publication 2018"].values
si = np.divide(region_pubs_2018, min(region_pubs_2018))
print(si)

# NOTE(review): y="2018" is a different column from "publication 2018";
# presumably the 2018 column of the population sheet -- confirm.
Table2.plot.scatter(x="Region", y="2018", s=si * 10)

# Totals over all rows for the columns from "publication 2000" through
# "publication 2018" (label slice, both ends included), as a two-column frame
# with the totals in column "A". numeric_only=True replaces
# aggregate(func=sum), which depended on pandas dropping non-numeric
# "nuisance" columns -- deprecated and announced to raise TypeError. The
# reduction is also computed once instead of three times.
Table3 = (
    Merged_file.sum(numeric_only=True)["publication 2000":"publication 2018"]
    .reset_index()
    .rename(columns={0: "A"})
)
print(Table3)
print(Table3.dtypes)

Table4 = Table3.astype({"A": "float"})
print(Table4.dtypes)

plt.figure(3)
Table4.boxplot(column="A")
# Shared aesthetics for the Pareto charts below.
color1 = 'steelblue'
color2 = 'red'
line_size = 4


def pareto_chart(table, value_column, cum_column):
    """Draw a Pareto chart of one column of *table* on a new figure.

    The rows are sorted by *value_column* in descending order and drawn as
    bars; the running total, expressed as a percentage of the column sum, is
    drawn as a line on a secondary y axis. The sorted values are printed.

    Args:
        table: DataFrame containing *value_column*.
        value_column: Label of the column to sort, plot and accumulate.
        cum_column: Label under which the cumulative percentage is stored in
            the returned frame.

    Returns:
        Tuple ``(df, fig, ax, ax2)``: the sorted frame (with the previous
        index kept as a column by ``reset_index`` and *cum_column* added),
        the new figure, the bar axes and the twin axes carrying the
        cumulative line.
    """
    # Sort descending and renumber the rows so the bar positions follow rank.
    df = table.sort_values(by=value_column, ascending=False).reset_index()
    # Cumulative share of the total, in percent.
    df[cum_column] = df[value_column].cumsum() / df[value_column].sum() * 100
    print(df[value_column])

    # Bars on the primary axis.
    fig, ax = plt.subplots()
    ax.bar(df.index, df[value_column], color=color1)

    # Cumulative percentage line on a twin axis sharing the same x axis.
    ax2 = ax.twinx()
    ax2.plot(df.index, df[cum_column], color=color2, marker="D", ms=line_size)
    ax2.yaxis.set_major_formatter(PercentFormatter())

    # Colour each y axis like the series it belongs to.
    ax.tick_params(axis='y', colors=color1)
    ax2.tick_params(axis='y', colors=color2)
    return df, fig, ax, ax2


# Row-level charts from Merged_file, for 2000 and 2018.
df, fig, ax, ax2 = pareto_chart(Merged_file, 'publication 2000', 'cumperc2000')
df, fig, ax, ax2 = pareto_chart(Merged_file, 'publication 2018', 'cumperc2018')

# Regional charts from Table2, for 2000 and 2018.
df, fig, ax, ax2 = pareto_chart(Table2, 'publication 2000', 'cumperc2000')
df, fig, ax, ax2 = pareto_chart(Table2, 'publication 2018', 'cumperc2018')

plt.show()
Country Code Region \
0 Afghanistan AFG South Asia
1 Angola AGO Sub-Saharan Africa
2 Albania ALB Europe and Central Asia
3 Andorra AND Europe and Central Asia
4 United Arab Emirates ARE Middle East and North Africa
.. ... ... ...
191 Kosovo XKX Europe and Central Asia
192 Yemen, Rep. YEM Middle East and North Africa
193 South Africa ZAF Sub-Saharan Africa
194 Zambia ZMB Sub-Saharan Africa
195 Zimbabwe ZWE Sub-Saharan Africa
Income Level publication 2000 2001 2002
2003 \
0 Low income 1 4.00 1.00 4.50
8.34
1 Lower middle income 2 7.17 10.58 15.13
5.89
2 Upper middle income 3 22.34 18.71 24.53
23.82
3 High income 4 0.00 2.64 0.50
4.70
4 High income 4 330.44 380.58 399.54
539.87
.. ... ... ... ... ...
...
191 Upper middle income 3 0.00 0.00 0.00
0.00
192 Low income 1 22.83 42.49 27.13
33.84
193 Upper middle income 3 3897.55 4099.38 4413.16
4267.39
194 Lower middle income 2 33.60 51.96 61.54
47.78
195 Lower middle income 2 231.01 213.85 165.61
180.57
2004 ... 2011 2012 2013 2014 2015
2016 \
0 5.75 ... 39.62 34.15 27.81 34.69 22.20
81.64
1 9.23 ... 17.78 22.38 23.85 33.48 31.59
39.97
2 17.91 ... 146.82 165.75 162.65 180.39 177.80
185.87
3 1.31 ... 0.91 3.58 5.26 4.18 8.08
7.99
4 555.02 ... 1421.83 1625.96 1765.77 1816.50 2294.58
2484.16
.. ... ... ... ... ... ... ...
...
191 0.00 ... 106.73 113.40 123.34 122.23 136.84
187.91
192 41.15 ... 105.17 106.85 118.75 139.90 107.34
119.49
193 4831.02 ... 8293.80 9105.71 9709.05 10349.90 10964.38
12045.47
194 42.84 ... 101.03 106.12 119.47 134.46 157.23
181.66
195 138.17 ... 147.07 202.94 223.58 273.00 268.98
313.80
2017 publication 2018 pop2000 pop2018
0 91.89 111.72 20779957 37171922
1 55.50 30.12 16395477 30809787
2 149.54 180.36 3089027 2866376
3 2.01 3.60 65390 77008
4 2899.67 3144.89 3134067 9630966
.. ... ... ... ...
191 261.45 278.43 1700000 1797085
192 126.37 137.44 17409071 28498683
193 12846.08 13008.74 44967713 57792520
194 209.45 213.07 10415942 17351714
195 340.45 359.33 11881482 14438812
[196 rows x 26 columns]
196
Country Country Code Indicator Name Indicator
Code \
4 Angola AGO Population, total
SP.POP.TOTL
5 Albania ALB Population, total
SP.POP.TOTL
6 Andorra AND Population, total
SP.POP.TOTL
7 Arab World ARB Population, total
SP.POP.TOTL
8 United Arab Emirates ARE Population, total
SP.POP.TOTL
.. ... ... ...
...
261 Kosovo XKX Population, total
SP.POP.TOTL
262 Yemen, Rep. YEM Population, total
SP.POP.TOTL
263 South Africa ZAF Population, total
SP.POP.TOTL
264 Zambia ZMB Population, total
SP.POP.TOTL
265 Zimbabwe ZWE Population, total
SP.POP.TOTL
1960 1961 1962 1963 1964 \
4 5454938.0 5531451.0 5608499.0 5679409.0 5734995.0
5 1608800.0 1659800.0 1711319.0 1762621.0 1814135.0
6 13410.0 14378.0 15379.0 16407.0 17466.0
7 92197715.0 94724540.0 97334438.0 100034191.0 102832792.0
8 92417.0 100801.0 112112.0 125130.0 138049.0
.. ... ... ... ... ...
261 947000.0 966000.0 994000.0 1022000.0 1050000.0
262 5315351.0 5393034.0 5473671.0 5556767.0 5641598.0
263 17099836.0 17524533.0 17965733.0 18423157.0 18896303.0
264 3070780.0 3164330.0 3260645.0 3360099.0 3463211.0
265 3776679.0 3905038.0 4039209.0 4178726.0 4322854.0
1965 ... 2011 2012 2013
2014 \
4 5770573.0 ... 24220660.0 25107925.0 26015786.0
26941773.0
5 1864791.0 ... 2905195.0 2900401.0 2895092.0
2889104.0
6 18542.0 ... 83748.0 82427.0 80770.0
79213.0
7 105736428.0 ... 363156846.0 371437642.0 379696477.0
387899835.0
8 149855.0 ... 8946778.0 9141598.0 9197908.0
9214182.0
.. ... ... ... ... ...
...
261 1078000.0 ... 1791000.0 1807106.0 1818117.0
1812771.0
262 5727745.0 ... 23807586.0 24473176.0 25147112.0
25823488.0
263 19384838.0 ... 52003759.0 52832659.0 53687125.0
54544184.0
264 3570466.0 ... 14023199.0 14465148.0 14926551.0
15399793.0
265 4471178.0 ... 12894323.0 13115149.0 13350378.0
13586710.0
2015 2016 2017 2018 2019
\
4 27884380.0 28842482.0 29816769.0 30809787.0 31825299.0
5 2880703.0 2876101.0 2873457.0 2866376.0 2854191.0
6 77993.0 77295.0 76997.0 77008.0 77146.0
7 396028301.0 404042892.0 411942825.0 419851989.0 427870273.0
8 9262896.0 9360975.0 9487206.0 9630966.0 9770526.0
.. ... ... ... ... ...
261 1788196.0 1777557.0 1791003.0 1797085.0 1788878.0
262 26497881.0 27168210.0 27834811.0 28498683.0 29161922.0
263 55386369.0 56207649.0 57009751.0 57792520.0 58558267.0
264 15879370.0 16363449.0 16853608.0 17351714.0 17861034.0
265 13814642.0 14030338.0 14236599.0 14438812.0 14645473.0
2020
4 32866268.0
5 2837743.0
6 77265.0
7 436080728.0
8 9890400.0
.. ...
261 1775378.0
262 29825968.0
263 59308690.0
264 18383956.0
265 14862927.0
[262 rows x 65 columns]
Country Country Code Indicator Name \
0 Afghanistan AFG Population, total
1 Africa Western and Central AFW Population, total
2 Angola AGO Population, total
3 Albania ALB Population, total
4 Andorra AND Population, total
.. ... ... ...
259 Kosovo XKX Population, total
260 Yemen, Rep. YEM Population, total
261 South Africa ZAF Population, total
262 Zambia ZMB Population, total
263 Zimbabwe ZWE Population, total
Indicator Code 1960 1961 1962 1963
\
0 SP.POP.TOTL 8996967.0 9169406.0 9351442.0 9543200.0
1 SP.POP.TOTL 96396419.0 98407221.0 100506960.0 102691339.0
2 SP.POP.TOTL 5454938.0 5531451.0 5608499.0 5679409.0
3 SP.POP.TOTL 1608800.0 1659800.0 1711319.0 1762621.0
4 SP.POP.TOTL 13410.0 14378.0 15379.0 16407.0
.. ... ... ... ... ...
259 SP.POP.TOTL 947000.0 966000.0 994000.0 1022000.0
260 SP.POP.TOTL 5315351.0 5393034.0 5473671.0 5556767.0
261 SP.POP.TOTL 17099836.0 17524533.0 17965733.0 18423157.0
262 SP.POP.TOTL 3070780.0 3164330.0 3260645.0 3360099.0
263 SP.POP.TOTL 3776679.0 3905038.0 4039209.0 4178726.0
1964 1965 ... 2011 2012 2013
2014 \
0 9744772.0 9956318.0 ... 39.62 34.15 27.81
34.69
1 104953470.0 107289875.0 ... NaN NaN NaN
NaN
2 5734995.0 5770573.0 ... 17.78 22.38 23.85
33.48
3 1814135.0 1864791.0 ... 146.82 165.75 162.65
180.39
4 17466.0 18542.0 ... 0.91 3.58 5.26
4.18
.. ... ... ... ... ... ...
...
259 1050000.0 1078000.0 ... 106.73 113.40 123.34
122.23
260 5641598.0 5727745.0 ... 105.17 106.85 118.75
139.90
261 18896303.0 19384838.0 ... 8293.80 9105.71 9709.05
10349.90
262 3463211.0 3570466.0 ... 101.03 106.12 119.47
134.46
263 4322854.0 4471178.0 ... 147.07 202.94 223.58
273.00
2015 2016 2017 publication 2018 pop2000
pop2018
0 22.20 81.64 91.89 111.72 20779957.0
37171922.0
1 NaN NaN NaN NaN NaN
NaN
2 31.59 39.97 55.50 30.12 16395477.0
30809787.0
3 177.80 185.87 149.54 180.36 3089027.0
2866376.0
4 8.08 7.99 2.01 3.60 65390.0
77008.0
.. ... ... ... ... ...
...
259 136.84 187.91 261.45 278.43 1700000.0
1797085.0
260 107.34 119.49 126.37 137.44 17409071.0
28498683.0
261 10964.38 12045.47 12846.08 13008.74 44967713.0
57792520.0
262 157.23 181.66 209.45 213.07 10415942.0
17351714.0
263 268.98 313.80 340.45 359.33 11881482.0
14438812.0
[264 rows x 90 columns]
0 South Asia
1 NaN
2 Sub-Saharan Africa
3 Europe and Central Asia
4 Europe and Central Asia
...
259 Europe and Central Asia
260 Middle East and North Africa
261 Sub-Saharan Africa
262 Sub-Saharan Africa
263 Sub-Saharan Africa
Name: Region, Length: 264, dtype: object
Region 1960 1961
1962 \
0 East Asia and Pacific 1.027475e+09 1.028985e+09
1.042918e+09
1 Europe and Central Asia 6.665374e+08 6.742331e+08
6.821781e+08
2 Latin America and Caribbean 2.195888e+08 2.256677e+08
2.319338e+08
3 Middle East and North Africa 1.052032e+08 1.080618e+08
1.110454e+08
4 North America 1.985800e+08 2.019620e+08
2.051520e+08
5 South Asia 5.728395e+08 5.849397e+08
5.974944e+08
6 Sub-Saharan Africa 2.271689e+08 2.325024e+08
2.380571e+08
1963 1964 1965 1966
1967 \
0 1.068161e+09 1.093067e+09 1.119016e+09 1.148447e+09
1.176956e+09
1 6.901894e+08 6.981305e+08 7.058424e+08 7.125991e+08
7.191716e+08
2 2.383549e+08 2.448856e+08 2.514925e+08 2.581672e+08
2.649088e+08
3 1.141016e+08 1.172712e+08 1.205412e+08 1.239082e+08
1.274449e+08
4 2.082060e+08 2.112140e+08 2.139810e+08 2.166080e+08
2.191240e+08
5 6.104983e+08 6.239433e+08 6.378232e+08 6.521442e+08
6.669086e+08
6 2.438291e+08 2.498094e+08 2.559950e+08 2.623883e+08
2.690044e+08
1968 ... 2011 2012 2013 2014
2015 \
0 1.206577e+09 ... 580361.76 587211.64 624914.07 661841.80
680474.82
1 7.254875e+08 ... 694995.56 723161.35 740463.98 762865.15
769555.61
2 2.717176e+08 ... 77965.99 84149.96 87652.50 93200.28
94047.77
3 1.310269e+08 ... 70470.98 75282.80 79492.50 86429.97
90270.13
4 2.214500e+08 ... 481553.74 487604.31 489472.51 493966.96
490522.13
5 6.821022e+08 ... 83008.02 90409.70 97142.87 107871.28
113844.20
6 2.758673e+08 ... 17387.01 18320.24 19651.81 21536.41
22555.45
2016 2017 publication 2018 pop2000 pop2018
0 719251.88 765947.33 837290.10 2.017650e+09 2.295710e+09
1 786776.72 796395.21 793756.77 8.609753e+08 9.177457e+08
2 99146.15 104318.82 108130.54 5.204279e+08 6.399019e+08
3 100704.57 109457.45 119302.31 3.153268e+08 4.489742e+08
4 487309.63 492431.33 482775.50 3.128481e+08 3.639034e+08
5 126457.70 138108.56 154138.63 1.390946e+09 1.814455e+09
6 24873.32 27165.81 29475.12 6.651853e+08 1.077870e+09
[7 rows x 84 columns]
[28.40667315 26.92972141 3.66853604 4.04755977 16.37908514
5.22944877
1. ]
index A
0 publication 2000 1050699.61
1 2001 1086942.92
2 2002 1133793.27
3 2003 1187972.09
4 2004 1299180.97
5 2005 1459198.63
6 2006 1537021.05
7 2007 1627624.08
8 2008 1715118.28
9 2009 1816979.35
10 2010 1904577.12
11 2011 2005743.06
12 2012 2066140.0
13 2013 2138790.24
14 2014 2227711.85
15 2015 2261270.11
16 2016 2344519.97
17 2017 2433824.51
18 publication 2018 2524868.97
index object
A object
dtype: object
index object
A float64
dtype: object
0 304781.56
1 97047.60
2 77244.90
3 69002.63
4 53064.35
...
259 NaN
260 NaN
261 NaN
262 NaN
263 NaN
Name: publication 2000, Length: 264, dtype: float64
/tmp/ipykernel_2978/345872911.py:80: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
print(Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"})) #omg ça
marche
/tmp/ipykernel_2978/345872911.py:81: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
print(Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"}).dtypes) #omg
ça marche
/tmp/ipykernel_2978/345872911.py:82: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
Table3=Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"})
0 528263.25
1 422807.71
2 135787.79
3 104396.12
4 98792.50
...
259 NaN
260 NaN
261 NaN
262 NaN
263 NaN
Name: publication 2018, Length: 264, dtype: float64
0 431619.12
1 338635.95
2 202513.14
3 27688.74
4 23565.15
5 19484.24
6 7193.27
Name: publication 2000, dtype: float64
0 837290.10
1 793756.77
2 482775.50
3 154138.63
4 119302.31
5 108130.54
6 29475.12
Name: publication 2018, dtype: float64