import os, sys, os.path, shutil
import matplotlib as mpl

import xlrd, openpyxl
import hashlib
import pweave

# Values of ``serv=<value>`` that switch the script into server mode.
_SERV_TRUE_VALUES = frozenset(("1", "y", "Y", "yes", "Yes", "YES"))


def parse_serv_flag(args):
    """Return True when the last ``serv=<value>`` item in *args* is truthy.

    Only arguments of the exact form ``serv=<value>`` are considered; a bare
    ``serv`` (no ``=``) or any other argument is ignored.  When the option is
    given several times the last occurrence wins.  The value is compared
    against a fixed set of accepted spellings, so an empty value or a
    fragment such as ``es`` does not enable server mode.

    Args:
        args: iterable of command-line argument strings (without the
            program name).

    Returns:
        bool: whether server mode was requested.
    """
    enabled = False
    for param in args:
        name, separator, value = param.partition("=")
        if name == "serv" and separator:
            enabled = value in _SERV_TRUE_VALUES
    return enabled


mode_serv = parse_serv_flag(sys.argv[1:])
# In server mode the Agg backend is selected here, before pyplot is imported
# further down; otherwise matplotlib keeps its default backend.
if mode_serv:
    mpl.use('Agg')
# appnope is an optional dependency: skip quietly when it is not installed.
try:
    import appnope
    appnope.nope()    # stop the apple power nap
except ImportError:
    pass

import pandas
from math import pi
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection
import numpy as np
from numpy.random import rand
from pylab import pcolor, show, colorbar, xticks, yticks, pcolormesh, imshow
from random import gauss
from scipy.interpolate import interp2d
import threading, multiprocessing, time
from decimal import Decimal

from scipy.optimize import curve_fit
# Typeset figure text with LaTeX and load the extra packages the labels use.
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.preamble'] = r"\usepackage{amsmath} \usepackage{graphicx} \usepackage{nicefrac} \usepackage{xcolor}"

import scipy as sp
import subprocess
import datetime
from matplotlib.ticker import PercentFormatter




# Load the worksheets from one workbook.  The context manager closes the
# file handle as soon as the sheets have been read into memory.
with pandas.ExcelFile("data_publication.xlsx") as Excel_wb:
    Data=pandas.read_excel(Excel_wb,"Data")
    # Read but not used in the visible part of this script.
    country=pandas.read_excel(Excel_wb,"Metadata - Countries")
    population=pandas.read_excel(Excel_wb,"population",header=3)
# Drop the first two rows of the population sheet
# (NOTE(review): presumably rows that are not wanted in the merge -- confirm).
population=population[2:]
print(Data)
print(len(Data))
# Print the frame that is actually merged below (slicing it a second time
# here would hide two more rows from the diagnostic output).
print(population)
# Left join on the country name: every population row is kept and rows
# without a match in Data get NaN in the Data columns.
Merged_file=pandas.merge(population,Data,how="left",on="Country")
print(Merged_file)
# NOTE: earlier versions called .astype(...) on "Code", "Region" and "Level"
# without storing the result.  astype returns a new object, so those calls
# had no effect; they are omitted and the column dtypes are left as loaded.
print(Merged_file["Region"])
# One marker per row: publication count for 2018 against the "Level" column,
# on a logarithmic y axis.
ax=Merged_file.plot(x="Level", y="publication 2018",marker=".",linestyle="")
plt.yscale("log")


# Per-region totals.  Only numeric columns are summed, and this is requested
# explicitly: relying on pandas to silently drop the text columns is
# deprecated and is announced to become a TypeError.
Table2=Merged_file.groupby("Region").sum(numeric_only=True).reset_index()
print(Table2)

# Marker sizes for the scatter plot: each region's 2018 publication total
# relative to the smallest regional total.
publications_2018=Table2["publication 2018"].values
si=np.divide(publications_2018,publications_2018.min())
print(si)

# NOTE: an earlier Table2["Region"].astype("category") call discarded its
# result and therefore had no effect; it is omitted.
Table2.plot.scatter(x="Region", y="2018", s=si*10)

# Totals over all rows for every column from "publication 2000" through
# "publication 2018" (label slice, both ends included), computed once and
# reshaped into a two-column frame whose value column is named "A".
column_totals=Merged_file.sum(numeric_only=True)
Table3=column_totals["publication 2000":"publication 2018"].reset_index().rename(columns={0:"A"})
print(Table3)
print(Table3.dtypes)
Table4=Table3.astype({"A":"float"})
print(Table4.dtypes)
plt.figure(3)
Table4.boxplot(column="A")




#define aesthetics shared by every Pareto chart
color1 = 'steelblue'
color2 = 'red'
line_size = 4


def _pareto_chart(frame, column, cumulative_column):
    """Draw a Pareto chart of *column* and return the data and artists.

    The rows of *frame* are sorted by *column* in descending order and
    re-indexed, so the new index is the rank of each row.  A bar chart of the
    sorted values is drawn on the left axis and the running share of the
    total (in percent) on a twin right axis.  Rows whose value is NaN are
    placed last by the sort.

    Args:
        frame: DataFrame holding *column*; it is not modified.
        column: name of the column to rank and plot.
        cumulative_column: name of the column added to the sorted copy to
            hold the cumulative percentage.

    Returns:
        tuple: ``(sorted_frame, figure, bar_axis, line_axis)``.
    """
    ranked = frame.sort_values(by=column, ascending=False).reset_index()

    #add column to display cumulative percentage
    ranked[cumulative_column] = ranked[column].cumsum()/ranked[column].sum()*100
    print(ranked[column])

    #create basic bar plot
    figure, bar_axis = plt.subplots()
    bar_axis.bar(ranked.index, ranked[column], color=color1)

    #add cumulative percentage line to plot
    line_axis = bar_axis.twinx()
    line_axis.plot(ranked.index, ranked[cumulative_column], color=color2, marker="D", ms=line_size)
    line_axis.yaxis.set_major_formatter(PercentFormatter())

    #specify axis colors
    bar_axis.tick_params(axis='y', colors=color1)
    line_axis.tick_params(axis='y', colors=color2)
    return ranked, figure, bar_axis, line_axis


# Four charts, in this order: per row of Merged_file for 2000 and 2018, then
# per region (Table2) for 2000 and 2018.  df, fig, ax and ax2 end up bound to
# the last chart, as they were when the recipe was written out four times.
df, fig, ax, ax2 = _pareto_chart(Merged_file, 'publication 2000', 'cumperc2000')
df, fig, ax, ax2 = _pareto_chart(Merged_file, 'publication 2018', 'cumperc2018')
df, fig, ax, ax2 = _pareto_chart(Table2, 'publication 2000', 'cumperc2000')
df, fig, ax, ax2 = _pareto_chart(Table2, 'publication 2018', 'cumperc2018')


plt.show()
                  Country Code                        Region  \
0             Afghanistan  AFG                    South Asia
1                  Angola  AGO            Sub-Saharan Africa
2                 Albania  ALB       Europe and Central Asia
3                 Andorra  AND       Europe and Central Asia
4    United Arab Emirates  ARE  Middle East and North Africa
..                    ...  ...                           ...
191                Kosovo  XKX       Europe and Central Asia
192           Yemen, Rep.  YEM  Middle East and North Africa
193          South Africa  ZAF            Sub-Saharan Africa
194                Zambia  ZMB            Sub-Saharan Africa
195              Zimbabwe  ZWE            Sub-Saharan Africa

                  Income  Level  publication 2000     2001     2002
2003  \
0             Low income      1              4.00     1.00     4.50
8.34
1    Lower middle income      2              7.17    10.58    15.13
5.89
2    Upper middle income      3             22.34    18.71    24.53
23.82
3            High income      4              0.00     2.64     0.50
4.70
4            High income      4            330.44   380.58   399.54
539.87
..                   ...    ...               ...      ...      ...
...
191  Upper middle income      3              0.00     0.00     0.00
0.00
192           Low income      1             22.83    42.49    27.13
33.84
193  Upper middle income      3           3897.55  4099.38  4413.16
4267.39
194  Lower middle income      2             33.60    51.96    61.54
47.78
195  Lower middle income      2            231.01   213.85   165.61
180.57

        2004  ...     2011     2012     2013      2014      2015
2016  \
0       5.75  ...    39.62    34.15    27.81     34.69     22.20
81.64
1       9.23  ...    17.78    22.38    23.85     33.48     31.59
39.97
2      17.91  ...   146.82   165.75   162.65    180.39    177.80
185.87
3       1.31  ...     0.91     3.58     5.26      4.18      8.08
7.99
4     555.02  ...  1421.83  1625.96  1765.77   1816.50   2294.58
2484.16
..       ...  ...      ...      ...      ...       ...       ...
...
191     0.00  ...   106.73   113.40   123.34    122.23    136.84
187.91
192    41.15  ...   105.17   106.85   118.75    139.90    107.34
119.49
193  4831.02  ...  8293.80  9105.71  9709.05  10349.90  10964.38
12045.47
194    42.84  ...   101.03   106.12   119.47    134.46    157.23
181.66
195   138.17  ...   147.07   202.94   223.58    273.00    268.98
313.80

         2017  publication 2018   pop2000   pop2018
0       91.89            111.72  20779957  37171922
1       55.50             30.12  16395477  30809787
2      149.54            180.36   3089027   2866376
3        2.01              3.60     65390     77008
4     2899.67           3144.89   3134067   9630966
..        ...               ...       ...       ...
191    261.45            278.43   1700000   1797085
192    126.37            137.44  17409071  28498683
193  12846.08          13008.74  44967713  57792520
194    209.45            213.07  10415942  17351714
195    340.45            359.33  11881482  14438812

[196 rows x 26 columns]
196
                  Country Country Code     Indicator Name Indicator
Code  \
4                  Angola          AGO  Population, total
SP.POP.TOTL
5                 Albania          ALB  Population, total
SP.POP.TOTL
6                 Andorra          AND  Population, total
SP.POP.TOTL
7              Arab World          ARB  Population, total
SP.POP.TOTL
8    United Arab Emirates          ARE  Population, total
SP.POP.TOTL
..                    ...          ...                ...
...
261                Kosovo          XKX  Population, total
SP.POP.TOTL
262           Yemen, Rep.          YEM  Population, total
SP.POP.TOTL
263          South Africa          ZAF  Population, total
SP.POP.TOTL
264                Zambia          ZMB  Population, total
SP.POP.TOTL
265              Zimbabwe          ZWE  Population, total
SP.POP.TOTL

           1960        1961        1962         1963         1964  \
4     5454938.0   5531451.0   5608499.0    5679409.0    5734995.0
5     1608800.0   1659800.0   1711319.0    1762621.0    1814135.0
6       13410.0     14378.0     15379.0      16407.0      17466.0
7    92197715.0  94724540.0  97334438.0  100034191.0  102832792.0
8       92417.0    100801.0    112112.0     125130.0     138049.0
..          ...         ...         ...          ...          ...
261    947000.0    966000.0    994000.0    1022000.0    1050000.0
262   5315351.0   5393034.0   5473671.0    5556767.0    5641598.0
263  17099836.0  17524533.0  17965733.0   18423157.0   18896303.0
264   3070780.0   3164330.0   3260645.0    3360099.0    3463211.0
265   3776679.0   3905038.0   4039209.0    4178726.0    4322854.0

            1965  ...         2011         2012         2013
2014  \
4      5770573.0  ...   24220660.0   25107925.0   26015786.0
26941773.0
5      1864791.0  ...    2905195.0    2900401.0    2895092.0
2889104.0
6        18542.0  ...      83748.0      82427.0      80770.0
79213.0
7    105736428.0  ...  363156846.0  371437642.0  379696477.0
387899835.0
8       149855.0  ...    8946778.0    9141598.0    9197908.0
9214182.0
..           ...  ...          ...          ...          ...
...
261    1078000.0  ...    1791000.0    1807106.0    1818117.0
1812771.0
262    5727745.0  ...   23807586.0   24473176.0   25147112.0
25823488.0
263   19384838.0  ...   52003759.0   52832659.0   53687125.0
54544184.0
264    3570466.0  ...   14023199.0   14465148.0   14926551.0
15399793.0
265    4471178.0  ...   12894323.0   13115149.0   13350378.0
13586710.0

            2015         2016         2017         2018         2019
\
4     27884380.0   28842482.0   29816769.0   30809787.0   31825299.0
5      2880703.0    2876101.0    2873457.0    2866376.0    2854191.0
6        77993.0      77295.0      76997.0      77008.0      77146.0
7    396028301.0  404042892.0  411942825.0  419851989.0  427870273.0
8      9262896.0    9360975.0    9487206.0    9630966.0    9770526.0
..           ...          ...          ...          ...          ...
261    1788196.0    1777557.0    1791003.0    1797085.0    1788878.0
262   26497881.0   27168210.0   27834811.0   28498683.0   29161922.0
263   55386369.0   56207649.0   57009751.0   57792520.0   58558267.0
264   15879370.0   16363449.0   16853608.0   17351714.0   17861034.0
265   13814642.0   14030338.0   14236599.0   14438812.0   14645473.0

            2020
4     32866268.0
5      2837743.0
6        77265.0
7    436080728.0
8      9890400.0
..           ...
261    1775378.0
262   29825968.0
263   59308690.0
264   18383956.0
265   14862927.0

[262 rows x 65 columns]
                        Country Country Code     Indicator Name  \
0                   Afghanistan          AFG  Population, total
1    Africa Western and Central          AFW  Population, total
2                        Angola          AGO  Population, total
3                       Albania          ALB  Population, total
4                       Andorra          AND  Population, total
..                          ...          ...                ...
259                      Kosovo          XKX  Population, total
260                 Yemen, Rep.          YEM  Population, total
261                South Africa          ZAF  Population, total
262                      Zambia          ZMB  Population, total
263                    Zimbabwe          ZWE  Population, total

    Indicator Code        1960        1961         1962         1963
\
0      SP.POP.TOTL   8996967.0   9169406.0    9351442.0    9543200.0
1      SP.POP.TOTL  96396419.0  98407221.0  100506960.0  102691339.0
2      SP.POP.TOTL   5454938.0   5531451.0    5608499.0    5679409.0
3      SP.POP.TOTL   1608800.0   1659800.0    1711319.0    1762621.0
4      SP.POP.TOTL     13410.0     14378.0      15379.0      16407.0
..             ...         ...         ...          ...          ...
259    SP.POP.TOTL    947000.0    966000.0     994000.0    1022000.0
260    SP.POP.TOTL   5315351.0   5393034.0    5473671.0    5556767.0
261    SP.POP.TOTL  17099836.0  17524533.0   17965733.0   18423157.0
262    SP.POP.TOTL   3070780.0   3164330.0    3260645.0    3360099.0
263    SP.POP.TOTL   3776679.0   3905038.0    4039209.0    4178726.0

            1964         1965  ...     2011     2012     2013
2014  \
0      9744772.0    9956318.0  ...    39.62    34.15    27.81
34.69
1    104953470.0  107289875.0  ...      NaN      NaN      NaN
NaN
2      5734995.0    5770573.0  ...    17.78    22.38    23.85
33.48
3      1814135.0    1864791.0  ...   146.82   165.75   162.65
180.39
4        17466.0      18542.0  ...     0.91     3.58     5.26
4.18
..           ...          ...  ...      ...      ...      ...
...
259    1050000.0    1078000.0  ...   106.73   113.40   123.34
122.23
260    5641598.0    5727745.0  ...   105.17   106.85   118.75
139.90
261   18896303.0   19384838.0  ...  8293.80  9105.71  9709.05
10349.90
262    3463211.0    3570466.0  ...   101.03   106.12   119.47
134.46
263    4322854.0    4471178.0  ...   147.07   202.94   223.58
273.00

         2015      2016      2017  publication 2018     pop2000
pop2018
0       22.20     81.64     91.89            111.72  20779957.0
37171922.0
1         NaN       NaN       NaN               NaN         NaN
NaN
2       31.59     39.97     55.50             30.12  16395477.0
30809787.0
3      177.80    185.87    149.54            180.36   3089027.0
2866376.0
4        8.08      7.99      2.01              3.60     65390.0
77008.0
..        ...       ...       ...               ...         ...
...
259    136.84    187.91    261.45            278.43   1700000.0
1797085.0
260    107.34    119.49    126.37            137.44  17409071.0
28498683.0
261  10964.38  12045.47  12846.08          13008.74  44967713.0
57792520.0
262    157.23    181.66    209.45            213.07  10415942.0
17351714.0
263    268.98    313.80    340.45            359.33  11881482.0
14438812.0

[264 rows x 90 columns]
0                        South Asia
1                               NaN
2                Sub-Saharan Africa
3           Europe and Central Asia
4           Europe and Central Asia
                   ...
259         Europe and Central Asia
260    Middle East and North Africa
261              Sub-Saharan Africa
262              Sub-Saharan Africa
263              Sub-Saharan Africa
Name: Region, Length: 264, dtype: object
                         Region          1960          1961
1962  \
0         East Asia and Pacific  1.027475e+09  1.028985e+09
1.042918e+09
1       Europe and Central Asia  6.665374e+08  6.742331e+08
6.821781e+08
2   Latin America and Caribbean  2.195888e+08  2.256677e+08
2.319338e+08
3  Middle East and North Africa  1.052032e+08  1.080618e+08
1.110454e+08
4                 North America  1.985800e+08  2.019620e+08
2.051520e+08
5                    South Asia  5.728395e+08  5.849397e+08
5.974944e+08
6            Sub-Saharan Africa  2.271689e+08  2.325024e+08
2.380571e+08

           1963          1964          1965          1966
1967  \
0  1.068161e+09  1.093067e+09  1.119016e+09  1.148447e+09
1.176956e+09
1  6.901894e+08  6.981305e+08  7.058424e+08  7.125991e+08
7.191716e+08
2  2.383549e+08  2.448856e+08  2.514925e+08  2.581672e+08
2.649088e+08
3  1.141016e+08  1.172712e+08  1.205412e+08  1.239082e+08
1.274449e+08
4  2.082060e+08  2.112140e+08  2.139810e+08  2.166080e+08
2.191240e+08
5  6.104983e+08  6.239433e+08  6.378232e+08  6.521442e+08
6.669086e+08
6  2.438291e+08  2.498094e+08  2.559950e+08  2.623883e+08
2.690044e+08

           1968  ...       2011       2012       2013       2014
2015  \
0  1.206577e+09  ...  580361.76  587211.64  624914.07  661841.80
680474.82
1  7.254875e+08  ...  694995.56  723161.35  740463.98  762865.15
769555.61
2  2.717176e+08  ...   77965.99   84149.96   87652.50   93200.28
94047.77
3  1.310269e+08  ...   70470.98   75282.80   79492.50   86429.97
90270.13
4  2.214500e+08  ...  481553.74  487604.31  489472.51  493966.96
490522.13
5  6.821022e+08  ...   83008.02   90409.70   97142.87  107871.28
113844.20
6  2.758673e+08  ...   17387.01   18320.24   19651.81   21536.41
22555.45

        2016       2017  publication 2018       pop2000       pop2018
0  719251.88  765947.33         837290.10  2.017650e+09  2.295710e+09
1  786776.72  796395.21         793756.77  8.609753e+08  9.177457e+08
2   99146.15  104318.82         108130.54  5.204279e+08  6.399019e+08
3  100704.57  109457.45         119302.31  3.153268e+08  4.489742e+08
4  487309.63  492431.33         482775.50  3.128481e+08  3.639034e+08
5  126457.70  138108.56         154138.63  1.390946e+09  1.814455e+09
6   24873.32   27165.81          29475.12  6.651853e+08  1.077870e+09

[7 rows x 84 columns]
[28.40667315 26.92972141  3.66853604  4.04755977 16.37908514
5.22944877
  1.        ]
               index           A
0   publication 2000  1050699.61
1               2001  1086942.92
2               2002  1133793.27
3               2003  1187972.09
4               2004  1299180.97
5               2005  1459198.63
6               2006  1537021.05
7               2007  1627624.08
8               2008  1715118.28
9               2009  1816979.35
10              2010  1904577.12
11              2011  2005743.06
12              2012   2066140.0
13              2013  2138790.24
14              2014  2227711.85
15              2015  2261270.11
16              2016  2344519.97
17              2017  2433824.51
18  publication 2018  2524868.97
index    object
A        object
dtype: object
index     object
A        float64
dtype: object
0      304781.56
1       97047.60
2       77244.90
3       69002.63
4       53064.35
         ...
259          NaN
260          NaN
261          NaN
262          NaN
263          NaN
Name: publication 2000, Length: 264, dtype: float64
/tmp/ipykernel_2978/345872911.py:80: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError.  Select only valid columns before calling the
reduction.
  print(Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"})) #omg ça
marche
/tmp/ipykernel_2978/345872911.py:81: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError.  Select only valid columns before calling the
reduction.
  print(Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"}).dtypes) #omg
ça marche
/tmp/ipykernel_2978/345872911.py:82: FutureWarning: Dropping of
nuisance columns in DataFrame reductions (with
'numeric_only=None') is deprecated; in a future version this
will raise TypeError.  Select only valid columns before calling the
reduction.
  Table3=Merged_file.aggregate(func=sum)["publication
2000":"publication
2018"].reset_index().rename(columns={0:"A"})
0      528263.25
1      422807.71
2      135787.79
3      104396.12
4       98792.50
         ...
259          NaN
260          NaN
261          NaN
262          NaN
263          NaN
Name: publication 2018, Length: 264, dtype: float64
0    431619.12
1    338635.95
2    202513.14
3     27688.74
4     23565.15
5     19484.24
6      7193.27
Name: publication 2000, dtype: float64
0    837290.10
1    793756.77
2    482775.50
3    154138.63
4    119302.31
5    108130.54
6     29475.12
Name: publication 2018, dtype: float64