%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Script to read in and preprocess raw FLUXNET2015 data for the T&C model
% NOTE: The script is a refactored version of 'Read_Fluxnet_2015_IT_Ro2.m'
%       from Simone Fatichi
% Author: Akash Koppa
% Date: 05.03.2024
% Note: modified for teaching of ENV-411, EPFL by Taiqi Lian, 14/11/2024
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% User defined configuration (to be improved/removed later)
% currently the variables to be extracted are hard coded here
% later on, change the text config.txt file into .m file
% FLUXNET2015 column names to extract from the raw CSV.
% The ORDER is significant: later sections address the aggregated data by
% position in this list (entry 7, P_F, is summed instead of averaged;
% entries 24-43 are the flux columns that get screened and extracted).
% Every name must equal the CSV header text exactly, without leading or
% trailing blanks, because the columns are located with strcmp.
var_reqd={'TA_F';'SW_IN_F';'PPFD_IN';'LW_IN_F';'VPD_F';'PA_F';'P_F';...%%required data
          'WS_F';'RH';'CO2_F_MDS';'TS_F_MDS_1';'TS_F_MDS_2';'TS_F_MDS_3';...
          'TS_F_MDS_4';'TS_F_MDS_5';'TS_F_MDS_6';'SWC_F_MDS_1';'SWC_F_MDS_2';...
          'SWC_F_MDS_3';'SWC_F_MDS_4';'SWC_F_MDS_5';'SWC_F_MDS_6';...
          'SWC_F_MDS_7';'NETRAD';'LE_F_MDS';'LE_F_MDS_QC';'LE_CORR';'H_F_MDS';...
          'H_F_MDS_QC';'H_CORR';'G_F_MDS'; 'NEE_CUT_REF';'NEE_VUT_REF';...
          'NEE_CUT_50';'NEE_VUT_50';'RECO_NT_VUT_REF';'RECO_NT_VUT_50';...
          'RECO_DT_VUT_REF';'RECO_DT_VUT_50';'GPP_NT_VUT_REF';'GPP_NT_VUT_50';...
          'GPP_DT_VUT_REF';'GPP_DT_VUT_50'};

%% Set the run configuration (hard-coded for now; no configuration file is read yet)

% configuration information
% Run configuration as a string -> string dictionary with three entries:
%   "station"   : station code, used to name the output files
%   "inputfile" : raw FLUXNET CSV that is imported below
%   "outputdir" : directory that receives the .mat outputs
config = dictionary( ...
    ["station", "inputfile", "outputdir"], ...
    ["CH-Cha", ...
     "C:\Users\luo\Desktop\excercise1d\FLX_CH-Cha_FLUXNET2015_FULLSET_2005-2023_1-3\FLX_CH-Cha_FLUXNET2015_FULLSET_HH_2005-2023_1-3.csv", ...
     "C:\Users\luo\Desktop\excercise1d\output"]);

% config("station") = "CH-Cha";
% config("inputfile") = "C:\Users\tlian\Desktop\Ecohydrology teaching\Exercise 1\Swiss_Case_Studies\Swiss_Case_Studies\Pre_process\FLX_CH-Cha_FLUXNET2015_SUBSET_HH_2005-2014_2-4.csv";
% config("outputdir") = "C:\Users\tlian\Desktop\Ecohydrology teaching\Exercise 1\Swiss_Case_Studies\Swiss_Case_Studies";

%% Site metadata (hard-coded below instead of being read from the FLUXNET site list)
% longitude, latitude, deltagmt, and zbas
% Lon = ; 
% Lat = ;
% DeltaGMT = ;
% Zbas = ;

% Chamau: site constants that are stored unchanged in both output files.
% Zbas also feeds the fallback pressure estimate exp(-Zbas/8434.5) further
% down (presumably the site elevation in metres - confirm).
[Lon, Lat, DeltaGMT, Zbas] = deal(8.4104, 47.2102, 1, 393);

%% Read in the raw FLUXNET csv file
% Import the comma-separated file, treating the first line as the header.
% 'var_list' receives the header text and 'data' the numeric matrix.
raw_import = importdata(config("inputfile"), ',', 1);
var_list = raw_import.textdata;
data = raw_import.data;
clear raw_import
% -9999 is treated as the missing-value marker and replaced by NaN
is_missing = (data == -9999);
data(is_missing) = NaN;
clear is_missing

%% Find the index of the required variables in the raw data 
% (should be improved)
% Locate every requested variable in the CSV header.
% var_index(k) is the position of var_reqd{k} in var_list (used below as
% the column of 'data'), or NaN when the variable is absent from this
% file; the corresponding hourly column then stays NaN.
var_index = NaN(length(var_reqd),1);
for k=1:length(var_reqd)
    % strtrim tolerates stray blanks in the requested name; find(...,1)
    % keeps only the first match so that a duplicated header name cannot
    % break the scalar assignment below.
    pos = find(strcmp(var_list,strtrim(var_reqd{k})),1);
    if ~isempty(pos)
        var_index(k)=pos;
    end
end
% Report the variables that were not found instead of dropping them silently
if any(isnan(var_index))
    missing_names = reshape(strtrim(var_reqd(isnan(var_index))),1,[]);
    warning('ReadFluxnet:missingVariables', ...
        'Variables not found in the input file (left as NaN): %s', ...
        strjoin(missing_names, ', '));
    clear missing_names
end
clear k pos

%% Extract and process the dates
% First and last time stamp of the record, taken from column 2 of the raw
% data (numbers of the form yyyymmddHHMM) and converted to datenum
stamp_format = 'yyyymmddHHMM';
date_start = datenum(num2str(data(1,2)), stamp_format);
date_end   = datenum(num2str(data(end,2)), stamp_format);
clear stamp_format
% Hourly time axis (step of 1/24 day) as a column vector
Date = (date_start:1/24:date_end)';

%% Aggregate half-hourly data to hourly
dt = 0.5;                 % time step of the raw records [h]
fr = 1/dt;                % raw records per hour
n  = size(data,1);        % number of raw records
m  = floor(n/fr);         % number of complete hours; a trailing partial hour is dropped
% One column per entry of var_reqd; columns of absent variables stay NaN
data_hourly = NaN(m, numel(var_reqd));

% Positions in var_reqd of the variables that were found in the file
available = find(~isnan(var_index))';
for j = available
    % fr-by-m layout: each column holds the raw records of one hour
    per_hour = reshape(data(1:m*fr, var_index(j)), fr, m);
    if j ~= 7
        % NaNs are ignored; an hour is NaN only if all its records are NaN
        hourly = mean(per_hour, 1, "omitnan");
    else
        % entry 7 is precipitation (P_F): accumulate instead of averaging.
        % With "omitnan" an hour whose records are all NaN sums to 0.
        hourly = sum(per_hour, 1, "omitnan");
    end
    data_hourly(:,j) = hourly;
end
clear per_hour hourly available

clear data

%% Remove poorly interpolated data
threshold_radiation=3; %% Delta W/m2 / h
threshold_co2=0.2; %% Delta uCO2/m2 s / h
%%% Cleaning of repeated, badly interpolated variables
% A window of w hourly values is discarded when it is almost a copy of the
% w values right before it, i.e. when the summed absolute difference
% between the two windows is below threshold*w. The test is run with
% windows of 168, 72 and 24 steps, in that order; each pass compares the
% series as left by the previous pass. Windows containing NaN are never
% flagged because their sum is NaN.
% The 24-step pass is applied to the stored result here; the previous code
% wrote its NaNs into a variable that was then discarded.
screening = {[24 25 28 31], threshold_radiation; ...  % NETRAD, LE_F_MDS, H_F_MDS, G_F_MDS
             32:43,         threshold_co2};           % NEE_*, RECO_*, GPP_* columns
windows = [168 72 24];
for g = 1:size(screening,1)
    for i = screening{g,1}
        v = data_hourly(:,i);
        for w = windows
            cleaned = v;
            % j is the last index of the window under test
            for j = 2*w:w:length(v)
                previous = v(j-2*w+1:j-w);
                current  = v(j-w+1:j);
                if sum(abs(previous-current)) < screening{g,2}*w
                    cleaned(j-w+1:j) = NaN;
                end
            end
            v = cleaned;
        end
        data_hourly(:,i) = v;
    end
end
clear screening windows g i j w v cleaned previous current

%% Store the required data in separate variables (to be improved)
% Each variable is copied from its column of data_hourly (column k holds
% var_reqd{k}) and values outside a fixed plausible range are set to NaN.

% --- Meteorological variables -------------------------------------------
Ta = data_hourly(:,1);                          % TA_F - degC
Ta(Ta < -60 | Ta > 70) = NaN;
Rsw = data_hourly(:,2);                         % SW_IN_F - W/m2 - Incoming global solar radiation
Rsw(Rsw < -10 | Rsw > 2000) = NaN;
Rsw(Rsw < 0) = 0;                               % remaining small negative values are set to zero
PPFD = data_hourly(:,3);                        % PPFD_IN - NOTE(review): FLUXNET lists PPFD in umol photons/m2/s, not W/m2 - confirm
PPFD(PPFD < -10 | PPFD > 3000) = NaN;
PPFD(PPFD < 0) = 0;                             % remaining small negative values are set to zero
Latm = data_hourly(:,4);                        % LW_IN_F
Latm(Latm < -600 | Latm > 600) = NaN;
VPD = data_hourly(:,5);                         % VPD_F - hPa
VPD(VPD < 0 | VPD > 70) = NaN;
Pre = data_hourly(:,6)*10;                      % PA_F - kPa -> mbar
Pre(Pre < 0) = NaN;
Pr = data_hourly(:,7);                          % P_F - mm (hourly sum of the raw records)
Pr(Pr < 0 | Pr > 220) = NaN;
Ws = data_hourly(:,8);                          % WS_F - m/s
Ws(Ws < 0 | Ws > 100) = NaN;
U = data_hourly(:,9);                           % RH - %
U(U < 0 | U > 100) = NaN;
U = U/100;                                      % percent -> fraction
CO2 = data_hourly(:,10);                        % CO2_F_MDS - CO2 concentration (umol mol-1)
CO2(CO2 < 0 | CO2 > 900) = NaN;

% --- Soil temperature (degC), sensors 1 to 6 ----------------------------
Tsoil1 = data_hourly(:,11);                     % TS_F_MDS_1
Tsoil1(Tsoil1 < -50 | Tsoil1 > 90) = NaN;
Tsoil2 = data_hourly(:,12);                     % TS_F_MDS_2
Tsoil2(Tsoil2 < -50 | Tsoil2 > 90) = NaN;
Tsoil3 = data_hourly(:,13);                     % TS_F_MDS_3
Tsoil3(Tsoil3 < -50 | Tsoil3 > 90) = NaN;
Tsoil4 = data_hourly(:,14);                     % TS_F_MDS_4
Tsoil4(Tsoil4 < -50 | Tsoil4 > 90) = NaN;
Tsoil5 = data_hourly(:,15);                     % TS_F_MDS_5
Tsoil5(Tsoil5 < -50 | Tsoil5 > 90) = NaN;
Tsoil6 = data_hourly(:,16);                     % TS_F_MDS_6
Tsoil6(Tsoil6 < -50 | Tsoil6 > 90) = NaN;

% --- Soil moisture content (%), depths 1 to 7 ---------------------------
SWC1 = data_hourly(:,17);                       % SWC_F_MDS_1
SWC1(SWC1 < 0 | SWC1 > 100) = NaN;
SWC2 = data_hourly(:,18);                       % SWC_F_MDS_2
SWC2(SWC2 < 0 | SWC2 > 100) = NaN;
SWC3 = data_hourly(:,19);                       % SWC_F_MDS_3
SWC3(SWC3 < 0 | SWC3 > 100) = NaN;
SWC4 = data_hourly(:,20);                       % SWC_F_MDS_4
SWC4(SWC4 < 0 | SWC4 > 100) = NaN;
SWC5 = data_hourly(:,21);                       % SWC_F_MDS_5
SWC5(SWC5 < 0 | SWC5 > 100) = NaN;
SWC6 = data_hourly(:,22);                       % SWC_F_MDS_6
SWC6(SWC6 < 0 | SWC6 > 100) = NaN;
SWC7 = data_hourly(:,23);                       % SWC_F_MDS_7
SWC7(SWC7 < 0 | SWC7 > 100) = NaN;

% --- Energy fluxes (W/m2) -----------------------------------------------
Rn = data_hourly(:,24);                         % NETRAD
Rn(Rn < -1550 | Rn > 1000) = NaN;
LE = data_hourly(:,25);                         % LE_F_MDS
LE(LE < -250 | LE > 1500) = NaN;
LE_QC = data_hourly(:,26);                      % LE_F_MDS_QC: 0 = measured; 1 = good quality gapfill; 2 = medium; 3 = poor
LE_CORR = data_hourly(:,27);                    % LE_CORR - LE corrected for energy balance closure
H = data_hourly(:,28);                          % H_F_MDS
H(H < -1550 | H > 1500) = NaN;
H_QC = data_hourly(:,29);                       % H_F_MDS_QC: 0 = measured; 1 = good quality gapfill; 2 = medium; 3 = poor
H_CORR = data_hourly(:,30);                     % H_CORR - H corrected for energy balance closure
G = data_hourly(:,31);                          % G_F_MDS - Soil heat flux
G(G < -250 | G > 1500) = NaN;

% --- Carbon fluxes (umol/m2/s) ------------------------------------------
NEE1 = data_hourly(:,32);                       % NEE_CUT_REF - NEE of CO2 (with storage)
NEE1(NEE1 < -500 | NEE1 > 500) = NaN;
NEE2 = data_hourly(:,33);                       % NEE_VUT_REF - NEE of CO2 (with storage)
NEE2(NEE2 < -500 | NEE2 > 500) = NaN;
NEE3 = data_hourly(:,34);                       % NEE_CUT_50
NEE3(NEE3 < -500 | NEE3 > 500) = NaN;
NEE4 = data_hourly(:,35);                       % NEE_VUT_50
NEE4(NEE4 < -500 | NEE4 > 500) = NaN;
Reco1 = data_hourly(:,36);                      % RECO_NT_VUT_REF
Reco1(Reco1 < -10 | Reco1 > 1500) = NaN;
Reco2 = data_hourly(:,37);                      % RECO_NT_VUT_50
Reco2(Reco2 < -10 | Reco2 > 1500) = NaN;
Reco3 = data_hourly(:,38);                      % RECO_DT_VUT_REF
Reco3(Reco3 < -10 | Reco3 > 1500) = NaN;
Reco4 = data_hourly(:,39);                      % RECO_DT_VUT_50
Reco4(Reco4 < -10 | Reco4 > 1500) = NaN;
GPP1 = data_hourly(:,40);                       % GPP_NT_VUT_REF (nighttime method) - Gross primary production
GPP1(GPP1 < -10) = NaN;
GPP2 = data_hourly(:,41);                       % GPP_NT_VUT_50 (nighttime method)
GPP2(GPP2 < -10) = NaN;
GPP3 = data_hourly(:,42);                       % GPP_DT_VUT_REF (daytime method)
GPP3(GPP3 < -10) = NaN;
GPP4 = data_hourly(:,43);                       % GPP_DT_VUT_50 (daytime method)
GPP4(GPP4 < -10) = NaN;

% Discard poorly gap-filled turbulent fluxes. The QC columns are hourly
% means of the raw flags, so only hours whose flags are all 3 equal 3.
LE(LE_QC == 3) = NaN;
H(H_QC == 3) = NaN;

%% Fill gaps through interpolation
% Replace missing air pressure by the mean of the valid values
% (base MATLAB 'omitnan' is used, so no Statistics Toolbox is needed)
Pre(isnan(Pre)) = mean(Pre, 'omitnan');
% If the record carries no usable pressure (all NaN, so the NaN-ignoring
% sum is zero), fall back to a constant value computed from Zbas
if sum(Pre, 'omitnan') == 0
    p = 1013.25*exp(-Zbas/8434.5); %%%
    % scalar assignment: does not depend on Date and Pre having equal length
    Pre(isnan(Pre)) = p;
    clear p
end

% Despiking of Ta, Ws and CO2.
% Step 1: a value is removed when it differs from the previous hour by
%         more than a fixed limit (10 for Ta, 9 for Ws, 40 for CO2).
% Step 2: two further passes each remove the value that directly follows
%         a NaN, so every gap (new or pre-existing) grows by two steps.

% Air temperature
dT = [0; diff(Ta)];
Ta(abs(dT) > 10.0) = NaN;
for pass = 1:2
    dT = [0; diff(Ta)];
    Ta(isnan(dT)) = NaN;
end

% Wind speed
dWs = [0; diff(Ws)];
Ws(abs(dWs) > 9) = NaN;
for pass = 1:2
    dWs = [0; diff(Ws)];
    Ws(isnan(dWs)) = NaN;
end

% CO2 concentration
dCO2 = [0; diff(CO2)];
CO2(abs(dCO2) > 40) = NaN;
for pass = 1:2
    dCO2 = [0; diff(CO2)];
    CO2(isnan(dCO2)) = NaN;
end
clear pass

%% Smoothen the gapfilled variables
% interpolate_smooth / interpolate_smooth_pr are project helpers defined
% outside this script (presumably gap filling and smoothing - see their
% own files). Whatever is still NaN afterwards is filled in two steps:
%   1) linear interpolation in time between valid samples (interp1 returns
%      NaN outside the range of the valid samples), then
%   2) the mean of the series for anything that remains.
% mean(...,'omitnan') from base MATLAB replaces nanmean, which needs the
% Statistics Toolbox.

% Air temperature
[Ta]=interpolate_smooth(Ta,Date);
gap = isnan(Ta);
Ta(gap) = interp1(Date(~gap), Ta(~gap), Date(gap));
Ta(isnan(Ta)) = mean(Ta, 'omitnan');

% Wind speed
[Ws]=interpolate_smooth(Ws,Date);
gap = isnan(Ws);
Ws(gap) = interp1(Date(~gap), Ws(~gap), Date(gap));
Ws(isnan(Ws)) = mean(Ws, 'omitnan');

% Relative humidity (fraction): values below 0.04 are discarded before filling
[U]=interpolate_smooth(U,Date);
U(U<0.04)=NaN;
gap = isnan(U);
U(gap) = interp1(Date(~gap), U(~gap), Date(gap));
U(isnan(U)) = mean(U, 'omitnan');

% Precipitation, CO2 and incoming longwave radiation
[Pr]=interpolate_smooth_pr(Pr,Date);
[CO2]=interpolate_smooth(CO2,Date);
[Latm]=interpolate_smooth(Latm,Date);

gap = isnan(Latm);
Latm(gap) = interp1(Date(~gap), Latm(~gap), Date(gap));
Latm(isnan(Latm)) = mean(Latm, 'omitnan');
clear gap

%% Calculate vapor pressure and dew point temperature
% Saturation vapour pressure [Pa] as a function of air temperature Ta
esat = 611*exp(17.27*Ta./(237.3+Ta));
% Actual vapour pressure: saturation value scaled by relative humidity U (fraction)
ea = U.*esat;
% Dew point temperature from Ta and U.
% NOTE(review): the constant in the denominator is 237.7 here but 237.3 in
% esat above - confirm that the difference is intended.
coef_a = 17.27;
coef_b = 237.7;
gamma_td = coef_a*Ta./(coef_b+Ta) + log(U);
Tdew = coef_b*gamma_td./(coef_a-gamma_td);
clear coef_a coef_b gamma_td

%% Save the combined input and validation dataset
% Write the forcing and validation variables to <outputdir>/Res_<station>.mat
res_file = config("outputdir") + "/Res_" + config("station") + ".mat";
res_vars = {'Date','DeltaGMT','Lon','Lat','Zbas','Ta','Rsw','esat','ea','Tdew', ...
    'PPFD','Latm','VPD','Pre','Pr','Ws','U','CO2','Tsoil1','Tsoil2', ...
    'Tsoil3','Tsoil4','Tsoil5','Tsoil6','SWC1','SWC2','SWC3','SWC4', ...
    'SWC5','SWC6','SWC7','Rn','LE','LE_CORR','H','H_CORR','G','NEE1', ...
    'NEE2','NEE3','NEE4','Reco1','Reco2','Reco3','Reco4','GPP1','GPP2', ...
    'GPP3','GPP4'};
save(res_file, res_vars{:});
clear res_file res_vars

%% Partition the radiation into different required components
% Start from a clean workspace: only the configuration is kept
clearvars -except config
out_dir = config("outputdir");
site = config("station");

% Radiation partitioning is done by a project helper that receives the
% output directory and station code (presumably it reads the Res_ file
% written above - confirm in radiation_partition_tandc)
[SD,SB,SAD1,SAD2,SAB1,SAB2,PARB,PARD,N,Rsws,t_bef,t_aft]= ...
radiation_partition_tandc(out_dir,site,0);

%% Save the final input variables required to run the T&C model
% bring the variables of the Res_ file back into the workspace
load(out_dir + "/Res_" + site + ".mat")
disp(size(Date))

% write the subset needed for the model run to <outputdir>/Data_<station>_run.mat
run_vars = {'Date','Pr','Ta','Ws','U','ea','SAD1','SAD2','SAB1','SAB2','N', ...
    'Latm','Tdew','esat','PARB','PARD','Pre','DeltaGMT','Lon','Lat', ...
    'Zbas','CO2','t_bef','t_aft'};
save(out_dir + "/Data_" + site + "_run.mat", run_vars{:});

%% Exit the program
% exit







