function [x,solverinfo]=broyden_solver(F,x0,P)
% function [x,solverinfo,P]=broyden_solver(F,x0,P)
% Broyden method to solve F(x)=0
% The code follows the implementation presented in
%
% [1] Fang, H.-r. and Saad, Y. (2009), Two classes of multisecant methods for nonlinear acceleration.
% Numer. Linear Algebra Appl., 16: 197-221. https://doi.org/10.1002/nla.617
%
% Update of the (inverse) jacobian is done every 'group_size' function call.
% Within a group of function calls, an online update of the inverse
% jacobian is used to compute jacobian-vector products (no inplace update
% of H). It uses a generalization of the algorithm QNERR presented in
%
% [2] Deuflhard P. (2004) Newton Methods for Nonlinear Problems. Springer
% Series in Computational Mathematics 35.
%
% The generalization take into account partitions of the inverse jacobian as H=D+Q,
% where D can be easily computed and Q is updated with low rank matrices
%
% If the solver detects that the sequence is non-convergent, the algorithm
% will
% 1) Recompute the Jacobian
% 2) If it fails, the algorithm will switch to the Newton solver (jfnk) and as
% soon as the iterate is close enough to the final solution will switch
% back to broyden method.
% See: QNH1 and QNJ1 for specific arguments
%
%
% inputs:
%   F:                  Function handle, to solve F==0 for. For requirements, see below
%   x0:                 Vector, inital solution guess
%   P:                  struct, solver parameters. For more info, see solveF
% returns:
%   x:          Vector, Solution, if solver converged, otherwise its set to x0
%   solverinfo: Struct, Info struct for solver operation
%
% Check solveF description for the requirements for F
%
% [+MEQ MatlabEQuilibrium Toolbox+] Swiss Plasma Center EPFL Lausanne 2022. All rights reserved.
P.ciy = setdiff(1:numel(x0), P.iy);

% Run broyden
[x,solverinfo] = QNH1(F,x0,P,P.prev_H);
% Broyden solver failed
if solverinfo.failed
  s = warning('off','backtrace');
  warning('Broyden failed. Starting Newton iterations...')
  warning(s);

  % Try to run the newton solver for several iteration to get to a better
  % starting point

  % Setup
  P_newton = P;
  P_newton.anajac = false;
  P_newton.algoF = 'jfnk';
  if ~isempty(solverinfo.prev_H)
    P_newton.prev_H = solverinfo.prev_H;
    % Building inverse jacobian as preconditionner for newton
    if P.use_inverse
      P_newton.prectype = 'user';
      P_newton.precupdate = 'once';
      P_newton.Prec = @(v) right_mult(P_newton.is_factored,P_newton.prev_H.H,v,[], P.iy,P.ciy) + P_newton.prev_H.d.*v;
    end
  end

  % Running Newton solver (jfnk starting at x0)
  [x,solverinfo_newton] = newton_solver(F,x0,P_newton,true);

  % Close to optimum, switch to broyden
  if solverinfo_newton.stopped_early
    s = warning('off','backtrace');
    warning('Close to optimum switch to Broyden...')
    warning(s);

    % Run broyden
    prev_H = [];
    [x,solverinfo_broy] = QNH1(F,x,P,prev_H);

    % Combine info from all solvers
    solverinfo = combine_solverinfo(solverinfo,solverinfo_newton);
    solverinfo = combine_solverinfo(solverinfo,solverinfo_broy);

  else
    % Combine info from all solvers
    solverinfo = combine_solverinfo(solverinfo,solverinfo_newton);
  end
end
end

function [x,solverinfo]=QNH1(F,x0,P,initH)
% function [x,solverinfo]=QNH1(F,x0,P,initH)
% Broyden type 1 update to solve
%          F(x)=0
% The inverse jacobian is updated by rank-s updates at each iteration,
% where s is the size of the group.
%
% The code follows the implementation presented in
%
% [1] Fang, H.-r. and Saad, Y. (2009), Two classes of multisecant methods for nonlinear acceleration.
% Numer. Linear Algebra Appl., 16: 197-221. https://doi.org/10.1002/nla.617
%
% Within a group of function calls, an online update of the inverse
% jacobian is used to compute jacobian-vector products (no inplace update
% of H). It uses a generalization of the algorithm QNERR presented in
%
% [2] Deuflhard P. (2004) Newton Methods for Nonlinear Problems. Springer
% Series in Computational Mathematics 35.
%
% The generalization take into account partitions of the inverse jacobian as H=D+Q,
% where D can be easily computed and Q is updated with low rank matrices.
% This step is only applied when P.use_inverse=true. The other case does
% not have close form solutions.
%
% In the current implementation, H=diag(d0)+Q where d0(mask)=1./diag(mask)
% if P.use_inverse=true and d0(mask)=diag(mask) otherwise
% In addition, it is possible to factor Q as [U*S,Q12;Q21,Q22] where U and S are
% low rank approximation. The argument P.is_factored controls this behavior
% and P.ldim is the dimension of the low rank approximation (only if P.use_inverse=true). Finally, two
% algorithms are implemented:
% 1) Update the jacobian Q every 'P.group_size' function call
% 2) (P.large_system=true) Low-rank update are tracked over time, but the updated jacobian Q is only built
% at the end of the solver call
%
% P.use_inverse=true should be preferred over P.use_inverse=false.
%
%
% inputs:
%   F:              Function handle, to solve F==0 for. For requirements, see below
%   x0:             Vector, inital solution guess
%   P:              struct, solver parameters
%   initH:          the (inverse) jacobian to start the iteration with. It can be
%                   empty. In that case, the jacobian is first computed and
%                   then inverted if P.use_inverse=true
%                   (takes more time).
% returns:
%   x:          Vector, Solution, if solver converged, otherwise its set to x0
%   solverinfo: Struct, Info struct for solver operation
%
% Check solveF description for the requirements for F
%
% [+MEQ MatlabEQuilibrium Toolbox+] Swiss Plasma Center EPFL Lausanne 2022. All rights reserved.


%% debug/verbosity options
dispprogress   = (P.debug>1); % textual display of each iteration results
displinesrch   = (P.debug>2); % display line search information
dispFeachnewton= (P.debug>3); % display F call internals on each newton iteration
dispFeachkryl  = (P.debug>4); % display F call internals on each krylov iteration

ploteachnewton = (P.debugplot>3); % each call F in newton iteration produces a plot
ploteachkryl   = (P.debugplot>4); % each call of F during krylov iteration produces a plot
plotlinesrch   = (P.debuglinesrch);

%% options for F evaluation
% For each newton iteration
optsbroyden = optsF('doplot',ploteachnewton,'dojacx',P.anajac,'dodisp',dispFeachnewton);
% For each line search step
optslinesrch = optsF('dodisp',dispFeachnewton,'doplot',ploteachnewton);
% For each krylov iteration
optskrylov = optsF('dodisp',dispFeachkryl,'doplot',ploteachkryl);

%% Algorithm options
group_size = P.group_size;% max group size
group_ind = 1:group_size:(ceil(P.kmax/group_size)+1)*group_size;

param_jac = struct('optskrylov',optskrylov); % Parameters used when P.use_inverse=false
param_ls = struct();

% Initialization
n = numel(x0); % Dimension of x
kmax = numel(group_ind); % Max number of iteration
x = x0;
dx = zeros(n,1);
kbroyden = 0; % Number of iteration
converged = false; % Flag for convergence
gmres_iter_total = 0; % number of completed inner gmres iterations (bounded by mkryl)
nfeval = 0; % Counter for number of function call steps

if P.large_system
  maxFCall = group_ind(end);
  param_ls.E = zeros(n,maxFCall);
  param_ls.V = zeros(n,maxFCall);
  param_ls.start = 1;
end

% Storing
dFs = zeros(n,group_size);
Dxs = zeros(n,group_size);
Dxs_ = zeros(n,group_size);
if dispprogress
  format_iter = '%12d | %15.3e | %20.13e | %10.3e | %10d | %6d\n';
  header_str=sprintf('\n%12s | %15s | %20s | %10s | %8s | %6s\n', 'k_step', '||dx||/||x||', ...
    'norm(residual)',  'alpha', 'group size','krylov');
end

alpha_step = 1.0 - P.relax; % default Newton step size

% Those are parameters of the linesearch, which controls the convergence rate
% ∣F(x+a*v)∣<(rho+tau_t)∣F(x)∣−ca^2v'*v, c=1e-4
% tau_t must satisfy sum_t tau_t<inf
% The parameter rho controls the convergence rate, since for t sufficiently large
% ∣F(x+a*v)∣<rho_hat∣F(x)∣ for (rho+tau_t)<rho_hat<1
% The choice tau(it)=20./(2*it+1).^2 seems to work well practice.
% In particular, allowing residuals increase is useful at the beginning of the optimization
% process, where some "walls", that need to be jumped over, might appear.
tau = @(it)20./(2*it+1).^2;
rho = 0.95;

P_newt=P;
P_newt.algoGMRES='direct_inversion';
while ~converged && kbroyden<kmax-1
  %% Initial function eval
  % Evaluate F on first iteration
  if (kbroyden == 0)
    if ~isempty(initH) && P.use_inverse
      % Do not compute jacobian, using the one from the previous solver
      % call (only if use_inverse=true)
      optsbroyden.dojacx = false;
    end
    % Return the function and the jacobian evaluated in x
    [normsq,nfeval,Fx,mask,Jdiag,J] = evaluate_F(F,x,optsbroyden,P,nfeval);
    if isnan(normsq)
      if dispprogress
        fprintf('Broyden solver encountered Fx=NaN for kbroyden=%d/%d, aborting...\n',kbroyden,kmax);
      end
      % Failed on first iteration
      solverinfo = initsolverinfo('isconverged',false,'failed',true, ...
        'niter',kbroyden,'residual_max',inf,'residual_2norm',inf,'res',inf, ...
        'nfeval',nfeval,'prev_H',[],'mkryl',0);
      return
    end
    resnorm = sqrt(2*normsq);
  end

  % Initialize H on the first iteration
  if (kbroyden==0)
    converged = (resnorm < P.tolF);
    [H,d,d0] = initial_H(initH,J,Jdiag,mask,P);
    if ~P.use_inverse
      % initialize preconditioner matrix function handle
      param_jac.Pinv = preconditioner_init(P,J,n);
    end
    % Set jacobian to false for next calls
    optsbroyden.dojacx = false;
    % Store values in case of non-convergence
    init_H = finalize_H(H,d,d0);
    % Show initial residual
    if dispprogress
      fprintf(header_str)
      fprintf(format_iter, 0, NaN, resnorm, 1, NaN,NaN)
    end
    if converged, break; end
  end
  kbroyden = kbroyden+1; % iteration counter

  group_size_i = group_ind(kbroyden+1)-group_ind(kbroyden); % size of the inner loop
  resnorm_g = zeros(group_size_i,1); % collect residual norms
  gmres_iter = 0; % collect number of krylov iterations

  if P.large_system
    param_ls.slice = group_ind(param_ls.start):group_ind(kbroyden)-1; % indices of the inner loop
    param_ls.iter_since_restart = kbroyden-param_ls.start;
  end

  %% Inner loop: Accumulation of function evaluations
  for j = 1:group_size_i
    % Compute -H*Fx or H\Fx;
    [dx,Dxs,Dxs_,gmres] = find_direction(j,H,d,Jdiag,Fx,mask, Dxs,Dxs_, ...
      param_ls,param_jac,P);
    gmres_iter = gmres_iter+gmres;

    % Update x
    lsfun = @(a,nfeval) evaluate_F(F,x+a*dx,optslinesrch,P,nfeval);
    iteration = group_ind(kbroyden)-1;
    norm2x=dx'*dx;
    % Linesearch
    [a,normsq,accept,~,nfeval,Fx_,mask,Jdiag] = ...
      linesearch(lsfun,normsq,[],norm2x,rho,tau(iteration),alpha_step,nfeval,displinesrch,plotlinesrch);
    if ~accept
      % Fails to converge -> compute jacobian and perform newton step
      % dx is then stored to update the Broyden approximation of the
      % inverse Jacobian
      optsbroyden.dojacx = true;
      [hnormsq,nfeval,~,mask,Jdiag,J] = evaluate_F(F, x, optsbroyden,P,nfeval);
      optsbroyden.dojacx = false;
      if dispprogress
        s = warning('off','backtrace');
        warning('Recomputing jacobian');
        warning(s);
      end
      v0 = hnormsq;
      dv0da = -2*v0; % dv/da at a=0 (see newton solver)
      dx = solve_linear_problem(J,-Fx,[],[],mask,Jdiag,P_newt);
      lsfun = @(a,nfeval) evaluate_F(F,x+a*dx,optslinesrch,P,nfeval);
      [a,normsq,accept,~,nfeval,Fx_,mask,Jdiag] = linesearch(lsfun,v0,dv0da,[],[],[],alpha_step,nfeval,displinesrch,plotlinesrch);
      if ~accept
        % Still fails to converge
        % -> Switch back to newton steps
        resnorm = sqrt(2*normsq);
        resmax = max(abs(Fx_)); % for output only
        solverinfo = initsolverinfo('isconverged',false,'failed',true, ...
          'niter',kbroyden,'residual_max',resmax,'residual_2norm',resnorm,'res',resmax, ...
          'nfeval',nfeval,'prev_H',init_H,'mkryl',gmres_iter_total);
        return
      else
        dx = a*dx;
      end
    else
      dx = a*dx;
    end
    x = x+dx;
    resnorm_inner = sqrt(2*normsq);

    % New D
    d = update_D(P.use_inverse,d0,mask,Jdiag);
    if P.use_inverse
      % Store values
      dFs(:,j) = Fx_ - Fx;
      Dxs(:,j) = dx - d.*dFs(:,j);
    else
      Dxs(:,j) = dx;
      dFs(:,j) = Fx_ - Fx - d.*dx;
    end

    resnorm_g(j) = resnorm_inner;
    % Keep track of last Fx
    Fx = Fx_;

    % Converged + store final results
    converged = (resnorm_inner < P.tolF);
    if converged
      group_size_i = j;
      break
    end
  end
  resnorm = resnorm_inner;
  gmres_iter_total = gmres_iter_total+gmres_iter;

  %% Update of the (inverse) Jacobian
  % Selecting slice for (inverse) jacobian update
  dxs_i = Dxs(:,1:group_size_i)';
  dfs_i = dFs(:,1:group_size_i)';
  [H,param_ls] = update_approx_jac(kbroyden,group_ind,group_size_i,H,dfs_i,dxs_i,param_ls,P);

  %% Display quantities
  if dispprogress
    dx_rel_norm = norm(dx)/norm(x);
    fprintf(format_iter, kbroyden, dx_rel_norm, resnorm,1,group_size_i,gmres_iter)
  end
  if converged, break; end
end % Broyden outer loop

if P.large_system && kbroyden>0 && P.use_inverse
  % Update matrix at the end
  s1 = group_ind(param_ls.start);
  s2 = group_ind(kbroyden);
  slice = s1:s2-1 + group_size_i;
  H = update_H(P.is_factored,H,param_ls.E(:,slice),param_ls.V(:,slice)',P.iy,P.ciy);
end

% Output statistics
converged = converged && ~any(isnan(x));
if ~converged
  % If not converged store the initial (inverse) jacobian
  prev_H = init_H;
else
  prev_H = finalize_H(H,d,d0);
end
resmax = max(abs(Fx));
solverinfo = initsolverinfo('isconverged',converged, ...
  'niter',kbroyden,'residual_max',resmax,'residual_2norm',resnorm,'res',resnorm, ...
  'nfeval',nfeval,'prev_H',prev_H,'mkryl',gmres_iter_total);
end

function H=update_H(is_factored,H_prev,l,V,iy,ciy)
% H = update_H(is_factored, H_prev, l, V, iy,ciy)
% Helper function that updates the matrix H with a low-rank update l*V.
% This function takes advantage of various decompositions of H.
%
% Inputs:
%   - is_factored:    Specifies whether the matrix H has a factored or dense format.
%   - H_prev:       Matrix of size DxD or structure (if is_factored)
%                   containing 5 fields: H11U, H11V, H12, H21, H22 such that H_prev=[H11U*H11V,H12;H21,H22].
%   - l:            Left update.
%   - V:            Right update.
%   - iy:           Index such that H(iy, iy) = U*V. Only used if
%                   is_factored is true.
%   - ciy:          Complement indices of iy. Only used if
%                   is_factored is true.
%
% Returns:
%   - H:            Updated matrix H, calculated as H = H_prev + l*V.

if is_factored
  H.H12 =   H_prev.H12 + l(iy,:)*V(:,ciy);
  H.H21 =   H_prev.H21 + l(ciy,:)*V(:,iy);
  H.H22 =   H_prev.H22 + l(ciy,:)*V(:,ciy);
  H.H11U = [H_prev.H11U,l(iy,:)];
  H.H11V = [H_prev.H11V;V(:,iy)];
else
  H = H_prev + l*V;
end
end

function res=right_mult(is_factored,H,v,mask_inner, iy,ciy)
% res = right_mult(is_factored, H, v, mask_inner, iy,ciy)
% Helper function to efficiently compute the matrix product H*v. This
% function takes advantage of various decompositions of H.
%
% inputs:
%   - is_factored:  Specifies whether the matrix H has a factored format or a dense
%                   format.
%   - H:            Matrix of size DxD or structure (if is_factored)
%                   containing 5 fields: H11U, H11V, H12, H21, H22 such that H_prev=[H11U*H11V,H12;H21,H22].
%   - v:            Matrix of size (D x N).
%   - mask_inner:   Defines the columns of v and rows of H to include for the
%                   product, i.e., H(:,mask_inner)*v(mask_inner,:).
%                   Array of size D (can be empty).
%   - iy:           Index such that H(iy, iy) = U*V. Only used if
%                   is_factored is true.
%   - ciy:          Complement indices of iy. Only used if
%                   is_factored is true.
%
% returns:
%   - res:          The product H*v.

if is_factored
  U = H.H11U;
  V = H.H11V;
  H12 = H.H12;
  H21 = H.H21;
  H22 = H.H22;
  v1 = v(iy,:);
  v2 = v(ciy,:);
  if ~isempty(mask_inner)
    mask_inner1 = mask_inner(iy);
    mask_inner2 = mask_inner(ciy);
    res = v;
    res(iy,:) = U*(V(:,mask_inner1)*v1(mask_inner1,:)) + H12(:,mask_inner2)*v2(mask_inner2,:);
    res(ciy,:) = H21(:,mask_inner1)*v1(mask_inner1,:) + H22(:,mask_inner2)*v2(mask_inner2,:);
  else
    res = v;
    res(iy,:) = U*(V*v1) + H12*v2;
    res(ciy,:) = H21*v1 + H22*v2;
  end
else
  if ~isempty(mask_inner)
    res = H(:,mask_inner)*v(mask_inner,:);
  else
    res = H*v;
  end
end
end

function res=left_mult(is_factored,H,v,iy,ciy)
% res = left_mult(is_factored, H, v,iy,ciy)
% Helper function to efficiently compute the matrix product v*H. This
% function takes advantage of various decompositions of H.
%
% inputs:
%   - is_factored:  Specifies whether the matrix H has a factored format or a dense
%                   format.
%   - H:            Matrix of size DxD or structure (if is_factored)
%                   containing 5 fields: H11U, H11V, H12, H21, H22 such that H_prev=[H11U*H11V,H12;H21,H22].
%   - v:            Matrix of size (N x D).
%   - iy:           Index such that H(iy, iy) = U*V. Only used if
%                   is_factored is true.
%   - ciy:          Complement indices of iy. Only used if
%                   is_factored is true.
%
% returns:
%   - res:          The product v*H.

% Apply the appropriate multiplication based on use_factor
if is_factored
  res = zeros(size(v,1),size(H.H12,2)+size(H.H11V,2));
  res(:,iy) = (v(:,iy)*H.H11U)*H.H11V+v(:,ciy)*H.H21;
  res(:,ciy) = v(:,iy)*H.H12+v(:,ciy)*H.H22;
else
  res = v*H;
end
end

function H=low_rank(H_prev,rank)
% [H] = low_rank2(H_prev, k)
% Find a low-rank approximation of the matrix H_prev=B*C (of rank 'rank').
% Solve the system
%       min B_, C_ ||B_*C_-B*C||^2
% This is performed by first computing the QR decomp. of B and C and then
% taking the svd of the R1*R2'.
%
% The algorithm takes advantage of the fact that the matrix H is the product of
% 2 matrices B, C of smaller size. Hence, the full matrix H is never built.
%
% inputs:
%   H_prev:     Matrix to compute a low-rank approximation. H_prev is a structure
%               containing two fields B=U and C=S, such that U*S=H_prev.
%   k:          The rank dimension (k < ncol(U)).
%
% returns:
%   H:          Same structure as H_prev, where H11U and H11V are replaced with the
%               low-rank counterparts.

B = H_prev.H11U;
C = H_prev.H11V;
[q1,r1] = qr(B,0);
[q2,r2] = qr(C',0);

A = r1*r2';
% Faster to compute svd than svds
[U,D,V] = svd(A);
U = U(:,1:rank);
V = V(:,1:rank);
D = D(1:rank,1:rank);
H = H_prev;
H.H11U = q1*U*sqrt(D);
H.H11V = sqrt(D)*V'*q2';
end

function [H]=add_diag(H,d)
% [H]=add_diag(H,d)
% Helper function that adds the diagonal d to the matrix H
% (faster than H+diag(d))
s     = size(H);
[~,I] = min([s(1), s(2)]);
index = 1:s(I)+1:s(I)^2;  % Indices of the main diagonal
H(index) = H(index)+d';
end

function [dx]=orthogonalization(j,dx,Dxs, Dxs_)
if j>1
  if j>2
    for l=2:j-1
      if norm(Dxs_(:,l-1))==0
        continue
      else
        a = Dxs(:,l-1)'*dx/(Dxs_(:,l-1)'*Dxs(:,l-1));
      end
      dx = dx + a*(Dxs_(:,l)-Dxs_(:,l-1)+Dxs(:,l-1));
    end
  end
  if norm(Dxs_(:,j-1))==0
    a = 0;
  else
    a = dx'*Dxs(:,j-1)/(Dxs_(:,j-1)'*Dxs(:,j-1));
  end
  dx = (dx + a*(Dxs(:,j-1)-Dxs_(:,j-1)))./(1-a);
end
end

function d=update_D(use_inverse,d0,mask,Jdiag)
% [d]=update_D(use_inverse,d0,mask,Jdiag)
% helper function that updates the matrix D=diag(d0) if mask=0 otherwise
% 1/Jdiag or Jdiag if use_inverse is true or false
%
% Inputs:
%   - use_inverse:  Specifies whether H represents the inverse Jacobian or
%                   not
%   - d0:           Matrix of size d
%   - mask:         Index Location where d=1/Jdiag or Jdiag
%   - Jdiag:        Values to replace in d0 where mask=true
d = d0;
if use_inverse
  d(mask) = 1./Jdiag(mask);
else
  d(mask) = Jdiag(mask);
end
end

function [H,d,d0]=initial_H(initH,J,Jdiag,mask,P)
% [H,d,d0]=initial_H(initH,J,Jdiag,mask,P)
% Initialize the matrix H,d,d0 based on arguments use_inverse,is_factored.
% If initH is empty a new matrix is computed using J,Jdiag,mask
% otherwise the information is retrieved from initH
%
% Inputs:
%   - initH:        Structure containing H,d,d0 if already computed. Can be empty
%   - J:            Jacobian (only used if initH is empty)
%   - Jdiag:        Diagonal value of J where mask=true
%   - mask:         Rows of J with single non-zero entry
%   - P:            Parameters
if isempty(initH)
  % Store the inverse of the jacobian
  if P.use_inverse
    H = inv(J);
  else
    H = J;
  end
  n = size(J,1);

  % Decompose H as H=diag(d)+H_remainder.
  % only H_remainder is udpated
  d0 = zeros(n,1);
  d0(P.iy) = -1;
  d = update_D(P.use_inverse,d0,mask,Jdiag);
  H = add_diag(H,-d);
  % H_remainder is approximate by a low rank approx where the rank is
  % given by ldim, such that H_remainder=[U*S,H12;H21,H22]
  if P.is_factored && P.use_inverse
    [U,S,V_] = svds(H(P.iy,P.iy),P.ldim);
    U = U*sqrt(S);
    V_ = sqrt(S)*V_';
    H = struct('H11U',U,'H11V',V_,'H12',H(P.iy,P.ciy),'H21',H(P.ciy,P.iy),'H22',H(P.ciy,P.ciy));
  end
else
  if P.is_factored && P.use_inverse
    H = initH.H;
    % Find a low rank approximation
    % (exploit the current factorization of H=[U*S,H12;H21,H22])
    H = low_rank(H,P.ldim);
  else
    H = initH.H;
  end
  d = initH.d;
  d0 = initH.d0;
end
end

function [dx,Dxs,Dxs_,gmres]=find_direction(j,H,d,Jdiag,Fx,mask, Dxs,Dxs_, ...
  param_ls,param_jac,P)
% [dx,Dxs,Dxs_,gmres]=find_direction(j,H,d,Jdiag,Fx,mask, Dxs,Dxs_,
%                                             param_ls,param_jac,P)
% Find the newton direction dx using the jacobian approximation H
% Compute -H*Fx or H\Fx if use_inverse is true or false.
%
% Inputs:
%   - j:            Inner iteration number
%   - H:            Approximate (inverse) Jacobian
%   - d:            Matrix such that (H+D)^(-1)~J or H+D~J (use_inverse=false)
%   - Jdiag:        Diagonal value of J where mask=true
%   - Fx:           Current function eval
%   - mask:         Rows of J with single non-zero entry
%   - Dxs:          Past dx
%   - Dxs_:         Past dx
%   - param_ls:     Parameter used for solving H*Fx when
%                   large_system=true
%   - param_jac:    Parameter used for solving H*Fx when
%                   use_inverse=true
%   - P:            Parameters

gmres = 0;
m = abs(Fx)~=0;
if ~P.use_inverse
  % Solve the system dx=-H\Fx
  J = add_diag(H,d);
  [dx,gmres] = solve_linear_problem(J,-Fx,[],param_jac.Pinv,mask,Jdiag,P);
  % Take into account previous iterates (only valid if d=0)
  if max(abs(d))==0
    dx = orthogonalization(j,dx,Dxs,Dxs);
  end
else
  if P.large_system
    dx = right_mult(P.is_factored,H,-Fx,m, P.iy,P.ciy);
    if param_ls.iter_since_restart>0
      dx = dx-param_ls.E(:,param_ls.slice)*(param_ls.V(m,param_ls.slice)'*Fx(m));
    end
  else
    dx = right_mult(P.is_factored,H,-Fx,m, P.iy,P.ciy);
  end
  % Take into account previous iterates
  dx = orthogonalization(j,dx,Dxs,Dxs_);
  Dxs_(:,j) = dx;

  % Multiply dx by M and Substract D (e.g. dx=-(M*H+D)*Fx))
  dx(mask) = 0;
  dx(m) = dx(m)-d(m).*Fx(m);
end
end

function [H,param_ls]=update_approx_jac(kbroyden,group_ind,group_size_i,H,df,dx,param_ls,P)
% [H,param_ls]=update_approx_jac(kbroyden,group_ind,group_size_i,H,df,dx,param_ls,P)
% Update the approximation of the jacobian using the secant method (i.e., broyden method)
%
% Inputs:
%   - kbroyden:     Outer iteration number
%   - group_ind:    Index partition of the outer loop
%   - group_size_i: Size of the current inner loop
%   - H:            Approximate (inverse) Jacobian
%   - df,dx:        Matrix for updating Jac.
%   - param_ls:     parameters associate to large_system=true
%   - P:            Parameters
if P.use_inverse
  m_f = max(abs(df),[],1)~=0;
  if P.large_system
    E_tmp = dx'-right_mult(P.is_factored,H,df',m_f, P.iy,P.ciy);
    dxH = left_mult(P.is_factored,H,dx,P.iy,P.ciy);

    if kbroyden-param_ls.start>0
      for g = param_ls.start:kbroyden-1
        E_g = param_ls.E(:,group_ind(g):group_ind(g+1)-1);
        V_g = param_ls.V(:,group_ind(g):group_ind(g+1)-1);
        E_tmp = E_tmp-E_g*(df(:,m_f)*V_g(m_f,:))';
        dxH = dxH+(dx*E_g)*V_g';
      end
    end
    slice = group_ind(kbroyden):group_ind(kbroyden)+group_size_i-1;
    param_ls.E(:,slice)=E_tmp;
    tmp = P.ratio_gb*dxH+(1-P.ratio_gb)*df;
    M = tmp(:,m_f)*df(:,m_f)';
    [Q,R,Perm] = qr(M);
    V_tmp = Perm*pinv(R)*Q'*tmp;
    param_ls.V(:,slice) = V_tmp';
  else
    E = dx'-right_mult(P.is_factored,H,df',m_f, P.iy,P.ciy);
    dxH = left_mult(P.is_factored,H,dx,P.iy,P.ciy);
    tmp = P.ratio_gb*dxH+(1-P.ratio_gb)*df;
    M = tmp(:,m_f)*df(:,m_f)';
    N = tmp;

    % Householder QR
    [Q,R,Perm] = qr(M);
    V = Perm*pinv(R)*Q'*N;
    H = update_H(P.is_factored,H,E,V,P.iy,P.ciy);
  end
else
  % Update full matrix
  s = pinv(dx*dx');
  E = df'-H*dx';
  V = s*dx; % (Dx'*Dx)^-1 * Dx'
  H = H + E*V;
end
end

function H_final=finalize_H(H,d,d0)
% [H_final]=finalize_H(H,d,d0)
% Helper function to collect all matrices
H_final.H  = H;
H_final.d  = d;
H_final.d0 = d0;
end

function solverinfo=combine_solverinfo(solA,solB)
% Combining two solverinfo (assume that solB come after solA)
solverinfo.isconverged      = solB.isconverged;
solverinfo.failed           = solB.failed;
solverinfo.niter            = solA.niter + solB.niter;
solverinfo.residual_max     = solB.residual_max;
solverinfo.res              = solB.res;
solverinfo.residual_2norm   = solB.residual_2norm;
solverinfo.restarts         = solA.restarts + solB.restarts;
solverinfo.mkryl            = solA.mkryl + solB.mkryl;
solverinfo.nfeval           = solA.nfeval + solB.nfeval;
solverinfo.stopped_early    = solB.stopped_early;
solverinfo.prev_H           = solB.prev_H;
end
