function [post nlZ dnlZ] = infGrid_Laplace(hyp, mean, cov, lik, x, y, opt)

% Laplace approximation to the posterior Gaussian process with covGrid
% covariance and (possibly) non-Gaussian likelihood.
% The (Kronecker) covariance matrix used is given by:
%   K = kron( kron(...,K{2}), K{1} ) = K_p x .. x K_2 x K_1.
%
% The function takes a specified covariance function (see covFunctions.m) and
% likelihood function (see likFunctions.m), and is designed to be used with
% gp.m and in conjunction with covGrid*. See also "help infGrid.m" for further
% hints on the interface.
%
% Copyright (c) by Hannes Nickisch 2016-01-15.
%
% See also INFMETHODS.M, INFGRID.M, COVGRID.M.

persistent last_alpha                                   % copy of the last alpha
if any(isnan(last_alpha)), last_alpha = zeros(size(last_alpha)); end   % prevent

inf = 'infLaplace';
cov1 = cov{1}; if isa(cov1, 'function_handle'), cov1 = func2str(cov1); end
if ~strncmp(cov1,'covGrid',7); error('Only covGrid* supported.'), end% check cov
scale_add = strcmp(cov1,'covGridScaleAdd');    % additive scaled covariance mode
if scale_add, Q = numel(cov{2}); end           % number of scaled additive terms

xg = cov{3}; p = numel(xg);                 % extract underlying grid parameters
if nargin<=6, opt = []; end                        % make opt variable available
proj_norm  = false; proj_ortho = false;                                   % init
if isfield(opt,'proj_ortho'), proj_ortho = opt.proj_ortho; end  % enforce P*P'=I
if isfield(opt,'proj_norm'),  proj_norm  = opt.proj_norm; end  % or diag(P*P')=1
hP = 1;                     % no projection at all
if isfield(hyp,'P')                  % apply transformation matrix P if provided
  hP = hyp.P;
  if proj_ortho                                                       % priority
    hP = sqrtm(hP*hP')\hP;                               % orthonormal projector
  elseif proj_norm
    hP = diag(1./sqrt(diag(hP*hP')))*hP;                      % normal projector
  end
end
if isfield(opt,'stat'), stat = opt.stat; else stat = false; end  % report status

[K,M] = feval(cov{:}, hyp.cov, x*hP');    % evaluate covariance mat constituents
[xx,ng,Dg] = covGrid('idx2dat',xg,x);             % turn into data vector if idx
if stat      % show some information about the nature of the p Kronecker factors
  if scale_add, s = ['K = sum(',num2str(Q),')']; Ks = K.sum.K; Ks = Ks.kron;
  else          s = 'K ='; Ks = K.kron; end; s = [s,' kron[ '];
  for i=1:p
    if isnumeric(Ks(i).factor)
      si = sprintf('mat(%d)',size(Ks(i).factor,1));
    else
      sz = num2str(size(xg{i}{1},1));
      for j=2:numel(xg{i}), sz = [sz,'x',num2str(size(xg{i}{j},1))]; end
      si = sprintf('%s(%s)',Ks(i).factor.descr(1:4),sz);
    end
    if i<p, si = [si,' x ']; end
    s = sprintf('%s%s',s,si);
  end
  fprintf('%s ]\n',s)
end
n = size(x,1); D = sum(Dg); N = prod(ng);                                 % dims
mid = n==N; if mid, q = randn(N,1); mid = max(abs(q-M*q))<1e-10; end % M==eye(N)
m = feval(mean{:}, hyp.mean, xx*hP');                     % evaluate mean vector
likfun = @(f) feval(lik{:},hyp.lik,y,f,[],inf);        % log likelihood function

if isfield(opt,'cg_tol'),   cgtol = opt.cg_tol;       % stop conjugate gradients
else cgtol = 1e-5; end
if isfield(opt,'cg_maxit'), cgmit = opt.cg_maxit;      % number of cg iterations
else cgmit = min(n,20); end
if isfield(opt,'nlZ_exact'), exact = opt.nlZ_exact;        % use dense matrix ..
else exact = 0; end                             % .. computations instead of LCG
% no of samples for perturb-and-MAP, see George Papandreou and Alan L. Yuille:
% "Efficient Variational Inference in Large-Scale Bayesian Compressed Sensing"
ns = 0;                       % do nothing per default, 20 is suggested in paper
if isfield(opt,'pred_var') && opt.pred_var, ns = max(opt.pred_var,20); end

if any(size(last_alpha)~=[n,1])     % find a good starting point for alpha and f
  alpha = zeros(n,1);                      % start at mean if sizes do not match
else
  alpha = last_alpha;                                             % try last one
  if Psi(alpha,m,K,M,likfun) > -sum(likfun(m))   % default f==m better => use it
    alpha = zeros(n,1);
  end
end

% switch between optimisation methods
alpha = irls(alpha, m,K,M,likfun, opt);                       % run optimisation
alpha = irls(alpha, m,K,M,likfun, opt);                              % run again

f = M*K.mvm(M'*alpha) + m;                      % compute latent function values
last_alpha = alpha;                                     % remember for next call
[lp,dlp,d2lp,d3lp] = likfun(f); W = -d2lp;                 % evaluate likelihood
post.alpha = alpha;                            % return the posterior parameters
sW = sqrt(max(W,0)); post.sW = sW;             % remove sign in case of negative
mvm = @(t) mvmsWKsW(t,K,sW,M);                  % symmetric single parameter mvm
rep = @(sW,k) repmat(sW,1,size(k,2));
post.L = @(k) -rep(sW,k).*solveMVM(rep(sW,k).*k,mvm,cgtol,cgmit);
Mtal = M'*alpha;                              % blow up alpha vector from n to N
if ns>0
  kronmvm = K.kronmvm;
  circ_emb = -5;                              % Whittle embedding overlap factor
  [V,e,ir] = eigkron(K,xg,ng,circ_emb);               % eigenvalue decomposition
  % explained variance on the grid  vg=diag(Ku*M'*inv(M*Ku*M'+diag(sn.^2))*M*Ku)
  % relative accuracy r = std(vg_est)/vg_exact = sqrt(2/ns)
  z = kronmvm(V,repmat(sqrt(e(ir)),1,ns).*kronmvm(V,randn(N,ns),1)); % z~N(0,Ku)
  sn = 1./sW;
  z = M*real(z) + repmat(sn,1,ns).*randn(n,ns);% z~N(0,A), A=M*Ku*M'+diag(sn.^2)
  z = K.mvm(M'*post.L(z)); vg = sum(z.*z,2)/ns;       % z~N(0,Ku*M'*inv(A)*M*Ku)
else
  vg = zeros(N,1);                                       % no variance explained
end
post.predict = @(xs) predict(xs,xg,K.mvm(Mtal),vg,hyp,mean,cov,lik); % f|y,mu|s2

% diagnose optimality
err = @(x,y) norm(x-y)/max([norm(x),norm(y),1]);   % we need to have alpha = dlp
% dev = err(alpha,dlp);  if dev>1e-4, warning('Not at optimum %1.2e.',dev), end

if nargout>1
  if exact                               % exact marginal likelihood computation
    Ki = K.mvm(eye(N)); Ki = M*Ki*M';
    Li = chol(eye(n)+sW*sW'.*Ki);
    nlZ = alpha'*(f-m)/2 - sum(lp) + sum(log(diag(Li)));
  else                 % Fiedler's 1971 upper log determinant bound on A = I+K*W
    a = logdet_I_KW(K,W,xg,ng);
    nlZ = alpha'*(f-m)/2 - sum(lp) + a/2;                   % upper bound on nlZ
  end
end

if nargout>2                                           % do we want derivatives?
  dnlZ = hyp;                                   % allocate space for derivatives
  mvm = @(t) mvmsWKsW(t,K,sW,M);                          % single parameter mvm
  mvmZ = @(k) sW.*solveMVM(k.*sW,mvm,cgtol,cgmit);           % Z = inv(K+inv(W))
  if exact              % deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
    A = Ki*diag(W)+eye(numel(m));
    Ci = Li'\(repmat(sW,1,numel(m)).*Ki);
    g = (diag(Ki)-sum(Ci.^2,1)')/2;
    dfhat = g.*d3lp;              % d nlZ / d fhat:  diag(inv(inv(K)+W)).*d3lp/2
  else
    g = zeros(n,1);
  end                                         % no implicit derivative if ~exact
  for i=1:length(hyp.cov)                                    % covariance hypers
    dK = feval(cov{:}, hyp.cov, x*hP', [], i);
    if exact
      dKi = dK.mvm(eye(N)); dKi = M*dKi*M';
      da = trace( A'\(diag(W)*dKi) );
    else
      [lb,ub,da] = logdet_I_KW(K,W,xg,ng,dK);
    end
    dnlZ.cov(i) = da/2 - Mtal'*dK.mvm(Mtal)/2;                   % explicit part
    if exact
      b = M*dK.mvm(M'*dlp);                            % inv(eye(n)+K*diag(W))*b
      dnlZ.cov(i) = dnlZ.cov(i)-dfhat'*(b-M*K.mvm(M'*mvmZ(b)));       % implicit
    end
  end
  for i=1:length(hyp.lik)                                    % likelihood hypers
    [lp_dhyp,dlp_dhyp,d2lp_dhyp] = feval(lik{:},hyp.lik,y,f,[],inf,i);
    dnlZ.lik(i) = -g'*d2lp_dhyp - sum(lp_dhyp);                  % explicit part
    if exact
      b = M*K.mvm(M'*(dlp_dhyp));                      % inv(eye(n)+K*diag(W))*b
      dnlZ.lik(i) = dnlZ.lik(i)-dfhat'*(b-M*K.mvm(M'*mvmZ(b)));       % implicit
    end
  end
  for i=1:length(hyp.mean)                                         % mean hypers
    dm = feval(mean{:}, hyp.mean, xx*hP', i);
    dnlZ.mean(i) = -alpha'*dm;                                   % explicit part
    if exact
      dnlZ.mean(i) = dnlZ.mean(i)-dfhat'*(dm-M*K.mvm(M'*mvmZ(dm)));   % implicit
    end
  end
end

% Upper determinant bound on log |I+K*diag(W)| using Fiedler's 1971 inequality.
% K = kron( kron(...,K{2}), K{1} ), W = diag(w) both symmetric psd.
% The bound is exact for W = w*ones(N,1).
%
% Given nxn spd matrices C and D with ordered nx1 eigenvalues c, d 
% then exp(lb)=prod(c+d) <= det(C+D) <= prod(c+flipud(d))=exp(ub).
function [ub,lb,dub,dlb] = logdet_I_KW(K,W,xg,ng,dK)
  circ_emb = -5;                              % Whittle embedding overlap factor
  [V,e,ir] = eigkron(K,xg,ng,circ_emb);               % eigenvalue decomposition
  vnum = true;            % flag indicating whether all entries in V are numeric
  p = numel(xg); for j=1:p, vnum = vnum && isnumeric(V{j}); end
  n = numel(W); N = numel(e);                                       % dimensions
  [ed,ie] = sort(e,'descend'); ie = ie(1:min(n,N));         % sorted eigenvalues
  a = n/N*ed;                                        % approximate spectrum of K
  if n>N, a = [a;zeros(n-N,1)]; else a = a(1:n); end % See also Prob.III.6.14 ..
  Wd = sort(W,'descend');   Wa = sort(W,'ascend');    % .. in Matrix Analysis ..
  ub = sum(log(1 + a.*Wd)); lb = sum(log(1 + a.*Wa));          % ..  Bhatia 1997
  if nargin>4 && nargout>2
    % ub = sum(log(1 + e(ir).*Wu)); lb = sum(log(1 + e(ir).*Wl));
    Wu = zeros(N,1); Wu(ie) = n/N*Wd(1:min(n,N)); Wu = Wu(ir);
    Wl = zeros(N,1); Wl(ie) = n/N*Wa(1:min(n,N)); Wl = Wl(ir);
    ubp = Wu./(1 + e(ir).*Wu); lbp = Wl./(1 + e(ir).*Wl);
    dg = 1;                                                 % dg = diag(V'*dK*V)
    for j=1:numel(K.kron)
      dKj = dK.kron(j).factor;
      if isstruct(V{j})
        [xj,nj]  = covGrid('expand',xg{j}); Nj = prod(nj);
        dkj = circ(dKj.kii,nj,circ_emb);
        dj = sqrt(Nj)*real(V{j}.mvmt(dkj));
      else
        dj = diag( V{j}' * dKj * V{j} );
      end
      dg = kron(dj,dg);
    end
    dub = ubp'*dg; dlb = lbp'*dg;
  end

% Evaluate criterion Psi(alpha) = alpha'*K*alpha + likfun(f), where 
% f = K*alpha+m, and likfun(f) = feval(lik{:},hyp.lik,y,  f,  [],inf).
function [psi,dpsi,f,alpha,dlp,W] = Psi(alpha,m,K,M,likfun)
  f = M*K.mvm(M'*alpha) + m;
  [lp,dlp,d2lp] = likfun(f); W = -d2lp;
  psi = alpha'*(f-m)/2 - sum(lp);
  if nargout>1, dpsi = M*K.mvm(M'*(alpha-dlp)); end

% Run IRLS Newton algorithm to optimise Psi(alpha).
function alpha = irls(alpha, m,K,M,likfun, opt)
  n = numel(alpha);
  if isfield(opt,'irls_maxit'), maxit = opt.irls_maxit; % max no of Newton steps
  else maxit = 20; end                                           % default value
  if isfield(opt,'irls_Wmin'),  Wmin = opt.irls_Wmin; % min likelihood curvature
  else Wmin = 0.0; end                                           % default value
  if isfield(opt,'irls_tol'),   tol = opt.irls_tol;     % stop Newton iterations
  else tol = 1e-6; end                                           % default value
  if isfield(opt,'cg_tol'),   cgtol = opt.cg_tol;     % stop conjugate gradients
  else cgtol = 1e-5; end
  if isfield(opt,'cg_maxit'), cgmit = opt.cg_maxit;    % number of cg iterations
  else cgmit = 2*n; end         
  
  smin_line = 0; smax_line = 2;           % min/max line search steps size range
  nmax_line = 10;                          % maximum number of line search steps
  thr_line = 1e-4;                                       % line search threshold
  Psi_line = @(s,alpha,dalpha) Psi(alpha+s*dalpha, m,K,M,likfun);  % line search
  pars_line = {smin_line,smax_line,nmax_line,thr_line};  % line seach parameters
  search_line = @(alpha,dalpha) brentmin(pars_line{:},Psi_line,5,alpha,dalpha);

  f = M*K.mvm(M'*alpha) + m;
  [lp,dlp,d2lp] = likfun(f); W = -d2lp;
  Psi_new = Psi(alpha,m,K,M,likfun);
  Psi_old = Inf;  % make sure while loop starts by the largest old objective val
  it = 0;                          % this happens for the Student's t likelihood
  while Psi_old - Psi_new > tol && it<maxit                       % begin Newton
    Psi_old = Psi_new; it = it+1;                               % limit stepsize
    W = max(W,Wmin); sW = sqrt(W);     % reduce steps by incr curvature of bad W
    b = W.*(f-m) + dlp; mvm = @(t) mvmsWKsW(t,K,sW,M);    % single parameter mvm
    dalpha = sW.*solveMVM(b./sW,mvm,cgtol,cgmit) - alpha; % Newt dir+line search
%     c = sW.*(M*K.mvm(M'*b)); % like in GPML book
%     dalpha = b - sW.*solveMVM(c,mvm,cgtol,cgmit) - alpha;
    [s_line,Psi_new,n_line,dPsi_new,f,alpha,dlp,W] = search_line(alpha,dalpha);
  end                                                  % end Newton's iterations

% symmetric mvm so that q = diag(sW)*M*K*M'*diag(sW)*p + p
% using Kronecker representation
function q = mvmsWKsW(p,K,sW,M)
  q = repmat(sW,1,size(p,2)).*p;
  q = M*K.mvm(M'*q);
  q = repmat(sW,1,size(p,2)).*q + p;

%------------------------------------------------------------------------------%

function y = zfill(z,n)                                      % fill z with zeros
  y = accumarray((1:numel(z))',z,[n,1]);

function q = solveMVM(p,mvm,varargin) % solve q = mvm(p) via conjugate gradients
  [q,flag,relres,iter] = conjgrad(mvm,p,varargin{:});                 % like pcg
  if ~flag,error('Not converged after %d iterations, r=%1.2e\n',iter,relres),end

% Compute latent and predictive means and variances by grid interpolation.
function [fmu,fs2,ymu,ys2] = predict(xs,xg,Kalpha,vg,hyp,mean,cov,lik)
  Ms = covGrid('interp',xg,xs);                    % obtain interpolation matrix
  xs = covGrid('idx2dat',xg,xs);                        % deal with index vector
  ms = feval(mean{:},hyp.mean,xs);                         % evaluate prior mean
  fmu = ms + Ms*Kalpha;                 % combine and perform grid interpolation
  if nargout>1
    if norm(vg,1)>1e-10, ve = Ms*vg; else ve = 0; end    % interp grid var expl.
    ks = feval(cov{:},hyp.cov,xs,'diag');              % evaluate prior variance
    fs2 = max(ks-ve,0);              % combine, perform grid interpolation, clip
    if nargout>2, [lp, ymu, ys2] = feval(lik{:},hyp.lik,[],fmu,fs2); end
  end

% Solve x=A*b with symmetric A(n,n), b(n,m), x(n,m) using conjugate gradients.
% The method is along the lines of PCG but suited for matrix inputs b.
function [x,flag,relres,iter,r] = conjgrad(A,b,tol,maxit)
if nargin<3, tol = 1e-10; end
if nargin<4, maxit = min(size(b,1),20); end
x0 = zeros(size(b)); x = x0;
if isnumeric(A), r = b-A*x; else r = b-A(x); end, r2 = sum(r.*r,1); r2new = r2;
nb = sqrt(sum(b.*b,1)); flag = 0; iter = 1;
relres = sqrt(r2)./nb; todo = relres>=tol; if ~any(todo), flag = 1; return, end
on = ones(size(b,1),1); r = r(:,todo); d = r;
for iter = 2:maxit
  if isnumeric(A), z = A*d; else z = A(d); end
  a = r2(todo)./sum(d.*z,1);
  a = on*a;
  x(:,todo) = x(:,todo) + a.*d;
  r = r - a.*z;
  r2new(todo) = sum(r.*r,1);
  relres = sqrt(r2new)./nb; cnv = relres(todo)<tol; todo = relres>=tol;
  d = d(:,~cnv); r = r(:,~cnv);                           % get rid of converged
  if ~any(todo), flag = 1; return, end
  b = r2new./r2;                                               % Fletcher-Reeves
  d = r + (on*b(todo)).*d;
  r2 = r2new;
end

% Eigendecomposition of a Kronecker matrix with dense, Toeplitz or BTTB factors.
% K.mvm(z) == kronmvm(V,e(ir).*kronmvm(V,z,1)), approximate for Toeplitz/BTTB
function [V,e,ir] = eigkron(K,xg,ng,circ_emb)
isbttb = @(Ki) isstruct(Ki) && (strcmp (Ki.descr,'toep') ...
                             || strncmp(Ki.descr,'bttb',4));   % BTTB covariance
p = numel(K.kron); V = cell(p,1); E = cell(p,1);     % sizes and allocate memory
e = 1; ir = 1;      % compute eigenvalue diagonal matrix full/reduced rank index
for j=1:p
  if isbttb(K.kron(j).factor)
    [xj,nj] = covGrid('expand',xg{j});                    % extract subgrid size
    kc = circ(K.kron(j).factor.kii,nj,circ_emb); % circulant embedded covariance
    ej = max(real(fftn(kc)),0);        % thresholding to ensure pd approximation
    ep = ej>0; Ej = ej(ep);            % keep only strictly positive eigenvalues
    V{j}.mvm  = @(v) Fmvm( v,ep,nj);           % V{j}' is partial Fourier matrix
    V{j}.mvmt = @(v) Fmvmt(v,ep,nj);
    V{j}.size = [numel(ep),sum(ep(:))];
  else [V{j},Ej] = eigr(K.kron(j).factor,0);   % eigenvalues of non-Toeplitz mat
  end
  E{j} = Ej;
end
for j=1:p, de = zfill(E{j},ng(j)); e = kron(de,e); ir = kron(de>0,ir); end
ir = ir~=0;

function a = Fmvmt(b,ep,nj)
  Nj = prod(nj); sNj = sqrt(Nj);       % scaling factor to make FFTN orthonormal
  nr = numel(b)/Nj;                        % number of right-hand-side arguments
  b = reshape(b,[nj(:)',nr]);
  for i=1:numel(nj), b = fft(b,[],i); end                         % emulate fftn
  b = reshape(b,Nj,[]);
  a = b(ep,:)/sNj;                          % perform rescaling and thresholding

function b = Fmvm(a,ep,nj)
  Nj = prod(nj); sNj = sqrt(Nj);       % scaling factor to make FFTN orthonormal
  nr = numel(a)/sum(ep(:));                % number of right-hand-side arguments
  b = zeros(Nj,nr);                        % allocate memory for return argument
  b(ep(:),:) = a; b = reshape(b,[nj(:)',nr]);      % accumarray and target shape
  for i=1:numel(nj), b = ifft(b,[],i); end                       % emulate ifftn
  b = reshape(b,Nj,nr)*sNj;                                  % perform rescaling

% Real eigenvalues and eigenvectors up to the rank of a real symmetric matrix.
% Decompose A into V*D*V' with orthonormal matrix V and diagonal matrix
% D = diag(d)represented by a column vector.
% Entries of d with index larger tha the rank r of the matrix A as returned by
% the call rank(A,tol) are zero.
function [V,d] = eigr(A,econ,tol)
if nargin<2, econ = false; end                          % assign a default value
if isnumeric(econ), econ = econ==0; end                      % turn into boolean
if ischar(econ), econ = strcmp(econ,'econ'); end             % turn into boolean
if nargout==0, return, end                     % avoid computation in limit case
if nargout==1
  d = sort(eig((A+A')/2),'descend');       % eigenvalues of strictly symmetric A
else
  [V,D] = eig((A+A')/2);                 % decomposition of strictly symmetric A
  d = max(real(diag(D)),0); [d,ord] = sort(d,'descend');      % tidy up and sort
end
n = size(A,1);                                                  % dimensionality
if nargin<3, tol = n*eps(max(d)); end, r = sum(d>tol);             % get rank(A)
d(r+1:n) = 0;                             % set junk eigenvalues to strict zeros
if econ, d = d(1:r); end                                   % truncate if desired
if nargout==1                                        % only eigenvalues required
  V = d;
else                                             % entire decomposition required
  V(:,1:r) = real(V(:,ord(1:r)));                    % eigenvectors up to rank r
  if econ
    V = V(:,1:r);
  else                                            % ortho completion if required
    V(:,r+1:n) = null(V(:,1:r)');
  end
end

% Construct a circular embedding c(nx1) from a covariance function k.
%  - k is a function and the call k(1:n) returns a vector of length n
%  - s is the setting for the embedding with values s={..,-2,-1,0,1,2,3,4,5}.
%
% s<0  Whittle embedding [2] as described by Guinness & Fuentes [3] in
%      equation (5) with N = |s|.
% s=0  No embedding c = k(n).
% s=1  Generic embedding c = [k(1:n/2), k(n/2:-1:2)] without smoothing
%      as in the Strang preconditioner.
% s=2  Variant of smoothing inspired by [1] respecting the conditions
%      c(1) = k(1), c(i)=c(n-i+2) for i=2..n and idempotence. We use the generic
%      scheme c(2:n) = w.*k(2:end) + flipud(w.*k(2:end)), where w are sigmoidal
%      interpolation weights.
% s=3  Linear smoothing as in T. Chan's preconditioner.
% s=4  Tyrtyshnikov preconditioner.
% s=5  R. Chan's preconditioner.
%
% [1] Helgason, Pipiras & Abry, Smoothing windows for the synthesis of
%     Gaussian stationary random fields using circulant matrix embedding,
%     Journal of Computational and Graphical Statistics, 2014, 23(3).
% [2] Whittle, On stationary processes in the plane, Biometrika, 1954, 41(3/4).
% [3] Guinness & Fuentes, Circulant embedding of approximate covariances for
%     inference from Gaussian data on large lattices, 2014, preprint,
%     http://www4.stat.ncsu.edu/~guinness/circembed.html.
function c = circ(k,n,s)
p = numel(n); n = n(:)';                             % dimensions and row vector
if nargin<3, s = -2; end                                         % default value
if s==0                                                    % no embedding at all
  xg = cell(p,1); for i=1:p, xg{i} = (1:n(i))'; end              % standard grid
  xc = covGrid('expand',xg);
  c = reshape(k(xc),[n,1]);
elseif s<0                                           % Whittle/Guinness aliasing
  N = abs(s);
  xg = cell(p,1); for i=1:p, xg{i} = (1-N*n(i):N*n(i))'; end
  sz = [n; 2*N*ones(1,p)]; sz = sz(:)';
  c = reshape(k(covGrid('expand',xg)),sz);
  for i=1:p, c = sum(c,2*i); end, c = squeeze(c);
else
  if     s==1                                                 % Strang embedding
    xg = cell(p,1);
    for i=1:p, n2 = floor(n(i)/2)+1; xg{i} = [1:n2, n2-n(i)+1:0]'; end
    xc = covGrid('expand',xg);
    c = reshape(k(xc),[n,1]);
  elseif s==2                                               % Helgason smoothing
    if numel(n)>1, error('Only 1d allowed'), end
    k = k(1:n); k = k(:);
    k0 = k(2:n); % smooth by sigmoid interpolation between the two flipped parts
    wk = k0./(1+exp( 30*(((1:n-1)-n/2)'/n) ));           % sigmoid-weighted part
    c = [k(1); wk + flipud(wk)];
  elseif s==3                                       % T. Chan's linear smoothing
    if numel(n)>1, error('Only 1d allowed'), end
    k = k(1:n); k = k(:);
    w = (1:n-1)'/n;
    c = k; c(2:n) = (1-w).*c(2:n) + w.*flipud(c(2:n));
  elseif s==4                                                     % Tyrtyshnikov
    if numel(n)>1, error('Only 1d allowed'), end
    k = k(1:n); k = k(:);
    w = (1:n-1)'/n;
    c = k; c2 = c(2:n).*c(2:n);
    c(2:n) = ((1-w).*c2 + w.*flipud(c2)) ./ ((1-w).*c(2:n) + w.*flipud(c(2:n)));
  elseif s==5                                                          % R. Chan
    if numel(n)>1, error('Only 1d allowed'), end
    k = k(1:n); k = k(:);
    c = k; c(2:n) = c(2:n) + flipud(c(2:n));                   % mirrored add-up
  end
end
assert(numel(c)==prod(n))                                               % length
c2 = c;
for i=1:p
  c2 = reshape(c2,[prod(n(1:i-1)-1),n(i),prod(n(i+1:end))]); c2 = c2(:,2:end,:);
end
c2f = c2; for i=1:p, c2f = flipdim(c2f,i); end
if s~=0, assert(max(reshape(abs(c2-c2f),[],1))<1e-10), end         % circularity