clear all, close all

%% param config
% User-tunable settings; the sections further down read these values.

use_gpu = 1; % nonzero -> Caffe runs in GPU mode, zero -> CPU mode
root_path = [pwd '/../../']; % two levels above the current working directory
cnn_model_name = [root_path 'caffe/examples/face/face_iter_6000.caffemodel.example']; % pre-trained nn for initialization
num_iters = 20; % # training iterations
lr_scale = 0.002; % learning rate scaling
nx = 70; % grid size

%% initialize env

% Put the Caffe MATLAB interface and the KISS-GP code on the search path.
% A single addpath call prepends its arguments in the order given, so the
% kissgp folder is listed first to end up ahead of caffe/matlab, exactly as
% two consecutive addpath calls (caffe/matlab, then kissgp) would leave it.
addpath([root_path 'gpml/kissgp'], [root_path 'caffe/matlab']);

% Choose the Caffe compute mode according to use_gpu.
if use_gpu
  caffe.set_mode_gpu();
  %caffe.set_device(1);
else
  caffe.set_mode_cpu();
end

%% initialize CNN

% Create the solver from the MATLAB-specific solver definition.
example_dir = [root_path, 'examples/face/'];
solver = caffe.Solver([example_dir, 'face_solver.prototxt_matlab']);

% Keep handles to the first test net and to the training net.
test_net = solver.test_nets(1);
train_net = solver.net;

% Initialize the training net from the pre-trained model file.
train_net.copy_from(cnn_model_name);

%% load data
% Reads the mean image and the training images, then runs the training set
% through the CNN once to obtain the initial feature matrix Xtrain_cnn.

fprintf('loading data ...\n');
% load mean image
data_dir = [root_path 'caffe/examples/face/data/'];
im_mean = caffe.io.read_mean([data_dir 'mean.binaryproto']);
im_size = 28*28; % pixel count of one 28x28 image; kept defined for later sections
numtrain = 12000;
numtest = 800;

% load training data: the list file is parsed as "<string> <number>" per
% entry, i.e. an image file name followed by its regression target
train_dir = [data_dir 'face_images_train/'];
train_list = [train_dir 'train_list.txt'];
train_fid = fopen(train_list);
if train_fid < 0
  error('could not open training list %s', train_list);
end
C = textscan(train_fid, '%s %f', numtrain);
fclose(train_fid); % the list is fully parsed, release the file handle
file_names = C{1};
ytrain = C{2};
if numel(file_names) < numtrain || numel(ytrain) < numtrain
  error('training list %s has fewer than %d complete entries', train_list, numtrain);
end

input_data = zeros(28, 28, 1, numtrain, 'single');
for m=1:numtrain
  % image
  file_name = [train_dir file_names{m}];
  im = caffe.io.load_image(file_name);
  im = reshape(im,28,28);
  % subtract the mean image and scale (0.00390625 = 1/256)
  im = (im - im_mean) * 0.00390625;
  input_data(:,:,1,m) = im;
end
% forward to get cnn features
train_net.blobs('data').reshape([28 28 1 numtrain]); %reshape cnn input blob
train_net.reshape();
train_net.forward({input_data});
% get cnn feature from the last layer
Xtrain_cnn = train_net.blobs('ip1.6').get_data(); %cnn features
Xtrain_cnn = double(Xtrain_cnn'); %size = numtrain x D
D = size(Xtrain_cnn, 2);

% load test data: the list file is parsed as "<string> <number>" per entry,
% i.e. an image file name followed by its regression target
test_dir = [data_dir 'face_images_test/'];
test_list = [test_dir 'test_list.txt'];
test_fid = fopen(test_list);
if test_fid < 0
  error('could not open test list %s', test_list);
end
C = textscan(test_fid, '%s %f', numtest);
fclose(test_fid); % the list is fully parsed, release the file handle
file_names = C{1};
ytest = C{2};
if numel(file_names) < numtest || numel(ytest) < numtest
  error('test list %s has fewer than %d complete entries', test_list, numtest);
end

test_data = zeros(28, 28, 1, numtest, 'single');
for m=1:numtest
  % image
  file_name = [test_dir file_names{m}];
  im = caffe.io.load_image(file_name);
  im = reshape(im,28,28);
  % subtract the mean image and scale (0.00390625 = 1/256)
  im = (im - im_mean) * 0.00390625;
  test_data(:,:,1,m) = im;
end
% forward to get cnn features
train_net.blobs('data').reshape([28 28 1 numtest]); %reshape cnn input blob
train_net.reshape();
train_net.forward({test_data});
% get cnn feature from the last layer
Xtest_cnn = train_net.blobs('ip1.6').get_data(); %cnn features
Xtest_cnn = double(Xtest_cnn'); %size = numtest x D

% test cnn performance: blob 'ip2' is read right after the forward pass on
% the test images and compared against ytest
y_reg = train_net.blobs('ip2').get_data();
rmse = sqrt(mean((ytest-y_reg').^2));
fprintf('CNN RMSE %f\n', rmse);

%% initialize GP
% Sets up the inference options, the kernel/mean/likelihood with their
% initial hyperparameters, and the grid used by covGrid.

% optimization params
opt.toep1d = false;    % one of [], true, false: toggles whether toep* may be used
opt.ntoep_min = 1000;
opt.circ_emb = true;   % toggle to use the circular embedding
opt.cg_maxit = 10000; opt.cg_tol = 1e-6;

% kernel hyps: one covSEiso entry per CNN feature dimension (D entries),
% each starting from the same [log(ell); log(sf)] pair
cov = repmat({@covSEiso}, [1, D]);
sf = 1; ell = 0.5; hyp.cov = repmat(log([ell;sf]), [D, 1]);
% mean = linear + constant: D zero-initialized entries plus one trailing zero
meanfunc = {@meanSum, {@meanLinear, @meanConst}}; hyp.mean = [zeros(D,1); 0];
sn = 0.3;  hyp.lik = log(sn);
likfunc = @likGauss;
% infGrid with the options above appended to whatever arguments it is called with
inf_method = @(varargin) infGrid(varargin{:},opt);
% grid: created from the stacked train and test features with nx as grid size
xg = covGrid('create', [Xtrain_cnn; Xtest_cnn], 1, nx);
covg = {@covGrid,cov,xg};

% training
% Alternates between updating the GP hyperparameters and updating the CNN
% weights by backpropagating the GP gradient through the feature layer.

% Initial hyperparameter fit on the starting CNN features. Per the GPML
% minimize convention a negative length bounds the number of function
% evaluations (here 100).
hyp = minimize(hyp,@gp,-100,inf_method,meanfunc,covg,likfunc,Xtrain_cnn,ytrain);

for iter=1:num_iters
  % from the second iteration on, divide the scale by 10, floored at 1e-5
  if iter>1
      lr_scale = max([lr_scale/10, 1e-5]);
  end

  % compute gradient w.r.t the base kernel input
  % (evaluated with the hyperparameters from before this iteration's update)
  [~,~,~,dx] = infGrid_cnngp(hyp,meanfunc,covg,'likGauss',Xtrain_cnn,ytrain,opt);
  % update base kernel hyps using the GPML routine
  hyp = minimize(hyp,@gp,1,inf_method,meanfunc,covg,likfunc,Xtrain_cnn,ytrain);
  % update CNN params through backpropagation
  % (named top_diff so the built-in diff function is not shadowed)
  top_diff = {dx' * lr_scale};
  train_net.blobs('data').reshape([28 28 1 numtrain]); %reshape cnn input blob
  train_net.reshape();
  train_net.forward({input_data}); % forward pass
  train_net.backward_from('ip1.6', {'ip1.6'}, top_diff); % backward pass
  solver.apply_update();

  % get the latest cnn features
  train_net.forward({input_data});
  Xtrain_cnn = train_net.blobs('ip1.6').get_data();
  Xtrain_cnn = double(Xtrain_cnn');
  train_net.blobs('data').reshape([28 28 1 numtest]);
  train_net.reshape();
  train_net.forward({test_data});
  Xtest_cnn = train_net.blobs('ip1.6').get_data();
  Xtest_cnn = double(Xtest_cnn');

  % test
  % NOTE(review): covg still holds the grid xg built before the loop from the
  % initial features; it is not rebuilt after the features change - confirm
  % this is intended.
  [postg,nlZg,dnlZg] = infGrid(hyp,meanfunc,covg,'likGauss',Xtrain_cnn,ytrain,opt);
  ymug = postg.fmu(Xtest_cnn); % quick interpolated prediction
  rmse = sqrt(mean((ytest-ymug).^2));
  fprintf('iter %d, RMSE %f\n', iter, rmse);
end

%% save
%train_net.save(['dkl_rbf_face.caffemodel']);
%save dkl_rbf_face_hyps.mat hyp
