%% This script tests all models using cross validation
% seed random number generator
rng(1);
% load patient data from simulations
load('patient_data_fake.mat');
% load modeling frameworks
load('model_names.mat')

%%%%%%%% IF ON AN ITERATION STEP
load('uhat_fake_iter3_020717.mat')
%%%%%%%%

% number of proposed models
num_models = length(model_names);
% select model to fit this round
model_index = 1;
% get the model name (e.g. 'threshold model')
modelname = model_names{model_index};
% number of patients
num_patients = length(patient_names_fake);
% number of hours we want to inspect (336 is two weeks)
total_time = 336;
% time resolution for integration and plotting (0.5 is half hour)
time_res = 0.5;
% number of initial grid search locations for each patient
num_runs = 1;
% fixed k0 for patients (how fast patients rebound from reduced or increased pain, absent drugs)
% we take rebound half life to be half an hour
fixed_k0 = log(2)/0.5;
% indicator to merge all drug classes together (1 for merge, 0 for not)
merge_all_drugs = 0; common_drug_info = [log(0.5)/(-4.0),1];
% indicator of drug classes to include in fitting (LA,SA,NO) 
drugs_to_include = [1 1 1];
% threshold for drug dose times to be included in model (default is 1)
drug_threshold = 1;
% indicator that pain reporting probability is skewed towards higher pain
report_skew = 0;

%% Cross validation for full model and all patients
% reset number of models during testing - TEMP
num_models = 1;
% reset number of patients for testing - TEMP
%num_patients = 1;
% average testing error for all models and patients (store both error and both 'u' values for the hybrid model)
cv_error_all_models = zeros(num_models,num_patients,3);
% cycle through models to compare error
for ii=1:num_models
    % set model options
    [merge_all_drugs,drugs_to_include,drug_threshold,report_skew] = set_model_options(ii);
    % two-fold cross validation (split data into two halves)
    for jj=1:num_patients
        % print patient number so we know where we're at
        jj
        % Long acting (t_halflife = 10 hr); Short acting (t_halflife = 4 hr); Non-opiod (t_halflife = 4 hr)
        drug_info = [log(0.5)/(-10.0),1; log(0.5)/(-4.0),1; log(0.5)/(-4.0),1];
        % load patient data into convenient form (just useful info on specified time)
        filename = patient_names_fake{jj};
        % load patient data - use [1:2,4:end] for noisy data and [1,3:end] for deterministic data
        patient_data = reshape(patient_data_fake(jj,:,[1,3:end]), length(patient_data_fake(1,:,1)), 5);
        
        % number of pain reports provided by patient (split into two halves for cv) 
        num_reports_total = length(patient_data(:,1));
        num_reports1 = round(num_reports_total/2); num_reports2 = num_reports_total-num_reports1;
        patient_data1 = patient_data(1:num_reports1,:); patient_data2 = patient_data(num_reports1+1:end,:); 
        
        % indicator time vectors of drugs taken (1 for drug taken, 0 for not)
        la1 = patient_data1(:,3); sa1 = patient_data1(:,4); no1 = patient_data1(:,5);
        % drug time vectors for each class of drug (empty vector if no drugs taken)
        la_drugtimes1 = patient_data1(la1>0,1); sa_drugtimes1 = patient_data1(sa1>0,1); no_drugtimes1 = patient_data1(no1>0,1);
        % indicator time vectors of drugs taken (1 for drug taken, 0 for not)
        la2 = patient_data2(:,3); sa2 = patient_data2(:,4); no2 = patient_data2(:,5);
        % drug time vectors for each class of drug (empty vector if no drugs taken)
        la_drugtimes2 = patient_data2(la2>0,1); sa_drugtimes2 = patient_data2(sa2>0,1); no_drugtimes2 = patient_data2(no2>0,1);
        
        % check if drugs taken are same for first and second half of trial
        % (if not, throw that drug out)
        if (length(la_drugtimes1)-drug_threshold+0.5)*(length(la_drugtimes2)-drug_threshold+0.5) < 0
            drugs_to_include(1) = 0;
        end
        if (length(sa_drugtimes1)-drug_threshold+0.5)*(length(sa_drugtimes2)-drug_threshold+0.5) < 0
            drugs_to_include(2) = 0;
        end
        if (length(no_drugtimes1)-drug_threshold+0.5)*(length(no_drugtimes2)-drug_threshold+0.5) < 0
            drugs_to_include(3) = 0;
        end
        
        % collect and reorder drugs taken to eliminate those not taken; drug_info must also change to reflect reordering of drugs
        [dose_times1_1,dose_times2_1,dose_times3_1,drug_info1] = get_dose_times(drug_info,la_drugtimes1,sa_drugtimes1,no_drugtimes1,merge_all_drugs,drugs_to_include,common_drug_info,drug_threshold);
        % collect and reorder drugs taken to eliminate those not taken; drug_info must also change to reflect reordering of drugs
        [dose_times1_2,dose_times2_2,dose_times3_2,drug_info2] = get_dose_times(drug_info,la_drugtimes2,sa_drugtimes2,no_drugtimes2,merge_all_drugs,drugs_to_include,common_drug_info,drug_threshold);
        
        
        % time vector (add an extra hour to end for interp1 - it freaks out about rounding errors)
        tvec1 = (patient_data1(1,1):time_res:(patient_data1(end,1)+1))';
        % pain report contains time and pain info from patient (remove times when patient does not report actual pain)
        pain_report_real1 = patient_data1(:,1:2)'; pain_report_real1(:,isnan(pain_report_real1(2,:))) = [];
        % time vector (add an extra hour to end for interp1 - it freaks out about rounding errors)
        tvec2 = (patient_data2(1,1):time_res:(patient_data2(end,1)+1))';
        % pain report contains time and pain info from patient (remove times when patient does not report actual pain)
        pain_report_real2 = patient_data2(:,1:2)'; pain_report_real2(:,isnan(pain_report_real2(2,:))) = [];
        
        % total number of drug classes taken (between 0 and 3; 0 or 1 if merged)
        num_drugs_taken1 = (~isempty(dose_times1_1))+(~isempty(dose_times2_1))+(~isempty(dose_times3_1));
        % total number of drug classes taken (between 0 and 3; 0 or 1 if merged)
        num_drugs_taken2 = (~isempty(dose_times1_2))+(~isempty(dose_times2_2))+(~isempty(dose_times3_2));
        
%         % IF NOT ON ITER STEP: use pseudo grid search to find best fit parameters for patient
%         % MAKE SURE TO CHECK find_patient_params has correct lines commented
%         [params1,err1] = find_patient_params(tvec1,dose_times1_1,dose_times2_1,dose_times3_1,pain_report_real1,patient_data1,drug_info1,fixed_k0,num_drugs_taken1,num_runs,report_skew,nan);
%         % use pseudo grid search to find best fit parameters for patient
% % SKIP THIS STEP FOR DEBUG
% %        [params2,err2] = find_patient_params(tvec2,dose_times1_2,dose_times2_2,dose_times3_2,pain_report_real2,patient_data2,drug_info2,fixed_k0,num_drugs_taken2,num_runs,report_skew,nan);
        
        
        % IF ON ITER - use pseudo grid search to find best fit parameters for patient
        % MAKE SURE TO CHECK find_patient_params has correct lines commented
        [params1,err1] = find_patient_params(tvec1,dose_times1_1,dose_times2_1,dose_times3_1,pain_report_real1,patient_data1,drug_info1,fixed_k0,num_drugs_taken1,num_runs,report_skew,uhatstat1(jj));
        % use pseudo grid search to find best fit parameters for patient
% SKIP THIS STEP FOR DEBUG        
%        [params2,err2] = find_patient_params(tvec2,dose_times1_2,dose_times2_2,dose_times3_2,pain_report_real2,patient_data2,drug_info2,fixed_k0,num_drugs_taken2,num_runs,report_skew,uhatstat2(jj));
        
        
        
        % best fit patient info (P0,u,eps,k0,ki) for first half of data
        patient_info1 = [nanmean(patient_data1(:,2)),params1(1),params1(2),fixed_k0,params1(3:length(params1)-1)'];
        % generate patient report (both real and simulated pain) using second half as testing data
        report1 = run_painsim_drugs3_ode(tvec2,patient_info1,drug_info2,dose_times1_2,dose_times2_2,dose_times3_2,pain_report_real2);
        % best fit patient info for second half of data
% SKIP THIS STEP FOR DEBUG
%        patient_info2 = [nanmean(patient_data2(:,2)),params2(1),params2(2),fixed_k0,params2(3:length(params2)-1)'];
        % generate patient report (both real and simulated pain) using first half as testing data
% SKIP THIS STEP FOR DEBUG
%        report2 = run_painsim_drugs3_ode(tvec1,patient_info2,drug_info1,dose_times1_1,dose_times2_1,dose_times3_1,pain_report_real1);
        % compute errors in testing data (err1 is on second half, err2 is on first half)
        err_test1 = sqrt(1/(length(report1))*sum((report1(2,:)-report1(3,:)).^2));
% SKIP THIS STEP FOR DEBUG
%        err_test2 = std(report2(2,:)-report2(3,:));
        
        % store u values (both halves) and average testing error
        cv_error_all_models(ii,jj,1) = params1(1);
% SKIP THIS STEP FOR DEBUG
%        cv_error_all_models(ii,jj,2) = params2(1);
%        cv_error_all_models(ii,jj,3) = 0.5*(err_test1+err_test2);
        cv_error_all_models(ii,jj,3) = err_test1;
        % SAVE THIS MATRIX FOR FUTURE USE (I don't hard code to prevent accidental overwriting)
        
        % plot results of model fit -> comment out if no plot desired
        %plot_model_fit(tvec,params,err,patient_data,drug_info,dose_times1,dose_times2,dose_times3,pain_report_real,fixed_k0);
        
        % plot raw data (pain reports and drugs taken) -> comment out if you don't want to see the raw time series data
        %plot_raw_data(patient_data);
        
%%%%%%%% CHANGE FOLDER EVERY TIME!!! save results (if desired) -> comment out if you don't want to overwrite other csv or eps data
        csvwrite(strcat('Data/Patient_fake_params_iter3/',filename,'_fixk0_1.csv'),params1);
%        csvwrite(strcat(filename,'_fixk0_2.csv'),params2);
        csvwrite(strcat('Data/Patient_fake_params_iter3/',filename,'_report_fixk0_1.csv'),report1);
%        csvwrite(strcat(filename,'_report_fixk0_2.csv'),report2);
        %saveas(gcf,strcat(filename,'_fixk0.eps'),'eps2c'); close
    end
end

%%%%%%%%% save files (CHANGE TITLES EVERY TIME!!!)
cv_error_fake_iter = reshape(cv_error_all_models(1,:,[1 3]),num_patients,2);
      save('cv_error_fake_iter3_020717.mat','cv_error_fake_iter');
 csvwrite('Data/uhat_fake_iter3_020717.csv',cv_error_fake_iter(:,1));
csvwrite('Data/cverr_fake_iter3_020717.csv',cv_error_fake_iter(:,2));

%% %% plot testing error for all models and all patients
% % average testing error for all models and patients (store both error and both 'u' values for the hybrid model)
% load('cv_error_fake_iter2.mat')
% cv_error_iter2_fake = cv_error_all_models(:,:,3);
% %load('cv_error_hybrid_iter2.mat')
% load('cv_error_mech_fake.mat')
% cv_error_mech_fake = cv_error_all_models(:,:,3);
% 
% % cycle through models to compare error (comment out if you saved all errors)
% % for ii=1:num_models
% %     % two-fold cross validation (split data into two halves)
% %     for jj=1:num_patients
% %         % load patient data into convenient form (just useful info on specified time)
% %         filename = patient_names_fake{jj};
% %         report1 = csvread(strcat('Data/Patient_report_fixk0 cv fake/',filename,'_report_fixk0_1.csv'));
% %         report2 = csvread(strcat('Data/Patient_report_fixk0 cv fake/',filename,'_report_fixk0_2.csv'));
% %         % compute errors in testing data
% %         err_test1 = std(report1(2,:)-report1(3,:)); err_test2 = std(report2(2,:)-report2(3,:));
% %         cv_error_mechonly(ii,jj) = 0.5*(err_test1+err_test2);
% %     end
% % end
% 
% figure
% plot(1:num_patients,cv_error_mech_fake,'o-')
% hold on
% plot(1:num_patients,cv_error_iter2_fake,'o-r')
% xlabel('patient number')
% ylabel('average testing error')
% legend('mechanistic only','hybrid (iteration 2)')