%%% This MATLAB script calculates regression coefficients with the elastic-net algorithm using 10-fold cross-validation.
%%% It also reports the minimum mean squared error (MSE) and the regression coefficients of the genetic features at that minimum.
%% ---- Load inputs -------------------------------------------------------
% importdata returns a struct (numeric block in the .data field) when the
% file contains header/label text, but a plain numeric array when the file
% is purely numeric. Accept both forms instead of assuming the struct form.
xx = importdata('./ClinicalStage/X-combined_normalized_matrix.txt','\t'); %% genomics file as available from http://genomeportal.stanford.edu/tcga-crc
if isstruct(xx)
    x = xx.data;
else
    x = xx;
end
yy = importdata('./ClinicalStage/Y-sample_matrix.txt','\t'); %% clinical annotation file
if isstruct(yy)
    y = yy.data;
else
    y = yy;
end

%% ---- Make sure the output folders exist --------------------------------
% Done before the fit so that a missing folder cannot make 'save' fail
% after the cross-validated fit has already been computed.
outdirs = {'./Lasso_Output','./Lasso_CrossValidation','./Lasso_CoeffMinMSE'};
for k = 1:numel(outdirs)
    if ~exist(outdirs{k},'dir')
        mkdir(outdirs{k});
    end
end

%% ---- Elastic-net fit with 10-fold cross validation ---------------------
% Alpha = 0.5 selects the elastic-net penalty (mix of L1 and L2).
% NOTE: the cross-validation partition is drawn at random, so results can
% differ between runs unless the random generator is seeded beforehand.
[B, fitinfo] = lasso(x,y,'CV',10,'Alpha',.5);
save('./Lasso_Output/ClinicalStage','B','-ASCII'); %% full coefficient matrix (one column per lambda) as text file

%% ---- Extract the solution at the minimum cross-validated MSE -----------
lambdaindex = fitinfo.IndexMinMSE;   %% index of the lambda value with minimum mean squared error (MSE)
cutoff = fitinfo.DF(lambdaindex);    %% number of genomic features with a non-zero regression coefficient at the minimum MSE
mse = fitinfo.MSE(lambdaindex);      %% value of the MSE at that lambda
coeff = B(:,lambdaindex);            %% regression coefficients of all genetic features at the minimum MSE

%% ---- Save results as text files ----------------------------------------
% '-ASCII' writes plain text with 8 digits of precision; 'cutoff' and 'mse'
% are written one after the other into the same file.
save('./Lasso_CrossValidation/ClinicalStage','cutoff','mse','-ASCII'); %% number of non-zero features and MSE value
save('./Lasso_CoeffMinMSE/ClinicalStage','coeff','-ASCII');            %% regression coefficients at the minimum MSE

% Remove all variables from the workspace (same as the original script).
clear