#! /usr/bin/perl -w

sub reset_variables	{
					@list = ();
					$max = 0;
					$seq_count = 0;
					%all = ();
					@con = ();
					}
					
sub get_con_seq	{
				foreach my $entry (keys(%all))
					{
					if ($all{$entry} > 1)
						{
						push (@con, $entry);
						}
					}
				}

sub index_query_alignment	{
							my $file = $_[0];
							open FILE, $file;
							my @input = <FILE>;
							close FILE;
							my $name = $_[1];
							push (@list, $name);
							my $join = join '', @input;
							$join =~ s/>/\n\n>/gmsi;
							$join = $join."\n\n\n";
							while ($join =~ m/>(.+?)\n(.+?)\n\n/gmis)
								{
								my $acc = $1;
								my $seq = $2;
								$acc =~ s/[\s\n]//gmis;
								$seq =~ s/[\s\n]//gmis;
								$seq =~ s/\./\-/gmis;
								$seq =~ tr/a-z/A-Z/;
								$$name{$acc} = $seq_count;
								++$all{$acc};
								@$seq_count = 0;
								my $mod_seq = $seq;
								$mod_seq =~ s/\-//gmis;
								my @index = split(//, $mod_seq);
								my $j = 1;
								my $i = 0;
								while ($seq =~ m/(.{1})/gmsi)
									{
									my $k = $1;
									if ($k eq '-')
										{
										$$seq_count[$i] = 0;
										}
									else
										{
										$$seq_count[$i] = $j;
										++$j;
										}
									++$i;
									}
								if ($i > $max)
									{
									$max = $i;
									}
								++$seq_count;
								}
							}
							
sub index_reference_alignment	{
								my $file = $_[0];
								open FILE, $file;
								my @input = <FILE>;
								close FILE;
								my $name = $_[1];
								push (@list, $name);
								my $join = join '', @input;
								$join =~ s/>/\n\n>/gmsi;
								$join = $join."\n\n\n";
								while ($join =~ m/>(.+?)\n(.+?)\n\n/gmis)
									{
									my $acc = $1;
									my $seq = $2;
									$acc =~ s/[\s\n]//gmis;
									$seq =~ s/[\s\n]//gmis;
									$seq =~ s/\./\-/gmis;
									$seq =~ tr/a-z/A-Z/;
									$$name{$acc} = $seq_count;
									++$all{$acc};
									@$seq_count = 0;
									my $mod_seq = $seq;
									$mod_seq =~ s/\-//gmis;
									my @index = split(//, $mod_seq);
									my $j = 1;
									my $i = 1;
									while ($seq =~ m/(.{1})/gmsi)
										{
										my $k = $1;
										unless ($k eq '-')
											{
											$$seq_count[$j] = $i;
											++$j;
											}
										++$i;
										}
									++$seq_count;
									}
								}
								
					
sub calculate_scores	{
						open FILE, ">column_scores.txt";
						my $i = 0;
						my $query = "query";
						my $ref = "ref";
						my @sorted = @con;
						my $gc = 0;
						my $gs = 0;
						while ($i < $max)
							{
							my $j = 0;
							my $s = 0;
							my $c = 0;
							while ($j < $#sorted)
								{
								my $k = $j + 1;
								my $q1 = $$query{$sorted[$j]};
								my $q1i = $$q1[$i];
								unless ($q1i == 0)
									{
									my $r1n = $$ref{$sorted[$j]};
									my $r1 = $$r1n[$q1i];
									while ($k <= $#sorted)
										{
										my $q2 = $$query{$sorted[$k]};
										my $q2i = $$q2[$i];
										unless ($q2i == 0)
											{
											my $r2n = $$ref{$sorted[$k]};
											my $r2 = $$r2n[$q2i];
											if ($r1 == $r2)
												{
												++$s;
												++$gs;
												}
											++$gc;
											++$c;
											}
										++$k;
										}
									}									
								++$j;
								}
							if ($c == 0)
								{
								print FILE "$i\t0\t0\n";
								}
							else
								{
								my $cs = $s/$c;
								print FILE "$i\t$c\t$cs\n";
								}			
							++$i;
							}
						$precision = $gs/$gc;
						close FILE;
						}
						
sub calculate_scores_2	{
						my $i = 0;
						my $query = "query";
						my $ref = "ref";
						my @sorted = @con;
						my $gc = 0;
						my $gs = 0;
						while ($i < $max)
							{
							my $j = 0;
							while ($j < $#sorted)
								{
								my $k = $j + 1;
								my $q1 = $$query{$sorted[$j]};
								my $q1i = $$q1[$i];
								unless ($q1i == 0)
									{
									my $r1n = $$ref{$sorted[$j]};
									my $r1 = $$r1n[$q1i];
									while ($k <= $#sorted)
										{
										my $q2 = $$query{$sorted[$k]};
										my $q2i = $$q2[$i];
										unless ($q2i == 0)
											{
											my $r2n = $$ref{$sorted[$k]};
											my $r2 = $$r2n[$q2i];
											if ($r1 == $r2)
												{
												++$gs;
												}
											++$gc;
											}
										++$k;
										}
									}									
								++$j;
								}
							++$i;
							}
						my $recall = $gs/$gc;
						if ($precision + $recall == 0)
							{
							$f = 0;
							}
						else
							{
							$f = (2*($precision*$recall))/($precision+$recall);
							}
						print "Precision\t$precision\nRecall\t$recall\nF-score\t$f\n";
						}
	
sub run_program	{
				&reset_variables;
				&index_query_alignment($ARGV[0], "query");
				&index_reference_alignment($ARGV[1], "ref");
				&get_con_seq;
				&calculate_scores;
				&reset_variables;
				&index_query_alignment($ARGV[1], "query");
				&index_reference_alignment($ARGV[0], "ref");
				&get_con_seq;
				&calculate_scores_2;
				}
				
&run_program;

exit;
