#!/usr/bin/perl -w
use strict;

#written by Belinda Giardine 2006

# Command-line arguments: the sequence file that is handed on to the
# external `dna` command, and the mutation name to check against it.
my $seqFile = shift @ARGV;
my $inName = shift @ARGV;

if (!$seqFile or !$inName) {
   print "ERROR missing parameters\n";
   die "usage: sequenceCheck fastaFile hgvsName\n";
}

# A "c." or "g." written directly in front of a bracket applies to every
# mutation listed inside the brackets; remember it so the split pieces
# below can get it back (the abbreviated-name branch already keeps it).
my $prefix = ($inName =~ /([cg]\.)\[/) ? $1 : '';

# Split a compound name into the individual mutation names to check.
my @mut;
if ($inName =~ /\[.*?\]\+\[.*\]/) { #one bracket group per allele: [..]+[..]
   #tested first so the greedy ';' pattern below can't swallow the "]+["
   foreach my $allele ($inName =~ /\[(.*?)\]/g) {
      push @mut, split(/;|\(\+\)/, $allele);
   }
}elsif ($inName =~ /\[(.*;.*)\]/) { #multiple mutations
   my $list = $1;
   @mut = split(/;/, $list);
}elsif ($inName =~ /\[(.*\(\+\).*)\]/) {
   my $list = $1;
   @mut = split(/\(\+\)/, $list);
}elsif ($inName =~ /([cg]\.)\[\.\.(.*)\.\.\]/) { #abbreviated name
   $mut[0] = "$1$2";
}else {
   $mut[0] = $inName;
}

# Give the shared prefix back to pieces that lost it in the split.  Pieces
# that already carry their own c./g./p. marker are left untouched.
if ($prefix ne '') {
   foreach my $piece (@mut) {
      $piece = "$prefix$piece" unless $piece =~ /[cgp]\./;
   }
}

# Never finish silently: brackets that held no names leave nothing to check.
if (!@mut) {
   print "ERROR can't do $inName\n";
}

# Check each name in turn; one result (or ERROR) line is printed per name.
foreach my $name (@mut) {
   if ($name =~ /c\.\d+[+-]\d+/) {
      print "ERROR $name: can't lookup introns\n";
   }elsif ($name =~ /p\.\w+\d+\w+/) {
      print "ERROR $name: can't lookup proteins\n";
   }elsif ($name =~ /[gc]\.(\d+)(?:_+(\d+))?(del|dup)([ACTG]+)$/) {
      #deletion or duplication that spells out the affected bases
      my ($pos, $pos2, $type, $nt) = ($1, $2, $3, $4);
      $pos2 = $pos unless defined $pos2; #single position, no range given
      if ($type eq 'dup' && $pos != $pos2) { #can't check same as insertion
         print "ERROR $name: can't do duplications with ranges\n";
         next;
      }
      my $dna = fetchDna($pos, $pos2);
      #fetchDna returns undef on failure, so test defined-ness before eq
      if (!defined $dna or $dna eq '') {
         print "ERROR Failed to fetch dna 'dna $pos,$pos2 $seqFile' for $name.\n";
         # NOTE(review): a bare exit ends the script with status 0 even
         # though the lookup failed; kept as-is, confirm before changing.
         exit;
      }
      # The bases are also accepted when they match read back-to-front.
      # NOTE(review): this is a plain string reversal, not a reverse
      # complement -- confirm that is what is intended.
      my $rev = scalar reverse $nt;
      if ($dna eq $nt or $dna eq $rev) {
         print "$name: Sequence matched\n";
      }else {
         print "$name: Sequence doesn't match refseq=$dna name=$nt\n";
      }
   }elsif ($name =~ /[gc]\.(\d+)([ACTG])>[ACTG]/) {
      #substitution: only the reference base (left of '>') is checked
      my ($pos, $nt) = ($1, $2);
      my $dna = fetchDna($pos, $pos);
      if (!defined $dna or $dna eq '') {
         print "ERROR Failed to fetch dna 'dna $pos,$pos $seqFile' for $name.\n";
         exit; #status 0, see the note in the del/dup branch
      }
      if ($dna ne $nt) {
         print "$name: Sequence doesn't match refseq=$dna name=$nt\n";
      }else {
         print "$name: Sequence matched\n";
      }
   }else {
      print "ERROR can't do $name\n";
   }
}

exit 0;

# fetchDna($pos, $pos2)
#
# Runs the external command `dna $pos,$pos2 $seqFile` and collects what it
# prints.  A line starting with ">dna" is treated as the header that marks
# a successful run; every other line is appended to the sequence.
#
# Parameters:
#   $pos, $pos2 - the two positions passed to `dna` as "$pos,$pos2"
# Uses the file-level $seqFile as the last argument of the command.
#
# Returns the collected sequence in upper case, or undef when the command
# exits unsuccessfully, prints no ">dna" header, or yields no sequence.
# Dies if the child process cannot be created.
sub fetchDna {
   my ($pos, $pos2) = @_;
   my $dna = '';
   my $good;
   # Forking open: the child's STDOUT becomes the read end we get as $fh.
   # The command is exec'ed from an argument list, so no shell is involved
   # and a file name with spaces or shell metacharacters is passed through
   # unchanged.
   my $pid = open(my $fh, '-|');
   die "Couldn't run dna, $!\n" unless defined $pid;
   if (!$pid) {
      #child: merge stderr into the pipe (what "2>&1" did) so that error
      #text from the command ends up in the stream the parent inspects
      open(STDERR, '>&', \*STDOUT) or die "Couldn't run dna, $!\n";
      exec('dna', "$pos,$pos2", $seqFile) or die "Couldn't run dna, $!\n";
   }
   while (my $line = <$fh>) {
      chomp $line;
      if ($line =~ /^>dna/) { $good = 1; next; } #must not have errors in show command
      $dna .= $line;
   }
   #close on a pipe reports the child's exit status; failure means no data
   close $fh or return undef;
   return undef if (!$good or $dna eq '');
   return uc($dna);
}
####End
