#!/usr/bin/perl -w ########################################################################################## ## program: calculateGibbs.pl ## author: Bruno Contreras-Moreira, with help from Heladia Salgado & Osbaldo Resendis ## what it does: ## calculates Gibbs destabilization energies for DNA duplexes based on their sequences sliding windows and optionally locates ## known sequence sites in the context of the calculated energy profiles ## follows the dinucleotide nearest-neighbour algorithm proposed by Breslauer et al.(1986) PNAS, 83:3746-3750. ## ## arguments: calculateGibbs.pl -i InputSeqFile (compulsory) (run ./calculateGibbs.pl for more) ## ## ## returns: 1) energy profiles for each sequence parsed at InputSeqFile ## ## ########################################################################################## use strict; use Getopt::Std; $| = 1; ####################### predefined constants & variables ################################# my $K = 1.987; # cal/mol.K Boltzmann my $defT = 310; # default temperature(K) my $defL = 15; # default window length my $l = $defL; # window length my $t = $defT; # temperature my %thermoTable; # dinucleotide energies obtained from Santalucia et al(1996) Biochemistry: 35:3555­3562 # measured at NaCl 1M, 37C & pH=7 # H in kcal/mol # S in eu # G in kcal/mol, not used, calculated from H & S with respect to temperature t $thermoTable{'AA/TT'}{'H'} = 8.4; $thermoTable{'AA/TT'}{'S'} = 23.6; $thermoTable{'AA/TT'}{'G'} = 1.02; $thermoTable{'AT/TA'}{'H'} = 6.5; $thermoTable{'AT/TA'}{'S'} = 18.8; $thermoTable{'AT/TA'}{'G'} = 0.73; $thermoTable{'TA/AT'}{'H'} = 6.3; $thermoTable{'TA/AT'}{'S'} = 18.5; $thermoTable{'TA/AT'}{'G'} = 0.60; $thermoTable{'CA/GT'}{'H'} = 7.4; $thermoTable{'CA/GT'}{'S'} = 19.3; $thermoTable{'CA/GT'}{'G'} = 1.38; $thermoTable{'GT/CA'}{'H'} = 8.6; $thermoTable{'GT/CA'}{'S'} = 23.0; $thermoTable{'GT/CA'}{'G'} = 1.43; $thermoTable{'CT/GA'}{'H'} = 6.1; $thermoTable{'CT/GA'}{'S'} = 16.1; $thermoTable{'CT/GA'}{'G'} = 1.16; $thermoTable{'GA/CT'}{'H'} = 7.7; $thermoTable{'GA/CT'}{'S'} = 20.3; $thermoTable{'GA/CT'}{'G'} = 1.46; $thermoTable{'CG/GC'}{'H'} = 10.1; $thermoTable{'CG/GC'}{'S'} = 25.5; $thermoTable{'CG/GC'}{'G'} = 2.09; $thermoTable{'GC/CG'}{'H'} = 11.1; $thermoTable{'GC/CG'}{'S'} = 28.4; $thermoTable{'GC/CG'}{'G'} = 2.28; $thermoTable{'GG/CC'}{'H'} = 6.7; $thermoTable{'GG/CC'}{'S'} = 15.6; $thermoTable{'GG/CC'}{'G'} = 1.77; my (%initGC,%initAT); $initGC{S} = 5.9; # helix formation start costs $initGC{G} = -1.82; $initAT{S} = 9.0; $initAT{G} = -2.8; my $nullvalue = -9999; # use to pad sequence profiles (see calculateGibbsProfile) my $min_n_of_sites_for_histogram = 5; ####################### main function ############################################################ ## parsing command-line options ## my $progname = "calculateGibbs.pl"; my (%opts,$parameters); getopts('hsi:t:l:',\%opts); if(($opts{'h'})||(scalar(keys(%opts))==0)) { print < 1\n"; } if (defined($opts{'t'})) { $t = $opts{'t'}; } $parameters = sprintf("-i %s -l %d -t %d",$infile,$l,$t); print "# $progname : parameters = $parameters\n\n"; ## parse input file and compute energies for each sequence ## my ($name,$seq,$site,@siteCoords,$sbeg,$send,$act,$TF,$siteZ,$siteG,$n_of_validZ,$s,$seqlength,@gibbsProfile,%STATS,%TFSTATS); open(SEQ, $infile) || die "# $progname : cannot open input $infile\n"; while() { next if(/^#/); # skip commented lines chomp; $_ =~ s/ //g; ($name,$seq,$site) = split(/\\/,$_); $seq = uc($seq); $seqlength = length($seq); print ">$name ",length($seq)," nts "; # more things are printed at calculateGibbsProfile @gibbsProfile = &calculateGibbsProfile($seq, $l, $t, 1); for($s=0;$s=0;$i--) { push(@profile,[($i,"",$nullvalue,$nullvalue)]); } @profile = reverse @profile; # now pad the right side for($i=$lasti+1;$i