20000000){die("Error: Maximum allowed size of file is 20 MB");} $file_name=strtolower($_FILES['userfile']['name']); //LEER CADENA $file=$_FILES['userfile']['tmp_name']; $zd = fopen($file, "r"); $file_content=fread($zd,20000000); fclose($zd); print "
Uploaded file: $file_name
\n";
} else {
die("No sequences submitted. Please go back and try again");
}
}
// At this point $file_content contains whatever has been submitted.
// The first problem is to know whether a bare sequence or FASTA-formatted input was submitted,
// which is decided by counting the ">" signs (each FASTA descriptor line starts with one).
$greater_than_signs=substr_count($file_content,">");
// Two cases are handled:
//   - no ">" is present ($greater_than_signs==0): the input is a bare sequence;
//   - one or more ">" are present: the input is one or several FASTA records.
// In both cases all useless information is removed and the result is stored in $sequence.
// For FASTA input $sequence holds one sequence per line (each one followed by "\n").
if ($greater_than_signs==0){
    // Only the sequence has been submitted, so non-word characters and digits are removed
    $sequence=preg_replace("/\W|\d/","",$file_content);
}else{
    // One or more sequences with their identification after ">" have been submitted.
    // (The previous condition was ">1", which silently skipped a single FASTA record
    // and left $sequence undefined.)
    // As identification is not required, the descriptor lines are removed.
    $file_content=substr($file_content,strpos($file_content,">")+1); // remove the first ">" and whatever is before it
    // Each array element is one record: its first line is the descriptor (without ">"),
    // the remaining lines are the sequence.
    $theseqs=preg_split("/>/",$file_content,-1,PREG_SPLIT_NO_EMPTY);
    $sequence=""; // all sequences are accumulated here, one per line
    foreach($theseqs as $v){
        $v=preg_replace("/\r/","\n",$v);      // treat carriage returns as linebreaks
        $v=preg_replace("/\n{2,}/","\n",$v);  // collapse every run of linebreaks into a single one
        // The text from position 0 up to the first linebreak is the descriptor of the sequence
        $pos=strpos($v,"\n");
        if ($pos===false){
            // no linebreak at all: there is a descriptor but no sequence line
            die("Error:
Input information is not a correct Fasta file.");
        }
        // NOTE(review): this rejects records whose descriptor is exactly one character long;
        // the intent of comparing against 1 is not clear from here - kept as in the original.
        if ($pos==1){
            die("Error:
Input information is not a correct Fasta file.");
        }
        $v=substr($v,$pos+1);               // drop the descriptor; only the sequence lines remain
        $v=preg_replace("/\W|\d/","",$v);   // remove non-word characters (e.g. linebreaks) and digits
        // Here $v contains only the sequence letters in one line
        $sequence.=$v."\n";
    }
    $theseqs=array(); // Free memory
}
// Get additional data. A missing field falls back to "" so that no undefined-index
// notice is raised; "" matches none of the accepted methods below.
$oligo_len=isset($_POST["len"]) ? $_POST["len"] : "";
$strands=isset($_POST["strands"]) ? $_POST["strands"] : "";
// when the query sequence is missing or its length is 0 => error
if (!isset($sequence) || strlen($sequence)==0){die("Error: query sequence not provided. Plase go back and try again.");}
// print the form with the sequence
print_form ("",$oligo_len,$strands);
// Statistics of the submitted data. They are taken BEFORE the reverse complement is
// appended, otherwise the reported total length would be doubled for two-strand requests.
// A bare sequence (no ">" sign) still counts as one sequence.
$NoSequences=max(1,$greater_than_signs);
// Linebreaks only separate the sequences inside $sequence, so they are not nucleotides
$sequencelen=strlen($sequence)-substr_count($sequence,"\n");
// when frequencies at both strands are requested, place in one line the sequence and its reverse complement
if ($strands==2){$sequence.=" ".RevComp($sequence); }
// Accepted values of $oligo_len for plain and standarized oligo frequencies.
// Strict matching is used so that look-alike values such as "01" are not accepted.
$plain_lengths=array("1","2","3","4","5","6","7","8");
$standarized_lengths=array("1s","2s","3s","4s","5s","6s","7s","8s");
$is_plain=in_array($oligo_len,$plain_lengths,true);
$is_standarized=in_array($oligo_len,$standarized_lengths,true);
// compute requested. Data is saved to $result
if ($is_plain){
    $result=find_oligos($sequence,$oligo_len);
}elseif ($is_standarized){
    $result=find_oligos($sequence,$oligo_len);
    $result=standarize_frecuencies($result);
}elseif ($oligo_len==="ZOM"){
    $result=ZOM_oligonucleotide_frecuencies($sequence);
}elseif ($oligo_len==="FOM"){
    $result=FOM_oligonucleotide_frecuencies($sequence);
}elseif ($oligo_len==="SOM"){
    $result=SOM_oligonucleotide_frecuencies($sequence);
}elseif ($oligo_len==="zscore"){
    $result=zscores_for_tetranucleotide ($sequence);
}
// print out results
print "
Number of sequences: $NoSequences";
print "
Total length: $sequencelen bp";
// heading describing the requested computation
if ($is_plain){
    print "
Frequency of oligos with length $oligo_len
";
}elseif ($is_standarized){
    print "
Standarized frequency of oligos with length $oligo_len
";
}elseif ($oligo_len==="ZOM"){
    print "
Zero'th Order Markov chain for tetranucleotides (ZOM)
";
}elseif ($oligo_len==="FOM"){
    print "
First Order Markov chain for tetranucleotides (FOM)
";
}elseif ($oligo_len==="SOM"){
    print "
Second Order Markov chain for tetranucleotides (SOM)
";
}elseif ($oligo_len==="zscore"){
    print "
Z-scores for tetranucleotides
";
}
print "\n\n";
}
// ######################################################################################################
// ##################################### FUNCTIONS ###################################
// ######################################################################################################
// NOTE: The functions below that compute frequencies work independently of each other.
// It would be possible to write nested functions to compute oligonucleotide frequencies
// (one function requiring other ones to work), and that would probably be more elegant,
// but we wanted copy-and-paste functions that can easily be reused in other scripts.
function print_form($sequence,$method,$strand){
if ($method==""){$method="2";}
print "
\n"; print "\n"; print " |