use strict;
use warnings;

# Tutorial script demonstrating arrays, hashes, arrays of arrays and the
# index() function.
#
# Reads tab-separated lines from input.txt, where the first field is a
# peptide sequence and the second field is its score. For every input line
# it counts how often each residue in @feature_residues occurs in the
# peptide, then writes one comma-separated row of counts per line to
# output.txt (after a fixed header line). It also prints the score of one
# hard-coded peptide to STDOUT to demonstrate a hash lookup.

my $input_path  = 'input.txt';
my $output_path = 'output.txt';

# Residues whose per-peptide counts become the feature columns, in order.
my @feature_residues = qw(A V);

# Peptide whose score is printed to STDOUT as the hash-lookup demonstration.
my $demo_peptide = 'AAAAAAARRQEQTLR';

# Score threshold used to derive the 1/0 flags in @newscores.
my $score_threshold = 10000;

# Count how many times $needle occurs in $text.
# index() scans a string from left to right and returns the position of the
# first match at or after the given offset, or -1 when there is none, so the
# search is restarted one character past each hit until it fails.
sub count_occurrences {
    my ($text, $needle) = @_;

    my $count    = 0;
    my $position = index($text, $needle);
    while ($position != -1) {
        $count++;
        $position = index($text, $needle, $position + 1);
    }
    return $count;
}

# Open the input for reading and the output for writing (the '>' mode
# truncates/creates the output file).
open(my $in_fh, '<', $input_path)
    or die "cannot open $input_path: $!\n";
open(my $out_fh, '>', $output_path)
    or die "cannot open $output_path: $!\n";

my @peptides;    # peptide of each input line, in file order
my @scores;      # score of each input line, parallel to @peptides
my %score_of;    # peptide => score; only used to demonstrate a hash

while (my $line = <$in_fh>) {
    chomp $line;

    # Split into an array first: split() then drops trailing empty fields,
    # so a line without a score yields an undefined score.
    my @fields = split /\t/, $line;
    my ($peptide, $score) = @fields[0, 1];

    # A blank line has no fields at all; treat its peptide as the empty
    # string so that it still produces a row of zero counts.
    $peptide = '' unless defined $peptide;

    push @peptides, $peptide;
    push @scores,   $score;
    $score_of{$peptide} = $score;    # a repeated peptide keeps its last score
}
close($in_fh) or die "cannot close $input_path: $!\n";

# 1 when the score reaches the threshold, 0 otherwise (a missing score
# counts as 0). Not written to the output; kept as a demonstration.
my @newscores =
    map { (defined $_ && $_ >= $score_threshold) ? 1 : 0 } @scores;

# Demonstrates accessing a hash element; prints an empty score when the
# peptide is not present in the input.
my $demo_score = $score_of{$demo_peptide};
$demo_score = '' unless defined $demo_score;
print $demo_score . "this is the score\n";

# @features is an array of arrays: one array reference per peptide, holding
# the occurrence count of each residue in @feature_residues.
my @features;
for my $peptide (@peptides) {
    my @counts = map { count_occurrences($peptide, $_) } @feature_residues;
    push @features, [@counts];
}

# Sample output: one comma-separated row of feature counts per peptide.
print {$out_fh} "This is the output file\n";
for my $row (@features) {
    print {$out_fh} join(',', @{$row}) . "\n";
}

# Buffered write errors only surface at close, so check it.
close($out_fh) or die "cannot close $output_path: $!\n";