00001 #include "log.h"
00002 #include "format.h"
00003 #include "array.h"
00004 #include <math.h>
00005 #include "gfr.h"
00006
00007 double getNonExonicSPER( GfrEntry* currGE ) {
00008 int i;
00009
00010 Array starts1 = arrayCreate( 50, int);
00011 Array starts2 = arrayCreate( 50, int);
00012 for( i = 0; i<arrayMax(currGE->interReads); i++) {
00013 array(starts1, arrayMax(starts1), int) = arrp(currGE->interReads, i, GfrInterRead)->readStart1;
00014 array(starts2, arrayMax(starts2), int) = arrp(currGE->interReads, i, GfrInterRead)->readStart2;
00015
00016
00017
00018
00019
00020
00021
00022
00023 }
00024 arraySort( starts1, (ARRAYORDERF) arrayIntcmp);
00025 arraySort( starts2, (ARRAYORDERF) arrayIntcmp);
00026 int tenthQuantile = (int) round( arrayMax( starts1 ) * 0.10 );
00027 int ninetiethQuantile = (int) round( arrayMax( starts1 ) * 0.90 );
00028 double area1 = (double) (arru( starts1, ninetiethQuantile , int ) - arru( starts1, tenthQuantile, int ) );
00029 double area2 = (double) (arru( starts2, ninetiethQuantile , int ) - arru( starts2, tenthQuantile, int ) );
00030 int nonExonicPEReads = (int) round( arrayMax( starts1 ) * 0.80 );
00031 arrayDestroy( starts1 );
00032 arrayDestroy( starts2 );
00033
00034 return MIN( (double) nonExonicPEReads / (double)(area1), (double) nonExonicPEReads / (double)( area2 ));
00035 }
00036
00037 int getTranscriptLength1( GfrEntry* currGE) {
00038 int i;
00039 int lengthTranscript=0;
00040 GfrExonCoordinate* currEC;
00041 for( i=0; i < arrayMax(currGE->exonCoordinatesTranscript1); i++) {
00042 currEC = arrp( currGE->exonCoordinatesTranscript1, i, GfrExonCoordinate);
00043 lengthTranscript += currEC->end - currEC->start;
00044 }
00045 return lengthTranscript;
00046 }
00047 int getTranscriptLength2( GfrEntry* currGE) {
00048 int i;
00049 int lengthTranscript = 0;
00050 GfrExonCoordinate* currEC;
00051 for( i=0; i < arrayMax(currGE->exonCoordinatesTranscript2); i++) {
00052 currEC = arrp( currGE->exonCoordinatesTranscript2, i, GfrExonCoordinate);
00053 lengthTranscript += currEC->end - currEC->start;
00054 }
00055 return lengthTranscript;
00056 }
00057
00058 int main (int argc, char *argv[])
00059 {
00060 GfrEntry *currGE;
00061 int count;
00062 int countRemoved;
00063 int exonicPEReads, nonExonicPEReads,lengthTranscript1,lengthTranscript2;
00064 int i;
00065 GfrPairCount* currGEPC;
00066 double nonExonicSPER;
00067
00068 count = 0;
00069 countRemoved = 0;
00070 gfr_init ("-");
00071 puts (gfr_writeHeader ());
00072 while (currGE = gfr_nextEntry () ) {
00073 exonicPEReads=0;
00074 nonExonicPEReads = 0;
00075 int allIntronic = 1;
00076
00077 for( i=0; i<arrayMax( currGE->pairCounts ); i++ ) {
00078 currGEPC = arrp( currGE->pairCounts, i, GfrPairCount);
00079 if( currGEPC->pairType == GFR_PAIR_TYPE_EXONIC_EXONIC ) {
00080 exonicPEReads+= currGEPC->count;
00081 allIntronic = 0;
00082 }
00083
00084
00085 }
00086 if( allIntronic == 1 ) {
00087 nonExonicSPER = getNonExonicSPER( currGE );
00088 lengthTranscript1 = getTranscriptLength1( currGE );
00089 lengthTranscript2 = getTranscriptLength2( currGE );
00090 if( ( nonExonicSPER > ( (double) currGE->numIntra1 / (double) lengthTranscript1 ) ) ||
00091 ( nonExonicSPER > ( (double) currGE->numIntra2 / (double) lengthTranscript2 ) ) ) {
00092 countRemoved++;
00093 continue;
00094 } else {
00095 puts (gfr_writeGfrEntry (currGE));
00096 count++;
00097 }
00098 } else {
00099 puts (gfr_writeGfrEntry (currGE));
00100 count++;
00101 }
00102 }
00103 gfr_deInit ();
00104 warn ("%s_numRemoved: %d",argv[0],countRemoved);
00105 warn ("%s_numGfrEntries: %d",argv[0],count);
00106 return 0;
00107 }