00001 #include "log.h"
00002 #include "format.h"
00003 #include "util.h"
00004 #include "gfr.h"
00005 #include "geneFusionsConfig.h"
00006
00007
00008
00009 static int sortKgTreeFamsByTranscriptName (KgTreeFam *a, KgTreeFam *b)
00010 {
00011 return strcmp (a->transcriptName,b->transcriptName);
00012 }
00013
00014
00015
00016 static char* lookUpTreeFam (Array kgTreeFams, char *transcript)
00017 {
00018 KgTreeFam testKGTF;
00019 int index;
00020 int foundIt;
00021
00022 foundIt = 0;
00023 testKGTF.transcriptName = hlr_strdup (transcript);
00024 foundIt = arrayFind (kgTreeFams,&testKGTF,&index,(ARRAYORDERF)sortKgTreeFamsByTranscriptName);
00025 hlr_free (testKGTF.transcriptName);
00026 if (foundIt) {
00027 return arrp (kgTreeFams,index,KgTreeFam)->treeFamId;
00028 }
00029 return NULL;
00030 }
00031
00032
00033
00034 static int isHomologous (Array kgTreeFams, char *transcript1, char *transcript2)
00035 {
00036 Texta tokens;
00037 int i,j;
00038 char *treeFamId;
00039 static Texta treeFamIdsTranscript1 = NULL;
00040 static Texta treeFamIdsTranscript2 = NULL;
00041
00042 textCreateClear (treeFamIdsTranscript1,100);
00043 textCreateClear (treeFamIdsTranscript2,100);
00044 tokens = textFieldtokP (transcript1,"|");
00045 for (i = 0; i < arrayMax (tokens); i++) {
00046 if (treeFamId = lookUpTreeFam (kgTreeFams,textItem (tokens,i))) {
00047 textAdd (treeFamIdsTranscript1,treeFamId);
00048 }
00049 }
00050 textDestroy (tokens);
00051 tokens = textFieldtokP (transcript2,"|");
00052 for (i = 0; i < arrayMax (tokens); i++) {
00053 if (treeFamId = lookUpTreeFam (kgTreeFams,textItem (tokens,i))) {
00054 textAdd (treeFamIdsTranscript2,treeFamId);
00055 }
00056 }
00057 textDestroy (tokens);
00058 for (i = 0; i < arrayMax (treeFamIdsTranscript1); i++) {
00059 for (j = 0; j < arrayMax (treeFamIdsTranscript2); j++) {
00060 if (strEqual (textItem (treeFamIdsTranscript1,i),textItem (treeFamIdsTranscript2,j))) {
00061 return 1;
00062 }
00063 }
00064 }
00065 return 0;
00066 }
00067
00068
00069
00070 int main (int argc, char *argv[])
00071 {
00072 GfrEntry *currGE;
00073 Array kgTreeFams;
00074 Stringa buffer;
00075 int count;
00076 int countRemoved;
00077
00078 buffer = stringCreate (100);
00079 stringPrintf (buffer,"%s/%s",ANNOTATION_DIR, KNOWN_GENE_TREE_FAM_FILENAME);
00080 kgTreeFams = util_readKnownGeneTreeFams (string (buffer));
00081 arraySort (kgTreeFams,(ARRAYORDERF)sortKgTreeFamsByTranscriptName);
00082 stringDestroy (buffer);
00083
00084 count = 0;
00085 countRemoved = 0;
00086 gfr_init ("-");
00087 puts (gfr_writeHeader ());
00088 while (currGE = gfr_nextEntry ()){
00089 if (isHomologous (kgTreeFams,currGE->nameTranscript1,currGE->nameTranscript2)) {
00090 countRemoved++;
00091 continue;
00092 }
00093 puts (gfr_writeGfrEntry (currGE));
00094 count++;
00095 }
00096 gfr_deInit ();
00097 warn ("%s_numRemoved: %d",argv[0],countRemoved);
00098 warn ("%s_numGfrEntries: %d",argv[0],count);
00099 return 0;
00100 }
00101