00001 #include "log.h" 00002 #include "format.h" 00003 #include "util.h" 00004 #include "gfr.h" 00005 #include "geneFusionsConfig.h" 00006 00007 00008 00009 static int sortKgTreeFamsByTranscriptName (KgTreeFam *a, KgTreeFam *b) 00010 { 00011 return strcmp (a->transcriptName,b->transcriptName); 00012 } 00013 00014 00015 00016 static char* lookUpTreeFam (Array kgTreeFams, char *transcript) 00017 { 00018 KgTreeFam testKGTF; 00019 int index; 00020 int foundIt; 00021 00022 foundIt = 0; 00023 testKGTF.transcriptName = hlr_strdup (transcript); 00024 foundIt = arrayFind (kgTreeFams,&testKGTF,&index,(ARRAYORDERF)sortKgTreeFamsByTranscriptName); 00025 hlr_free (testKGTF.transcriptName); 00026 if (foundIt) { 00027 return arrp (kgTreeFams,index,KgTreeFam)->treeFamId; 00028 } 00029 return NULL; 00030 } 00031 00032 00033 00034 static int isHomologous (Array kgTreeFams, char *transcript1, char *transcript2) 00035 { 00036 Texta tokens; 00037 int i,j; 00038 char *treeFamId; 00039 static Texta treeFamIdsTranscript1 = NULL; 00040 static Texta treeFamIdsTranscript2 = NULL; 00041 00042 textCreateClear (treeFamIdsTranscript1,100); 00043 textCreateClear (treeFamIdsTranscript2,100); 00044 tokens = textFieldtokP (transcript1,"|"); 00045 for (i = 0; i < arrayMax (tokens); i++) { 00046 if (treeFamId = lookUpTreeFam (kgTreeFams,textItem (tokens,i))) { 00047 textAdd (treeFamIdsTranscript1,treeFamId); 00048 } 00049 } 00050 textDestroy (tokens); 00051 tokens = textFieldtokP (transcript2,"|"); 00052 for (i = 0; i < arrayMax (tokens); i++) { 00053 if (treeFamId = lookUpTreeFam (kgTreeFams,textItem (tokens,i))) { 00054 textAdd (treeFamIdsTranscript2,treeFamId); 00055 } 00056 } 00057 textDestroy (tokens); 00058 for (i = 0; i < arrayMax (treeFamIdsTranscript1); i++) { 00059 for (j = 0; j < arrayMax (treeFamIdsTranscript2); j++) { 00060 if (strEqual (textItem (treeFamIdsTranscript1,i),textItem (treeFamIdsTranscript2,j))) { 00061 return 1; 00062 } 00063 } 00064 } 00065 return 0; 00066 } 00067 00068 00069 00070 int main (int argc, char *argv[]) 00071 { 00072 GfrEntry *currGE; 00073 Array kgTreeFams; 00074 Stringa buffer; 00075 int count; 00076 int countRemoved; 00077 00078 buffer = stringCreate (100); 00079 stringPrintf (buffer,"%s/%s",ANNOTATION_DIR, KNOWN_GENE_TREE_FAM_FILENAME); 00080 kgTreeFams = util_readKnownGeneTreeFams (string (buffer)); 00081 arraySort (kgTreeFams,(ARRAYORDERF)sortKgTreeFamsByTranscriptName); 00082 stringDestroy (buffer); 00083 00084 count = 0; 00085 countRemoved = 0; 00086 gfr_init ("-"); 00087 puts (gfr_writeHeader ()); 00088 while (currGE = gfr_nextEntry ()){ 00089 if (isHomologous (kgTreeFams,currGE->nameTranscript1,currGE->nameTranscript2)) { 00090 countRemoved++; 00091 continue; 00092 } 00093 puts (gfr_writeGfrEntry (currGE)); 00094 count++; 00095 } 00096 gfr_deInit (); 00097 warn ("%s_numRemoved: %d",argv[0],countRemoved); 00098 warn ("%s_numGfrEntries: %d",argv[0],count); 00099 return 0; 00100 } 00101