00001 #include "log.h"
00002 #include "format.h"
00003 #include "gfr.h"
00004 #include "linestream.h"
00005 #include <stdio.h>
00006
/* One black-listed gene pair, as read from a line of the black list file. */
typedef struct {
  char *gene1;  /* name taken from the first tab-separated field */
  char *gene2;  /* name taken from the second tab-separated field */
} BLEntry;
00011
00012 static int sortBlackListByName1 (BLEntry *a, BLEntry *b)
00013 {
00014 int res = strcmp ( a->gene1, b->gene1);
00015 if( res==0 ) res = strcmp ( a->gene2, b->gene2 );
00016 return (res);
00017 }
00018
00019 int main (int argc, char *argv[])
00020 {
00021 GfrEntry *currGE;
00022 BLEntry *currBLE;
00023 BLEntry currQuery;
00024 FILE *fp;
00025 char *line;
00026 int count;
00027 int countRemoved;
00028
00029 int index;
00030 WordIter w;
00031 Array blackList = arrayCreate(20, BLEntry);
00032
00033 if (argc != 2) {
00034 usage ("%s <blackList.txt>",argv[0]);
00035 }
00036 fp = fopen( argv[1], "r" );
00037
00038 if( !fp ) die("Unable to open file: %s", argv[1]);
00039
00040 LineStream ls = ls_createFromFile( argv[1] );
00041 while( line = ls_nextLine(ls) ) {
00042 w = wordIterCreate( line, "\t", 1);
00043 currBLE = arrayp( blackList, arrayMax(blackList), BLEntry);
00044 currBLE->gene1 = hlr_strdup ( wordNext(w) );
00045 currBLE->gene2 = hlr_strdup ( wordNext(w) );
00046 wordIterDestroy(w);
00047 }
00048 fclose(fp);
00049 arraySort( blackList, (ARRAYORDERF) sortBlackListByName1);
00050
00051
00052 count = 0;
00053 countRemoved = 0;
00054 gfr_init ("-");
00055 puts (gfr_writeHeader ());
00056 while (currGE = gfr_nextEntry ()) {
00057
00058 currQuery.gene1 = currGE->geneSymbolTranscript1;
00059 currQuery.gene2 = currGE->geneSymbolTranscript2;
00060
00061 int res = arrayFind( blackList, &currQuery,
00062 &index, (ARRAYORDERF) sortBlackListByName1);
00063
00064 if( !res ) {
00065 currQuery.gene1 = currGE->geneSymbolTranscript2;
00066 currQuery.gene2 = currGE->geneSymbolTranscript1;
00067
00068 res = arrayFind( blackList, &currQuery,
00069 &index, (ARRAYORDERF) sortBlackListByName1 );
00070
00071 if( !res ) {
00072 puts (gfr_writeGfrEntry (currGE));
00073 count++;
00074 } else {
00075 countRemoved++;
00076 }
00077 } else {
00078 countRemoved++;
00079 }
00080 }
00081 gfr_deInit ();
00082 arrayDestroy( blackList );
00083 warn ("%s_BlackListFilter: %s",argv[0], argv[1]);
00084 warn ("%s_numRemoved: %d",argv[0],countRemoved);
00085 warn ("%s_numGfrEntries: %d",argv[0],count);
00086 return 0;
00087 }
00088