00001 #include "log.h" 00002 #include "format.h" 00003 #include "linestream.h" 00004 #include "common.h" 00005 #include "bits.h" 00006 #include "gfr.h" 00007 00008 00009 00010 static LineStream lsGfr = NULL; 00011 static Bits* presentColumnTypes = NULL; 00012 static Array columnTypes = NULL; 00013 static Texta columnHeaders = NULL; 00014 static char* headerLine = NULL; 00015 00016 00017 00018 static void gfr_addColumnType (char *type) 00019 { 00020 if (strEqual (type,GFR_COLUMN_NAME_NUM_INTER)) { 00021 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTER); 00022 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTER; 00023 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTER); 00024 } 00025 else if (strEqual (type,GFR_COLUMN_NAME_INTER_MEAN_AB)) { 00026 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_AB); 00027 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_MEAN_AB; 00028 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_MEAN_AB); 00029 } 00030 else if (strEqual (type,GFR_COLUMN_NAME_INTER_MEAN_BA)) { 00031 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_BA); 00032 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_MEAN_BA; 00033 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_MEAN_BA); 00034 } 00035 else if (strEqual (type,GFR_COLUMN_NAME_PVALUE_AB)) { 00036 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_AB); 00037 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PVALUE_AB; 00038 textAdd (columnHeaders,GFR_COLUMN_NAME_PVALUE_AB); 00039 } 00040 else if (strEqual (type,GFR_COLUMN_NAME_PVALUE_BA)) { 00041 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_BA); 00042 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PVALUE_BA; 00043 textAdd (columnHeaders,GFR_COLUMN_NAME_PVALUE_BA); 00044 } 00045 else if (strEqual (type,GFR_COLUMN_NAME_NUM_INTRA1)) { 00046 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA1); 00047 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTRA1; 00048 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTRA1); 00049 } 00050 else if (strEqual (type,GFR_COLUMN_NAME_NUM_INTRA2)) { 00051 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA2); 00052 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTRA2; 00053 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTRA2); 00054 } 00055 else if (strEqual (type,GFR_COLUMN_NAME_FUSION_TYPE)) { 00056 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE); 00057 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_FUSION_TYPE; 00058 textAdd (columnHeaders,GFR_COLUMN_NAME_FUSION_TYPE); 00059 } 00060 else if (strEqual (type,GFR_COLUMN_NAME_NAME_TRANSCRIPT1)) { 00061 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1); 00062 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NAME_TRANSCRIPT1; 00063 textAdd (columnHeaders,GFR_COLUMN_NAME_NAME_TRANSCRIPT1); 00064 } 00065 else if (strEqual (type,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT1)) { 00066 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1); 00067 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1; 00068 textAdd (columnHeaders,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT1); 00069 } 00070 else if (strEqual (type,GFR_COLUMN_NAME_STRAND_TRANSCRIPT1)) { 00071 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1); 00072 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1; 00073 textAdd (columnHeaders,GFR_COLUMN_NAME_STRAND_TRANSCRIPT1); 00074 } 00075 else if (strEqual (type,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT1)) { 00076 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1); 00077 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1; 00078 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT1); 00079 } 00080 else if (strEqual (type,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT1)) { 00081 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1); 00082 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1; 00083 textAdd (columnHeaders,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT1); 00084 } 00085 else if (strEqual (type,GFR_COLUMN_NAME_START_TRANSCRIPT1)) { 00086 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT1); 00087 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_START_TRANSCRIPT1; 00088 textAdd (columnHeaders,GFR_COLUMN_NAME_START_TRANSCRIPT1); 00089 } 00090 else if (strEqual (type,GFR_COLUMN_NAME_END_TRANSCRIPT1)) { 00091 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT1); 00092 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_END_TRANSCRIPT1; 00093 textAdd (columnHeaders,GFR_COLUMN_NAME_END_TRANSCRIPT1); 00094 } 00095 else if (strEqual (type,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT1)) { 00096 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1); 00097 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1; 00098 textAdd (columnHeaders,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT1); 00099 } 00100 else if (strEqual (type,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT1)) { 00101 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1); 00102 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1; 00103 textAdd (columnHeaders,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT1); 00104 } 00105 else if (strEqual (type,GFR_COLUMN_NAME_NAME_TRANSCRIPT2)) { 00106 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2); 00107 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NAME_TRANSCRIPT2; 00108 textAdd (columnHeaders,GFR_COLUMN_NAME_NAME_TRANSCRIPT2); 00109 } 00110 else if (strEqual (type,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT2)) { 00111 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2); 00112 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2; 00113 textAdd (columnHeaders,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT2); 00114 } 00115 else if (strEqual (type,GFR_COLUMN_NAME_STRAND_TRANSCRIPT2)) { 00116 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2); 00117 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2; 00118 textAdd (columnHeaders,GFR_COLUMN_NAME_STRAND_TRANSCRIPT2); 00119 } 00120 else if (strEqual (type,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT2)) { 00121 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2); 00122 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2; 00123 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT2); 00124 } 00125 else if (strEqual (type,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT2)) { 00126 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2); 00127 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2; 00128 textAdd (columnHeaders,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT2); 00129 } 00130 else if (strEqual (type,GFR_COLUMN_NAME_START_TRANSCRIPT2)) { 00131 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT2); 00132 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_START_TRANSCRIPT2; 00133 textAdd (columnHeaders,GFR_COLUMN_NAME_START_TRANSCRIPT2); 00134 } 00135 else if (strEqual (type,GFR_COLUMN_NAME_END_TRANSCRIPT2)) { 00136 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT2); 00137 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_END_TRANSCRIPT2; 00138 textAdd (columnHeaders,GFR_COLUMN_NAME_END_TRANSCRIPT2); 00139 } 00140 else if (strEqual (type,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT2)) { 00141 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2); 00142 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2; 00143 textAdd (columnHeaders,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT2); 00144 } 00145 else if (strEqual (type,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT2)) { 00146 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2); 00147 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2; 00148 textAdd (columnHeaders,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT2); 00149 } 00150 else if (strEqual (type,GFR_COLUMN_NAME_INTER_READS)) { 00151 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS); 00152 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_READS; 00153 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_READS); 00154 } 00155 else if (strEqual (type,GFR_COLUMN_NAME_PAIR_COUNT)) { 00156 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT); 00157 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PAIR_COUNT; 00158 textAdd (columnHeaders,GFR_COLUMN_NAME_PAIR_COUNT); 00159 } 00160 else if (strEqual (type,GFR_COLUMN_NAME_ID)) { 00161 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_ID); 00162 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_ID; 00163 textAdd (columnHeaders,GFR_COLUMN_NAME_ID); 00164 } 00165 else if (strEqual (type,GFR_COLUMN_NAME_READS_TRANSCRIPT1)) { 00166 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1); 00167 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_READS_TRANSCRIPT1; 00168 textAdd (columnHeaders,GFR_COLUMN_NAME_READS_TRANSCRIPT1); 00169 } 00170 else if (strEqual (type,GFR_COLUMN_NAME_READS_TRANSCRIPT2)) { 00171 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2); 00172 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_READS_TRANSCRIPT2; 00173 textAdd (columnHeaders,GFR_COLUMN_NAME_READS_TRANSCRIPT2); 00174 } 00175 else if (strEqual (type,GFR_COLUMN_NAME_SPER)) { 00176 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_SPER); 00177 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_SPER; 00178 textAdd (columnHeaders,GFR_COLUMN_NAME_SPER); 00179 } 00180 else if (strEqual (type,GFR_COLUMN_NAME_DASPER)) { 00181 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DASPER); 00182 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DASPER; 00183 textAdd (columnHeaders,GFR_COLUMN_NAME_DASPER); 00184 } 00185 else if (strEqual (type,GFR_COLUMN_NAME_RESPER)) { 00186 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_RESPER); 00187 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_RESPER; 00188 textAdd (columnHeaders,GFR_COLUMN_NAME_RESPER); 00189 } 00190 else { 00191 die ("Unknown presentColumn: %s",type); 00192 } 00193 } 00194 00195 00196 00197 int gfr_init (char *fileName) 00198 { 00199 int i; 00200 Texta tokens; 00201 lsGfr = ls_createFromFile (fileName); 00202 char* firstLine = ls_nextLine( lsGfr ); 00203 if( firstLine==NULL) return 0; 00204 columnTypes = arrayCreate (20,int); 00205 columnHeaders = textCreate (20); 00206 presentColumnTypes = bitAlloc (100); 00207 headerLine = hlr_strdup ( firstLine ); 00208 tokens = textFieldtokP (headerLine,"\t"); 00209 for (i = 0; i < arrayMax (tokens); i++) { 00210 gfr_addColumnType (textItem (tokens,i)); 00211 } 00212 return 1; 00213 } 00214 00215 00216 00217 void gfr_addNewColumnType (char* columnName) 00218 { 00219 int i; 00220 00221 i = 0; 00222 while (i < arrayMax (columnHeaders)) { 00223 if (strEqual (textItem (columnHeaders,i),columnName)) { 00224 break; 00225 } 00226 i++; 00227 } 00228 if (i == arrayMax (columnHeaders)) { 00229 gfr_addColumnType (columnName); 00230 } 00231 } 00232 00233 00234 00235 void gfr_deInit (void) 00236 { 00237 if (lsGfr != NULL) { 00238 ls_destroy (lsGfr); 00239 } 00240 arrayDestroy (columnTypes); 00241 textDestroy (columnHeaders); 00242 bitFree (&presentColumnTypes); 00243 hlr_free (headerLine); 00244 } 00245 00246 00247 00248 static void gfr_freeEntry (GfrEntry* currEntry) 00249 { 00250 if (currEntry == NULL) { 00251 return; 00252 } 00253 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE)) { 00254 hlr_free (currEntry->fusionType); 00255 } 00256 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1)) { 00257 hlr_free (currEntry->nameTranscript1); 00258 } 00259 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1)) { 00260 hlr_free (currEntry->chromosomeTranscript1); 00261 } 00262 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1)) { 00263 hlr_free (currEntry->geneSymbolTranscript1); 00264 } 00265 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1)) { 00266 arrayDestroy (currEntry->exonCoordinatesTranscript1); 00267 } 00268 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1)) { 00269 hlr_free (currEntry->descriptionTranscript1); 00270 } 00271 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2)) { 00272 hlr_free (currEntry->nameTranscript2); 00273 } 00274 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2)) { 00275 hlr_free (currEntry->chromosomeTranscript2); 00276 } 00277 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2)) { 00278 hlr_free (currEntry->geneSymbolTranscript2); 00279 } 00280 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2)) { 00281 arrayDestroy (currEntry->exonCoordinatesTranscript2); 00282 } 00283 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2)) { 00284 hlr_free (currEntry->descriptionTranscript2); 00285 } 00286 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_ID)) { 00287 hlr_free (currEntry->id); 00288 } 00289 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS)) { 00290 arrayDestroy (currEntry->interReads); 00291 } 00292 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT)) { 00293 arrayDestroy (currEntry->pairCounts); 00294 } 00295 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1)) { 00296 textDestroy (currEntry->readsTranscript1); 00297 } 00298 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2)) { 00299 textDestroy (currEntry->readsTranscript2); 00300 } 00301 freeMem (currEntry); 00302 currEntry = NULL; 00303 } 00304 00305 00306 00307 static GfrEntry* gfr_processNextEntry (int freeMemory) 00308 { 00309 static GfrEntry *currEntry = NULL; 00310 char *line,*token,*pos; 00311 WordIter w; 00312 int index,columnType; 00313 GfrPairCount *currGPC; 00314 GfrInterRead *currGIR; 00315 Texta tokens,items; 00316 int i; 00317 GfrExonCoordinate *currEC; 00318 00319 if (!ls_isEof (lsGfr)) { 00320 while (line = ls_nextLine (lsGfr)) { 00321 if (line[0] == '\0') { 00322 continue; 00323 } 00324 if (freeMemory) { 00325 gfr_freeEntry (currEntry); 00326 } 00327 AllocVar (currEntry); 00328 index = 0; 00329 w = wordIterCreate (line,"\t",0); 00330 while (token = wordNext (w)) { 00331 columnType = arru (columnTypes,index,int); 00332 if (columnType == GFR_COLUMN_TYPE_NUM_INTER) { 00333 currEntry->numInter = atoi (token); 00334 } 00335 else if (columnType == GFR_COLUMN_TYPE_INTER_MEAN_AB) { 00336 currEntry->interMeanAB = atof (token); 00337 } 00338 else if (columnType == GFR_COLUMN_TYPE_INTER_MEAN_BA) { 00339 currEntry->interMeanBA = atof (token); 00340 } 00341 else if (columnType == GFR_COLUMN_TYPE_PVALUE_AB) { 00342 currEntry->pValueAB = atof (token); 00343 } 00344 else if (columnType == GFR_COLUMN_TYPE_PVALUE_BA) { 00345 currEntry->pValueBA = atof (token); 00346 } 00347 else if (columnType == GFR_COLUMN_TYPE_NUM_INTRA1) { 00348 currEntry->numIntra1 = atoi (token); 00349 } 00350 else if (columnType == GFR_COLUMN_TYPE_NUM_INTRA2) { 00351 currEntry->numIntra2 = atoi (token); 00352 } 00353 else if (columnType == GFR_COLUMN_TYPE_FUSION_TYPE) { 00354 currEntry->fusionType = hlr_strdup (token); 00355 } 00356 else if (columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) { 00357 currEntry->nameTranscript1 = hlr_strdup (token); 00358 } 00359 else if (columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) { 00360 currEntry->chromosomeTranscript1 = hlr_strdup (token); 00361 } 00362 else if (columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) { 00363 currEntry->strandTranscript1 = token[0]; 00364 } 00365 else if (columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) { 00366 currEntry->numExonsTranscript1 = atoi (token); 00367 } 00368 else if (columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) { 00369 tokens = textFieldtok (token,"|"); 00370 currEntry->exonCoordinatesTranscript1 = arrayCreate (100,GfrExonCoordinate); 00371 for (i = 0; i < arrayMax (tokens); i++) { 00372 currEC = arrayp (currEntry->exonCoordinatesTranscript1,arrayMax (currEntry->exonCoordinatesTranscript1),GfrExonCoordinate); 00373 pos = strchr (textItem (tokens,i),','); 00374 *pos = '\0'; 00375 currEC->start = atoi (textItem (tokens,i)); 00376 currEC->end = atoi (pos + 1); 00377 } 00378 textDestroy (tokens); 00379 } 00380 else if (columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT1) { 00381 currEntry->startTranscript1 = atoi (token); 00382 } 00383 else if (columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT1) { 00384 currEntry->endTranscript1 = atoi (token); 00385 } 00386 else if (columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) { 00387 currEntry->geneSymbolTranscript1 = hlr_strdup (token); 00388 } 00389 else if (columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) { 00390 currEntry->descriptionTranscript1 = hlr_strdup (token); 00391 } 00392 else if (columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) { 00393 currEntry->nameTranscript2 = hlr_strdup (token); 00394 } 00395 else if (columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) { 00396 currEntry->chromosomeTranscript2 = hlr_strdup (token); 00397 } 00398 else if (columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) { 00399 currEntry->strandTranscript2 = token[0]; 00400 } 00401 else if (columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) { 00402 currEntry->numExonsTranscript2 = atoi (token); 00403 } 00404 else if (columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) { 00405 tokens = textFieldtok (token,"|"); 00406 currEntry->exonCoordinatesTranscript2 = arrayCreate (100,GfrExonCoordinate); 00407 for (i = 0; i < arrayMax (tokens); i++) { 00408 currEC = arrayp (currEntry->exonCoordinatesTranscript2,arrayMax (currEntry->exonCoordinatesTranscript2),GfrExonCoordinate); 00409 pos = strchr (textItem (tokens,i),','); 00410 *pos = '\0'; 00411 currEC->start = atoi (textItem (tokens,i)); 00412 currEC->end = atoi (pos + 1); 00413 } 00414 textDestroy (tokens); 00415 } 00416 else if (columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT2) { 00417 currEntry->startTranscript2 = atoi (token); 00418 } 00419 else if (columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT2) { 00420 currEntry->endTranscript2 = atoi (token); 00421 } 00422 else if (columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) { 00423 currEntry->geneSymbolTranscript2 = hlr_strdup (token); 00424 } 00425 else if (columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) { 00426 currEntry->descriptionTranscript2 = hlr_strdup (token); 00427 } 00428 else if (columnType == GFR_COLUMN_TYPE_INTER_READS) { 00429 tokens = textFieldtok (token,"|"); 00430 currEntry->interReads = arrayCreate (100,GfrInterRead); 00431 for (i = 0; i < arrayMax (tokens); i++) { 00432 if (textItem (tokens,i)[0] == '\0') { 00433 continue; 00434 } 00435 currGIR = arrayp (currEntry->interReads,arrayMax (currEntry->interReads),GfrInterRead); 00436 items = textFieldtok (textItem (tokens,i),","); 00437 if( arrayMax( items ) > 6 ) { 00438 currGIR->pairType = atoi (textItem (items,0)); 00439 currGIR->number1 = atoi (textItem (items,1)); 00440 currGIR->number2 = atoi (textItem (items,2)); 00441 currGIR->readStart1 = atoi (textItem (items,3)); 00442 currGIR->readEnd1 = atoi (textItem (items,4)); 00443 currGIR->readStart2 = atoi (textItem (items,5)); 00444 currGIR->readEnd2 = atoi (textItem (items,6)); 00445 } else { 00446 currGIR->pairType = GFR_PAIR_TYPE_EXONIC_EXONIC; 00447 currGIR->number1 = atoi (textItem (items,0)); 00448 currGIR->readStart1 = atoi (textItem (items,1)); 00449 currGIR->readEnd1 = atoi (textItem (items,2)); 00450 currGIR->number2 = atoi (textItem (items,3)); 00451 currGIR->readStart2 = atoi (textItem (items,4)); 00452 currGIR->readEnd2 = atoi (textItem (items,5)); 00453 } 00454 currGIR->flag = 0; 00455 textDestroy (items); 00456 } 00457 textDestroy (tokens); 00458 } 00459 else if (columnType == GFR_COLUMN_TYPE_PAIR_COUNT) { 00460 tokens = textFieldtok (token,"|"); 00461 currEntry->pairCounts = arrayCreate (100,GfrPairCount); 00462 for (i = 0; i < arrayMax (tokens); i++) { 00463 currGPC = arrayp (currEntry->pairCounts,arrayMax (currEntry->pairCounts),GfrPairCount); 00464 items = textFieldtok (textItem (tokens,i),","); 00465 if( arrayMax( items ) > 3 ) { 00466 currGPC->pairType = atoi (textItem (items,0)); 00467 currGPC->count = atoi (textItem (items,1)); 00468 currGPC->number1 = atoi (textItem (items,2)); 00469 currGPC->number2 = atoi (textItem (items,3)); 00470 } else { 00471 currGPC->pairType = GFR_PAIR_TYPE_EXONIC_EXONIC; 00472 currGPC->count = atoi (textItem (items,2)); 00473 currGPC->number1 = atoi (textItem (items,0)); 00474 currGPC->number2 = atoi (textItem (items,1)); 00475 } 00476 textDestroy (items); 00477 } 00478 textDestroy (tokens); 00479 } 00480 else if (columnType == GFR_COLUMN_TYPE_ID) { 00481 currEntry->id = hlr_strdup (token); 00482 } 00483 else if (columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT1) { 00484 tokens = textFieldtok (token,"|"); 00485 currEntry->readsTranscript1 = textCreate (100); 00486 for (i = 0; i < arrayMax (tokens); i++) { 00487 textAdd (currEntry->readsTranscript1,textItem (tokens,i)); 00488 } 00489 textDestroy (tokens); 00490 } 00491 else if (columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT2) { 00492 tokens = textFieldtok (token,"|"); 00493 currEntry->readsTranscript2 = textCreate (100); 00494 for (i = 0; i < arrayMax (tokens); i++) { 00495 textAdd (currEntry->readsTranscript2,textItem (tokens,i)); 00496 } 00497 textDestroy (tokens); 00498 } 00499 else if (columnType == GFR_COLUMN_TYPE_SPER) { 00500 currEntry->SPER = atof(token); 00501 } 00502 else if (columnType == GFR_COLUMN_TYPE_DASPER) { 00503 currEntry->DASPER = atof(token); 00504 } 00505 else if (columnType == GFR_COLUMN_TYPE_RESPER) { 00506 currEntry->RESPER = atof(token); 00507 } 00508 else { 00509 die ("Unknown columnType: %d",columnType); 00510 } 00511 index++; 00512 } 00513 wordIterDestroy (w); 00514 return currEntry; 00515 } 00516 } 00517 if (freeMemory) { 00518 gfr_freeEntry (currEntry); 00519 } 00520 currEntry = NULL; 00521 return currEntry; 00522 } 00523 00524 00525 00526 GfrEntry* gfr_nextEntry (void) 00527 { 00528 return gfr_processNextEntry (1); 00529 } 00530 00531 00532 00533 Array gfr_parse (void) 00534 { 00535 Array gfrEntries; 00536 GfrEntry *currEntry; 00537 00538 gfrEntries = arrayCreate (100000,GfrEntry); 00539 while (currEntry = gfr_processNextEntry (0)) { 00540 array (gfrEntries,arrayMax (gfrEntries),GfrEntry) = *currEntry; 00541 } 00542 return gfrEntries; 00543 } 00544 00545 00546 00547 static void gfr_addTab (Stringa buffer, int *first) 00548 { 00549 if (*first == 1) { 00550 *first = 0; 00551 return; 00552 } 00553 stringCatChar (buffer,'\t'); 00554 } 00555 00556 00557 00558 char* gfr_writeHeader (void) 00559 { 00560 static Stringa buffer = NULL; 00561 int i; 00562 00563 stringCreateClear (buffer,100); 00564 for (i = 0; i < arrayMax (columnHeaders); i++) { 00565 stringAppendf (buffer,"%s%s",textItem (columnHeaders,i), 00566 i < arrayMax (columnHeaders) - 1 ? "\t" : ""); 00567 } 00568 return string (buffer); 00569 } 00570 00571 00572 00573 char* gfr_writeGfrEntry (GfrEntry *currEntry) 00574 { 00575 static Stringa buffer = NULL; 00576 int first; 00577 int i,j; 00578 int columnType; 00579 GfrPairCount *currGPC; 00580 GfrInterRead *currGIR; 00581 GfrExonCoordinate *currEC; 00582 00583 stringCreateClear (buffer,100); 00584 first = 1; 00585 for (i = 0; i < arrayMax (columnTypes); i++) { 00586 columnType = arru (columnTypes,i,int); 00587 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTER) && columnType == GFR_COLUMN_TYPE_NUM_INTER) { 00588 gfr_addTab (buffer,&first); 00589 stringAppendf (buffer,"%d",currEntry->numInter); 00590 } 00591 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_AB) && columnType == GFR_COLUMN_TYPE_INTER_MEAN_AB) { 00592 gfr_addTab (buffer,&first); 00593 stringAppendf (buffer,"%.2f",currEntry->interMeanAB); 00594 } 00595 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_BA) && columnType == GFR_COLUMN_TYPE_INTER_MEAN_BA) { 00596 gfr_addTab (buffer,&first); 00597 stringAppendf (buffer,"%.2f",currEntry->interMeanBA); 00598 } 00599 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_AB) && columnType == GFR_COLUMN_TYPE_PVALUE_AB) { 00600 gfr_addTab (buffer,&first); 00601 stringAppendf (buffer,"%.5f",currEntry->pValueAB); 00602 } 00603 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_BA) && columnType == GFR_COLUMN_TYPE_PVALUE_BA) { 00604 gfr_addTab (buffer,&first); 00605 stringAppendf (buffer,"%.5f",currEntry->pValueBA); 00606 } 00607 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA1) && columnType == GFR_COLUMN_TYPE_NUM_INTRA1) { 00608 gfr_addTab (buffer,&first); 00609 stringAppendf (buffer,"%d",currEntry->numIntra1); 00610 } 00611 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA2) && columnType == GFR_COLUMN_TYPE_NUM_INTRA2) { 00612 gfr_addTab (buffer,&first); 00613 stringAppendf (buffer,"%d",currEntry->numIntra2); 00614 } 00615 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE) && columnType == GFR_COLUMN_TYPE_FUSION_TYPE) { 00616 gfr_addTab (buffer,&first); 00617 stringAppendf (buffer,"%s",currEntry->fusionType); 00618 } 00619 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) { 00620 gfr_addTab (buffer,&first); 00621 stringAppendf (buffer,"%s",currEntry->nameTranscript1); 00622 } 00623 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) { 00624 gfr_addTab (buffer,&first); 00625 stringAppendf (buffer,"%d",currEntry->numExonsTranscript1); 00626 } 00627 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) { 00628 gfr_addTab (buffer,&first); 00629 for (j = 0; j < arrayMax (currEntry->exonCoordinatesTranscript1); j++) { 00630 currEC = arrp (currEntry->exonCoordinatesTranscript1,j,GfrExonCoordinate); 00631 stringAppendf (buffer,"%d,%d%s",currEC->start,currEC->end,j < arrayMax (currEntry->exonCoordinatesTranscript1) - 1 ? "|" : ""); 00632 } 00633 } 00634 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) { 00635 gfr_addTab (buffer,&first); 00636 stringAppendf (buffer,"%s",currEntry->chromosomeTranscript1); 00637 } 00638 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) { 00639 gfr_addTab (buffer,&first); 00640 stringAppendf (buffer,"%c",currEntry->strandTranscript1); 00641 } 00642 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT1) { 00643 gfr_addTab (buffer,&first); 00644 stringAppendf (buffer,"%d",currEntry->startTranscript1); 00645 } 00646 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT1) { 00647 gfr_addTab (buffer,&first); 00648 stringAppendf (buffer,"%d",currEntry->endTranscript1); 00649 } 00650 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) { 00651 gfr_addTab (buffer,&first); 00652 stringAppendf (buffer,"%s",currEntry->geneSymbolTranscript1); 00653 } 00654 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) { 00655 gfr_addTab (buffer,&first); 00656 stringAppendf (buffer,"%s",currEntry->descriptionTranscript1); 00657 } 00658 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) { 00659 gfr_addTab (buffer,&first); 00660 stringAppendf (buffer,"%s",currEntry->nameTranscript2); 00661 } 00662 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) { 00663 gfr_addTab (buffer,&first); 00664 stringAppendf (buffer,"%d",currEntry->numExonsTranscript2); 00665 } 00666 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) { 00667 gfr_addTab (buffer,&first); 00668 for (j = 0; j < arrayMax (currEntry->exonCoordinatesTranscript2); j++) { 00669 currEC = arrp (currEntry->exonCoordinatesTranscript2,j,GfrExonCoordinate); 00670 stringAppendf (buffer,"%d,%d%s",currEC->start,currEC->end,j < arrayMax (currEntry->exonCoordinatesTranscript2) - 1 ? "|" : ""); 00671 } 00672 } 00673 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) { 00674 gfr_addTab (buffer,&first); 00675 stringAppendf (buffer,"%s",currEntry->chromosomeTranscript2); 00676 } 00677 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) { 00678 gfr_addTab (buffer,&first); 00679 stringAppendf (buffer,"%c",currEntry->strandTranscript2); 00680 } 00681 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT2) { 00682 gfr_addTab (buffer,&first); 00683 stringAppendf (buffer,"%d",currEntry->startTranscript2); 00684 } 00685 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT2) { 00686 gfr_addTab (buffer,&first); 00687 stringAppendf (buffer,"%d",currEntry->endTranscript2); 00688 } 00689 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) { 00690 gfr_addTab (buffer,&first); 00691 stringAppendf (buffer,"%s",currEntry->geneSymbolTranscript2); 00692 } 00693 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) { 00694 gfr_addTab (buffer,&first); 00695 stringAppendf (buffer,"%s",currEntry->descriptionTranscript2); 00696 } 00697 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT) && columnType == GFR_COLUMN_TYPE_PAIR_COUNT) { 00698 gfr_addTab (buffer,&first); 00699 for (j = 0; j < arrayMax (currEntry->pairCounts); j++) { 00700 currGPC = arrp (currEntry->pairCounts,j,GfrPairCount); 00701 stringAppendf (buffer,"%d,%d,%d,%d%s",currGPC->pairType,currGPC->count,currGPC->number1,currGPC->number2, 00702 j < arrayMax (currEntry->pairCounts) - 1 ? "|" : ""); 00703 } 00704 } 00705 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS) && columnType == GFR_COLUMN_TYPE_INTER_READS) { 00706 gfr_addTab (buffer,&first); 00707 for (j = 0; j < arrayMax (currEntry->interReads); j++) { 00708 currGIR = arrp (currEntry->interReads,j,GfrInterRead); 00709 if (currGIR->flag == 0) { 00710 stringAppendf (buffer,"%d,%d,%d,%d,%d,%d,%d%s", 00711 currGIR->pairType,currGIR->number1,currGIR->number2, 00712 currGIR->readStart1,currGIR->readEnd1, 00713 currGIR->readStart2,currGIR->readEnd2, 00714 j < arrayMax (currEntry->interReads) - 1 ? "|" : ""); 00715 } 00716 } 00717 } 00718 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_ID) && columnType == GFR_COLUMN_TYPE_ID) { 00719 gfr_addTab (buffer,&first); 00720 stringAppendf (buffer,"%s",currEntry->id); 00721 } 00722 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT1) { 00723 gfr_addTab (buffer,&first); 00724 for (j = 0; j < arrayMax (currEntry->readsTranscript1); j++) { 00725 stringAppendf (buffer,"%s%s",textItem (currEntry->readsTranscript1,j), 00726 j < arrayMax (currEntry->readsTranscript1) - 1 ? "|" : ""); 00727 } 00728 } 00729 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT2) { 00730 gfr_addTab (buffer,&first); 00731 for (j = 0; j < arrayMax (currEntry->readsTranscript2); j++) { 00732 stringAppendf (buffer,"%s%s",textItem (currEntry->readsTranscript2,j), 00733 j < arrayMax (currEntry->readsTranscript2) - 1 ? "|" : ""); 00734 } 00735 } 00736 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_SPER) && columnType == GFR_COLUMN_TYPE_SPER) { 00737 gfr_addTab (buffer,&first); 00738 stringAppendf (buffer,"%f",currEntry->SPER); 00739 } 00740 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DASPER) && columnType == GFR_COLUMN_TYPE_DASPER) { 00741 gfr_addTab (buffer,&first); 00742 stringAppendf (buffer,"%f",currEntry->DASPER); 00743 } 00744 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_RESPER) && columnType == GFR_COLUMN_TYPE_RESPER) { 00745 gfr_addTab (buffer,&first); 00746 stringAppendf (buffer,"%f",currEntry->RESPER); 00747 } 00748 } 00749 return string (buffer); 00750 } 00751 00752