00001 #include "log.h"
00002 #include "format.h"
00003 #include "linestream.h"
00004 #include "common.h"
00005 #include "bits.h"
00006 #include "gfr.h"
00007
00008
00009
00010 static LineStream lsGfr = NULL;
00011 static Bits* presentColumnTypes = NULL;
00012 static Array columnTypes = NULL;
00013 static Texta columnHeaders = NULL;
00014 static char* headerLine = NULL;
00015
00016
00017
00018 static void gfr_addColumnType (char *type)
00019 {
00020 if (strEqual (type,GFR_COLUMN_NAME_NUM_INTER)) {
00021 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTER);
00022 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTER;
00023 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTER);
00024 }
00025 else if (strEqual (type,GFR_COLUMN_NAME_INTER_MEAN_AB)) {
00026 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_AB);
00027 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_MEAN_AB;
00028 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_MEAN_AB);
00029 }
00030 else if (strEqual (type,GFR_COLUMN_NAME_INTER_MEAN_BA)) {
00031 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_BA);
00032 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_MEAN_BA;
00033 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_MEAN_BA);
00034 }
00035 else if (strEqual (type,GFR_COLUMN_NAME_PVALUE_AB)) {
00036 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_AB);
00037 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PVALUE_AB;
00038 textAdd (columnHeaders,GFR_COLUMN_NAME_PVALUE_AB);
00039 }
00040 else if (strEqual (type,GFR_COLUMN_NAME_PVALUE_BA)) {
00041 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_BA);
00042 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PVALUE_BA;
00043 textAdd (columnHeaders,GFR_COLUMN_NAME_PVALUE_BA);
00044 }
00045 else if (strEqual (type,GFR_COLUMN_NAME_NUM_INTRA1)) {
00046 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA1);
00047 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTRA1;
00048 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTRA1);
00049 }
00050 else if (strEqual (type,GFR_COLUMN_NAME_NUM_INTRA2)) {
00051 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA2);
00052 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_INTRA2;
00053 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_INTRA2);
00054 }
00055 else if (strEqual (type,GFR_COLUMN_NAME_FUSION_TYPE)) {
00056 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE);
00057 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_FUSION_TYPE;
00058 textAdd (columnHeaders,GFR_COLUMN_NAME_FUSION_TYPE);
00059 }
00060 else if (strEqual (type,GFR_COLUMN_NAME_NAME_TRANSCRIPT1)) {
00061 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1);
00062 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NAME_TRANSCRIPT1;
00063 textAdd (columnHeaders,GFR_COLUMN_NAME_NAME_TRANSCRIPT1);
00064 }
00065 else if (strEqual (type,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT1)) {
00066 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1);
00067 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1;
00068 textAdd (columnHeaders,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT1);
00069 }
00070 else if (strEqual (type,GFR_COLUMN_NAME_STRAND_TRANSCRIPT1)) {
00071 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1);
00072 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1;
00073 textAdd (columnHeaders,GFR_COLUMN_NAME_STRAND_TRANSCRIPT1);
00074 }
00075 else if (strEqual (type,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT1)) {
00076 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1);
00077 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1;
00078 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT1);
00079 }
00080 else if (strEqual (type,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT1)) {
00081 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1);
00082 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1;
00083 textAdd (columnHeaders,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT1);
00084 }
00085 else if (strEqual (type,GFR_COLUMN_NAME_START_TRANSCRIPT1)) {
00086 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT1);
00087 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_START_TRANSCRIPT1;
00088 textAdd (columnHeaders,GFR_COLUMN_NAME_START_TRANSCRIPT1);
00089 }
00090 else if (strEqual (type,GFR_COLUMN_NAME_END_TRANSCRIPT1)) {
00091 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT1);
00092 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_END_TRANSCRIPT1;
00093 textAdd (columnHeaders,GFR_COLUMN_NAME_END_TRANSCRIPT1);
00094 }
00095 else if (strEqual (type,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT1)) {
00096 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1);
00097 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1;
00098 textAdd (columnHeaders,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT1);
00099 }
00100 else if (strEqual (type,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT1)) {
00101 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1);
00102 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1;
00103 textAdd (columnHeaders,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT1);
00104 }
00105 else if (strEqual (type,GFR_COLUMN_NAME_NAME_TRANSCRIPT2)) {
00106 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2);
00107 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NAME_TRANSCRIPT2;
00108 textAdd (columnHeaders,GFR_COLUMN_NAME_NAME_TRANSCRIPT2);
00109 }
00110 else if (strEqual (type,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT2)) {
00111 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2);
00112 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2;
00113 textAdd (columnHeaders,GFR_COLUMN_NAME_CHROMOSOME_TRANSCRIPT2);
00114 }
00115 else if (strEqual (type,GFR_COLUMN_NAME_STRAND_TRANSCRIPT2)) {
00116 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2);
00117 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2;
00118 textAdd (columnHeaders,GFR_COLUMN_NAME_STRAND_TRANSCRIPT2);
00119 }
00120 else if (strEqual (type,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT2)) {
00121 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2);
00122 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2;
00123 textAdd (columnHeaders,GFR_COLUMN_NAME_NUM_EXONS_TRANSCRIPT2);
00124 }
00125 else if (strEqual (type,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT2)) {
00126 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2);
00127 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2;
00128 textAdd (columnHeaders,GFR_COLUMN_NAME_EXON_COORDINATES_TRANSCRIPT2);
00129 }
00130 else if (strEqual (type,GFR_COLUMN_NAME_START_TRANSCRIPT2)) {
00131 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT2);
00132 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_START_TRANSCRIPT2;
00133 textAdd (columnHeaders,GFR_COLUMN_NAME_START_TRANSCRIPT2);
00134 }
00135 else if (strEqual (type,GFR_COLUMN_NAME_END_TRANSCRIPT2)) {
00136 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT2);
00137 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_END_TRANSCRIPT2;
00138 textAdd (columnHeaders,GFR_COLUMN_NAME_END_TRANSCRIPT2);
00139 }
00140 else if (strEqual (type,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT2)) {
00141 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2);
00142 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2;
00143 textAdd (columnHeaders,GFR_COLUMN_NAME_GENE_SYMBOL_TRANSCRIPT2);
00144 }
00145 else if (strEqual (type,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT2)) {
00146 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2);
00147 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2;
00148 textAdd (columnHeaders,GFR_COLUMN_NAME_DESCRIPTION_TRANSCRIPT2);
00149 }
00150 else if (strEqual (type,GFR_COLUMN_NAME_INTER_READS)) {
00151 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS);
00152 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_INTER_READS;
00153 textAdd (columnHeaders,GFR_COLUMN_NAME_INTER_READS);
00154 }
00155 else if (strEqual (type,GFR_COLUMN_NAME_PAIR_COUNT)) {
00156 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT);
00157 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_PAIR_COUNT;
00158 textAdd (columnHeaders,GFR_COLUMN_NAME_PAIR_COUNT);
00159 }
00160 else if (strEqual (type,GFR_COLUMN_NAME_ID)) {
00161 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_ID);
00162 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_ID;
00163 textAdd (columnHeaders,GFR_COLUMN_NAME_ID);
00164 }
00165 else if (strEqual (type,GFR_COLUMN_NAME_READS_TRANSCRIPT1)) {
00166 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1);
00167 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_READS_TRANSCRIPT1;
00168 textAdd (columnHeaders,GFR_COLUMN_NAME_READS_TRANSCRIPT1);
00169 }
00170 else if (strEqual (type,GFR_COLUMN_NAME_READS_TRANSCRIPT2)) {
00171 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2);
00172 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_READS_TRANSCRIPT2;
00173 textAdd (columnHeaders,GFR_COLUMN_NAME_READS_TRANSCRIPT2);
00174 }
00175 else if (strEqual (type,GFR_COLUMN_NAME_SPER)) {
00176 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_SPER);
00177 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_SPER;
00178 textAdd (columnHeaders,GFR_COLUMN_NAME_SPER);
00179 }
00180 else if (strEqual (type,GFR_COLUMN_NAME_DASPER)) {
00181 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_DASPER);
00182 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_DASPER;
00183 textAdd (columnHeaders,GFR_COLUMN_NAME_DASPER);
00184 }
00185 else if (strEqual (type,GFR_COLUMN_NAME_RESPER)) {
00186 bitSetOne (presentColumnTypes,GFR_COLUMN_TYPE_RESPER);
00187 array (columnTypes,arrayMax (columnTypes),int) = GFR_COLUMN_TYPE_RESPER;
00188 textAdd (columnHeaders,GFR_COLUMN_NAME_RESPER);
00189 }
00190 else {
00191 die ("Unknown presentColumn: %s",type);
00192 }
00193 }
00194
00195
00196
00197 int gfr_init (char *fileName)
00198 {
00199 int i;
00200 Texta tokens;
00201 lsGfr = ls_createFromFile (fileName);
00202 char* firstLine = ls_nextLine( lsGfr );
00203 if( firstLine==NULL) return 0;
00204 columnTypes = arrayCreate (20,int);
00205 columnHeaders = textCreate (20);
00206 presentColumnTypes = bitAlloc (100);
00207 headerLine = hlr_strdup ( firstLine );
00208 tokens = textFieldtokP (headerLine,"\t");
00209 for (i = 0; i < arrayMax (tokens); i++) {
00210 gfr_addColumnType (textItem (tokens,i));
00211 }
00212 return 1;
00213 }
00214
00215
00216
00217 void gfr_addNewColumnType (char* columnName)
00218 {
00219 int i;
00220
00221 i = 0;
00222 while (i < arrayMax (columnHeaders)) {
00223 if (strEqual (textItem (columnHeaders,i),columnName)) {
00224 break;
00225 }
00226 i++;
00227 }
00228 if (i == arrayMax (columnHeaders)) {
00229 gfr_addColumnType (columnName);
00230 }
00231 }
00232
00233
00234
00235 void gfr_deInit (void)
00236 {
00237 if (lsGfr != NULL) {
00238 ls_destroy (lsGfr);
00239 }
00240 arrayDestroy (columnTypes);
00241 textDestroy (columnHeaders);
00242 bitFree (&presentColumnTypes);
00243 hlr_free (headerLine);
00244 }
00245
00246
00247
00248 static void gfr_freeEntry (GfrEntry* currEntry)
00249 {
00250 if (currEntry == NULL) {
00251 return;
00252 }
00253 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE)) {
00254 hlr_free (currEntry->fusionType);
00255 }
00256 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1)) {
00257 hlr_free (currEntry->nameTranscript1);
00258 }
00259 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1)) {
00260 hlr_free (currEntry->chromosomeTranscript1);
00261 }
00262 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1)) {
00263 hlr_free (currEntry->geneSymbolTranscript1);
00264 }
00265 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1)) {
00266 arrayDestroy (currEntry->exonCoordinatesTranscript1);
00267 }
00268 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1)) {
00269 hlr_free (currEntry->descriptionTranscript1);
00270 }
00271 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2)) {
00272 hlr_free (currEntry->nameTranscript2);
00273 }
00274 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2)) {
00275 hlr_free (currEntry->chromosomeTranscript2);
00276 }
00277 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2)) {
00278 hlr_free (currEntry->geneSymbolTranscript2);
00279 }
00280 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2)) {
00281 arrayDestroy (currEntry->exonCoordinatesTranscript2);
00282 }
00283 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2)) {
00284 hlr_free (currEntry->descriptionTranscript2);
00285 }
00286 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_ID)) {
00287 hlr_free (currEntry->id);
00288 }
00289 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS)) {
00290 arrayDestroy (currEntry->interReads);
00291 }
00292 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT)) {
00293 arrayDestroy (currEntry->pairCounts);
00294 }
00295 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1)) {
00296 textDestroy (currEntry->readsTranscript1);
00297 }
00298 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2)) {
00299 textDestroy (currEntry->readsTranscript2);
00300 }
00301 freeMem (currEntry);
00302 currEntry = NULL;
00303 }
00304
00305
00306
00307 static GfrEntry* gfr_processNextEntry (int freeMemory)
00308 {
00309 static GfrEntry *currEntry = NULL;
00310 char *line,*token,*pos;
00311 WordIter w;
00312 int index,columnType;
00313 GfrPairCount *currGPC;
00314 GfrInterRead *currGIR;
00315 Texta tokens,items;
00316 int i;
00317 GfrExonCoordinate *currEC;
00318
00319 if (!ls_isEof (lsGfr)) {
00320 while (line = ls_nextLine (lsGfr)) {
00321 if (line[0] == '\0') {
00322 continue;
00323 }
00324 if (freeMemory) {
00325 gfr_freeEntry (currEntry);
00326 }
00327 AllocVar (currEntry);
00328 index = 0;
00329 w = wordIterCreate (line,"\t",0);
00330 while (token = wordNext (w)) {
00331 columnType = arru (columnTypes,index,int);
00332 if (columnType == GFR_COLUMN_TYPE_NUM_INTER) {
00333 currEntry->numInter = atoi (token);
00334 }
00335 else if (columnType == GFR_COLUMN_TYPE_INTER_MEAN_AB) {
00336 currEntry->interMeanAB = atof (token);
00337 }
00338 else if (columnType == GFR_COLUMN_TYPE_INTER_MEAN_BA) {
00339 currEntry->interMeanBA = atof (token);
00340 }
00341 else if (columnType == GFR_COLUMN_TYPE_PVALUE_AB) {
00342 currEntry->pValueAB = atof (token);
00343 }
00344 else if (columnType == GFR_COLUMN_TYPE_PVALUE_BA) {
00345 currEntry->pValueBA = atof (token);
00346 }
00347 else if (columnType == GFR_COLUMN_TYPE_NUM_INTRA1) {
00348 currEntry->numIntra1 = atoi (token);
00349 }
00350 else if (columnType == GFR_COLUMN_TYPE_NUM_INTRA2) {
00351 currEntry->numIntra2 = atoi (token);
00352 }
00353 else if (columnType == GFR_COLUMN_TYPE_FUSION_TYPE) {
00354 currEntry->fusionType = hlr_strdup (token);
00355 }
00356 else if (columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) {
00357 currEntry->nameTranscript1 = hlr_strdup (token);
00358 }
00359 else if (columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) {
00360 currEntry->chromosomeTranscript1 = hlr_strdup (token);
00361 }
00362 else if (columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) {
00363 currEntry->strandTranscript1 = token[0];
00364 }
00365 else if (columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) {
00366 currEntry->numExonsTranscript1 = atoi (token);
00367 }
00368 else if (columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) {
00369 tokens = textFieldtok (token,"|");
00370 currEntry->exonCoordinatesTranscript1 = arrayCreate (100,GfrExonCoordinate);
00371 for (i = 0; i < arrayMax (tokens); i++) {
00372 currEC = arrayp (currEntry->exonCoordinatesTranscript1,arrayMax (currEntry->exonCoordinatesTranscript1),GfrExonCoordinate);
00373 pos = strchr (textItem (tokens,i),',');
00374 *pos = '\0';
00375 currEC->start = atoi (textItem (tokens,i));
00376 currEC->end = atoi (pos + 1);
00377 }
00378 textDestroy (tokens);
00379 }
00380 else if (columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT1) {
00381 currEntry->startTranscript1 = atoi (token);
00382 }
00383 else if (columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT1) {
00384 currEntry->endTranscript1 = atoi (token);
00385 }
00386 else if (columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) {
00387 currEntry->geneSymbolTranscript1 = hlr_strdup (token);
00388 }
00389 else if (columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) {
00390 currEntry->descriptionTranscript1 = hlr_strdup (token);
00391 }
00392 else if (columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) {
00393 currEntry->nameTranscript2 = hlr_strdup (token);
00394 }
00395 else if (columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) {
00396 currEntry->chromosomeTranscript2 = hlr_strdup (token);
00397 }
00398 else if (columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) {
00399 currEntry->strandTranscript2 = token[0];
00400 }
00401 else if (columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) {
00402 currEntry->numExonsTranscript2 = atoi (token);
00403 }
00404 else if (columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) {
00405 tokens = textFieldtok (token,"|");
00406 currEntry->exonCoordinatesTranscript2 = arrayCreate (100,GfrExonCoordinate);
00407 for (i = 0; i < arrayMax (tokens); i++) {
00408 currEC = arrayp (currEntry->exonCoordinatesTranscript2,arrayMax (currEntry->exonCoordinatesTranscript2),GfrExonCoordinate);
00409 pos = strchr (textItem (tokens,i),',');
00410 *pos = '\0';
00411 currEC->start = atoi (textItem (tokens,i));
00412 currEC->end = atoi (pos + 1);
00413 }
00414 textDestroy (tokens);
00415 }
00416 else if (columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT2) {
00417 currEntry->startTranscript2 = atoi (token);
00418 }
00419 else if (columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT2) {
00420 currEntry->endTranscript2 = atoi (token);
00421 }
00422 else if (columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) {
00423 currEntry->geneSymbolTranscript2 = hlr_strdup (token);
00424 }
00425 else if (columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) {
00426 currEntry->descriptionTranscript2 = hlr_strdup (token);
00427 }
00428 else if (columnType == GFR_COLUMN_TYPE_INTER_READS) {
00429 tokens = textFieldtok (token,"|");
00430 currEntry->interReads = arrayCreate (100,GfrInterRead);
00431 for (i = 0; i < arrayMax (tokens); i++) {
00432 if (textItem (tokens,i)[0] == '\0') {
00433 continue;
00434 }
00435 currGIR = arrayp (currEntry->interReads,arrayMax (currEntry->interReads),GfrInterRead);
00436 items = textFieldtok (textItem (tokens,i),",");
00437 if( arrayMax( items ) > 6 ) {
00438 currGIR->pairType = atoi (textItem (items,0));
00439 currGIR->number1 = atoi (textItem (items,1));
00440 currGIR->number2 = atoi (textItem (items,2));
00441 currGIR->readStart1 = atoi (textItem (items,3));
00442 currGIR->readEnd1 = atoi (textItem (items,4));
00443 currGIR->readStart2 = atoi (textItem (items,5));
00444 currGIR->readEnd2 = atoi (textItem (items,6));
00445 } else {
00446 currGIR->pairType = GFR_PAIR_TYPE_EXONIC_EXONIC;
00447 currGIR->number1 = atoi (textItem (items,0));
00448 currGIR->readStart1 = atoi (textItem (items,1));
00449 currGIR->readEnd1 = atoi (textItem (items,2));
00450 currGIR->number2 = atoi (textItem (items,3));
00451 currGIR->readStart2 = atoi (textItem (items,4));
00452 currGIR->readEnd2 = atoi (textItem (items,5));
00453 }
00454 currGIR->flag = 0;
00455 textDestroy (items);
00456 }
00457 textDestroy (tokens);
00458 }
00459 else if (columnType == GFR_COLUMN_TYPE_PAIR_COUNT) {
00460 tokens = textFieldtok (token,"|");
00461 currEntry->pairCounts = arrayCreate (100,GfrPairCount);
00462 for (i = 0; i < arrayMax (tokens); i++) {
00463 currGPC = arrayp (currEntry->pairCounts,arrayMax (currEntry->pairCounts),GfrPairCount);
00464 items = textFieldtok (textItem (tokens,i),",");
00465 if( arrayMax( items ) > 3 ) {
00466 currGPC->pairType = atoi (textItem (items,0));
00467 currGPC->count = atoi (textItem (items,1));
00468 currGPC->number1 = atoi (textItem (items,2));
00469 currGPC->number2 = atoi (textItem (items,3));
00470 } else {
00471 currGPC->pairType = GFR_PAIR_TYPE_EXONIC_EXONIC;
00472 currGPC->count = atoi (textItem (items,2));
00473 currGPC->number1 = atoi (textItem (items,0));
00474 currGPC->number2 = atoi (textItem (items,1));
00475 }
00476 textDestroy (items);
00477 }
00478 textDestroy (tokens);
00479 }
00480 else if (columnType == GFR_COLUMN_TYPE_ID) {
00481 currEntry->id = hlr_strdup (token);
00482 }
00483 else if (columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT1) {
00484 tokens = textFieldtok (token,"|");
00485 currEntry->readsTranscript1 = textCreate (100);
00486 for (i = 0; i < arrayMax (tokens); i++) {
00487 textAdd (currEntry->readsTranscript1,textItem (tokens,i));
00488 }
00489 textDestroy (tokens);
00490 }
00491 else if (columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT2) {
00492 tokens = textFieldtok (token,"|");
00493 currEntry->readsTranscript2 = textCreate (100);
00494 for (i = 0; i < arrayMax (tokens); i++) {
00495 textAdd (currEntry->readsTranscript2,textItem (tokens,i));
00496 }
00497 textDestroy (tokens);
00498 }
00499 else if (columnType == GFR_COLUMN_TYPE_SPER) {
00500 currEntry->SPER = atof(token);
00501 }
00502 else if (columnType == GFR_COLUMN_TYPE_DASPER) {
00503 currEntry->DASPER = atof(token);
00504 }
00505 else if (columnType == GFR_COLUMN_TYPE_RESPER) {
00506 currEntry->RESPER = atof(token);
00507 }
00508 else {
00509 die ("Unknown columnType: %d",columnType);
00510 }
00511 index++;
00512 }
00513 wordIterDestroy (w);
00514 return currEntry;
00515 }
00516 }
00517 if (freeMemory) {
00518 gfr_freeEntry (currEntry);
00519 }
00520 currEntry = NULL;
00521 return currEntry;
00522 }
00523
00524
00525
00526 GfrEntry* gfr_nextEntry (void)
00527 {
00528 return gfr_processNextEntry (1);
00529 }
00530
00531
00532
00533 Array gfr_parse (void)
00534 {
00535 Array gfrEntries;
00536 GfrEntry *currEntry;
00537
00538 gfrEntries = arrayCreate (100000,GfrEntry);
00539 while (currEntry = gfr_processNextEntry (0)) {
00540 array (gfrEntries,arrayMax (gfrEntries),GfrEntry) = *currEntry;
00541 }
00542 return gfrEntries;
00543 }
00544
00545
00546
00547 static void gfr_addTab (Stringa buffer, int *first)
00548 {
00549 if (*first == 1) {
00550 *first = 0;
00551 return;
00552 }
00553 stringCatChar (buffer,'\t');
00554 }
00555
00556
00557
00558 char* gfr_writeHeader (void)
00559 {
00560 static Stringa buffer = NULL;
00561 int i;
00562
00563 stringCreateClear (buffer,100);
00564 for (i = 0; i < arrayMax (columnHeaders); i++) {
00565 stringAppendf (buffer,"%s%s",textItem (columnHeaders,i),
00566 i < arrayMax (columnHeaders) - 1 ? "\t" : "");
00567 }
00568 return string (buffer);
00569 }
00570
00571
00572
00573 char* gfr_writeGfrEntry (GfrEntry *currEntry)
00574 {
00575 static Stringa buffer = NULL;
00576 int first;
00577 int i,j;
00578 int columnType;
00579 GfrPairCount *currGPC;
00580 GfrInterRead *currGIR;
00581 GfrExonCoordinate *currEC;
00582
00583 stringCreateClear (buffer,100);
00584 first = 1;
00585 for (i = 0; i < arrayMax (columnTypes); i++) {
00586 columnType = arru (columnTypes,i,int);
00587 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTER) && columnType == GFR_COLUMN_TYPE_NUM_INTER) {
00588 gfr_addTab (buffer,&first);
00589 stringAppendf (buffer,"%d",currEntry->numInter);
00590 }
00591 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_AB) && columnType == GFR_COLUMN_TYPE_INTER_MEAN_AB) {
00592 gfr_addTab (buffer,&first);
00593 stringAppendf (buffer,"%.2f",currEntry->interMeanAB);
00594 }
00595 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_MEAN_BA) && columnType == GFR_COLUMN_TYPE_INTER_MEAN_BA) {
00596 gfr_addTab (buffer,&first);
00597 stringAppendf (buffer,"%.2f",currEntry->interMeanBA);
00598 }
00599 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_AB) && columnType == GFR_COLUMN_TYPE_PVALUE_AB) {
00600 gfr_addTab (buffer,&first);
00601 stringAppendf (buffer,"%.5f",currEntry->pValueAB);
00602 }
00603 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PVALUE_BA) && columnType == GFR_COLUMN_TYPE_PVALUE_BA) {
00604 gfr_addTab (buffer,&first);
00605 stringAppendf (buffer,"%.5f",currEntry->pValueBA);
00606 }
00607 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA1) && columnType == GFR_COLUMN_TYPE_NUM_INTRA1) {
00608 gfr_addTab (buffer,&first);
00609 stringAppendf (buffer,"%d",currEntry->numIntra1);
00610 }
00611 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_INTRA2) && columnType == GFR_COLUMN_TYPE_NUM_INTRA2) {
00612 gfr_addTab (buffer,&first);
00613 stringAppendf (buffer,"%d",currEntry->numIntra2);
00614 }
00615 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_FUSION_TYPE) && columnType == GFR_COLUMN_TYPE_FUSION_TYPE) {
00616 gfr_addTab (buffer,&first);
00617 stringAppendf (buffer,"%s",currEntry->fusionType);
00618 }
00619 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT1) {
00620 gfr_addTab (buffer,&first);
00621 stringAppendf (buffer,"%s",currEntry->nameTranscript1);
00622 }
00623 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT1) {
00624 gfr_addTab (buffer,&first);
00625 stringAppendf (buffer,"%d",currEntry->numExonsTranscript1);
00626 }
00627 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT1) {
00628 gfr_addTab (buffer,&first);
00629 for (j = 0; j < arrayMax (currEntry->exonCoordinatesTranscript1); j++) {
00630 currEC = arrp (currEntry->exonCoordinatesTranscript1,j,GfrExonCoordinate);
00631 stringAppendf (buffer,"%d,%d%s",currEC->start,currEC->end,j < arrayMax (currEntry->exonCoordinatesTranscript1) - 1 ? "|" : "");
00632 }
00633 }
00634 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT1) {
00635 gfr_addTab (buffer,&first);
00636 stringAppendf (buffer,"%s",currEntry->chromosomeTranscript1);
00637 }
00638 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT1) {
00639 gfr_addTab (buffer,&first);
00640 stringAppendf (buffer,"%c",currEntry->strandTranscript1);
00641 }
00642 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT1) {
00643 gfr_addTab (buffer,&first);
00644 stringAppendf (buffer,"%d",currEntry->startTranscript1);
00645 }
00646 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT1) {
00647 gfr_addTab (buffer,&first);
00648 stringAppendf (buffer,"%d",currEntry->endTranscript1);
00649 }
00650 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT1) {
00651 gfr_addTab (buffer,&first);
00652 stringAppendf (buffer,"%s",currEntry->geneSymbolTranscript1);
00653 }
00654 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT1) {
00655 gfr_addTab (buffer,&first);
00656 stringAppendf (buffer,"%s",currEntry->descriptionTranscript1);
00657 }
00658 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_NAME_TRANSCRIPT2) {
00659 gfr_addTab (buffer,&first);
00660 stringAppendf (buffer,"%s",currEntry->nameTranscript2);
00661 }
00662 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_NUM_EXONS_TRANSCRIPT2) {
00663 gfr_addTab (buffer,&first);
00664 stringAppendf (buffer,"%d",currEntry->numExonsTranscript2);
00665 }
00666 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_EXON_COORDINATES_TRANSCRIPT2) {
00667 gfr_addTab (buffer,&first);
00668 for (j = 0; j < arrayMax (currEntry->exonCoordinatesTranscript2); j++) {
00669 currEC = arrp (currEntry->exonCoordinatesTranscript2,j,GfrExonCoordinate);
00670 stringAppendf (buffer,"%d,%d%s",currEC->start,currEC->end,j < arrayMax (currEntry->exonCoordinatesTranscript2) - 1 ? "|" : "");
00671 }
00672 }
00673 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_CHROMOSOME_TRANSCRIPT2) {
00674 gfr_addTab (buffer,&first);
00675 stringAppendf (buffer,"%s",currEntry->chromosomeTranscript2);
00676 }
00677 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_STRAND_TRANSCRIPT2) {
00678 gfr_addTab (buffer,&first);
00679 stringAppendf (buffer,"%c",currEntry->strandTranscript2);
00680 }
00681 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_START_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_START_TRANSCRIPT2) {
00682 gfr_addTab (buffer,&first);
00683 stringAppendf (buffer,"%d",currEntry->startTranscript2);
00684 }
00685 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_END_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_END_TRANSCRIPT2) {
00686 gfr_addTab (buffer,&first);
00687 stringAppendf (buffer,"%d",currEntry->endTranscript2);
00688 }
00689 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_GENE_SYMBOL_TRANSCRIPT2) {
00690 gfr_addTab (buffer,&first);
00691 stringAppendf (buffer,"%s",currEntry->geneSymbolTranscript2);
00692 }
00693 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_DESCRIPTION_TRANSCRIPT2) {
00694 gfr_addTab (buffer,&first);
00695 stringAppendf (buffer,"%s",currEntry->descriptionTranscript2);
00696 }
00697 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_PAIR_COUNT) && columnType == GFR_COLUMN_TYPE_PAIR_COUNT) {
00698 gfr_addTab (buffer,&first);
00699 for (j = 0; j < arrayMax (currEntry->pairCounts); j++) {
00700 currGPC = arrp (currEntry->pairCounts,j,GfrPairCount);
00701 stringAppendf (buffer,"%d,%d,%d,%d%s",currGPC->pairType,currGPC->count,currGPC->number1,currGPC->number2,
00702 j < arrayMax (currEntry->pairCounts) - 1 ? "|" : "");
00703 }
00704 }
00705 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_INTER_READS) && columnType == GFR_COLUMN_TYPE_INTER_READS) {
00706 gfr_addTab (buffer,&first);
00707 for (j = 0; j < arrayMax (currEntry->interReads); j++) {
00708 currGIR = arrp (currEntry->interReads,j,GfrInterRead);
00709 if (currGIR->flag == 0) {
00710 stringAppendf (buffer,"%d,%d,%d,%d,%d,%d,%d%s",
00711 currGIR->pairType,currGIR->number1,currGIR->number2,
00712 currGIR->readStart1,currGIR->readEnd1,
00713 currGIR->readStart2,currGIR->readEnd2,
00714 j < arrayMax (currEntry->interReads) - 1 ? "|" : "");
00715 }
00716 }
00717 }
00718 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_ID) && columnType == GFR_COLUMN_TYPE_ID) {
00719 gfr_addTab (buffer,&first);
00720 stringAppendf (buffer,"%s",currEntry->id);
00721 }
00722 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT1) && columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT1) {
00723 gfr_addTab (buffer,&first);
00724 for (j = 0; j < arrayMax (currEntry->readsTranscript1); j++) {
00725 stringAppendf (buffer,"%s%s",textItem (currEntry->readsTranscript1,j),
00726 j < arrayMax (currEntry->readsTranscript1) - 1 ? "|" : "");
00727 }
00728 }
00729 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_READS_TRANSCRIPT2) && columnType == GFR_COLUMN_TYPE_READS_TRANSCRIPT2) {
00730 gfr_addTab (buffer,&first);
00731 for (j = 0; j < arrayMax (currEntry->readsTranscript2); j++) {
00732 stringAppendf (buffer,"%s%s",textItem (currEntry->readsTranscript2,j),
00733 j < arrayMax (currEntry->readsTranscript2) - 1 ? "|" : "");
00734 }
00735 }
00736 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_SPER) && columnType == GFR_COLUMN_TYPE_SPER) {
00737 gfr_addTab (buffer,&first);
00738 stringAppendf (buffer,"%f",currEntry->SPER);
00739 }
00740 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_DASPER) && columnType == GFR_COLUMN_TYPE_DASPER) {
00741 gfr_addTab (buffer,&first);
00742 stringAppendf (buffer,"%f",currEntry->DASPER);
00743 }
00744 if (bitReadOne (presentColumnTypes,GFR_COLUMN_TYPE_RESPER) && columnType == GFR_COLUMN_TYPE_RESPER) {
00745 gfr_addTab (buffer,&first);
00746 stringAppendf (buffer,"%f",currEntry->RESPER);
00747 }
00748 }
00749 return string (buffer);
00750 }
00751
00752