00001 #include "log.h"
00002 #include "format.h"
00003 #include "html.h"
00004 #include "htmlLinker.h"
00005 #include "gfr.h"
00006 #include "geneFusionsConfig.h"
00007
/*
 * Display labels for the read pair types. getPairTypeName() returns one of
 * these for the GFR_PAIR_TYPE_* constant of the same suffix. Each label is
 * "<location of read 1>-<location of read 2>"; note that the JUNCTION
 * location is rendered as "boundary" in the label text.
 */
00008 #define GFR_PAIR_NAME_EXONIC_EXONIC "exon-exon"
00009 #define GFR_PAIR_NAME_EXONIC_INTRONIC "exon-intron"
00010 #define GFR_PAIR_NAME_EXONIC_JUNCTION "exon-boundary"
00011 #define GFR_PAIR_NAME_INTRONIC_EXONIC "intron-exon"
00012 #define GFR_PAIR_NAME_INTRONIC_INTRONIC "intron-intron"
00013 #define GFR_PAIR_NAME_INTRONIC_JUNCTION "intron-boundary"
00014 #define GFR_PAIR_NAME_JUNCTION_JUNCTION "boundary-boundary"
00015 #define GFR_PAIR_NAME_JUNCTION_EXONIC "boundary-exon"
00016 #define GFR_PAIR_NAME_JUNCTION_INTRONIC "boundary-intron"
00017
00018
00019 static char* processString (char *str)
00020 {
00021 static Stringa buffer = NULL;
00022 Texta tokens;
00023 int i;
00024
00025 if (!strchr (str,'|')) {
00026 return str;
00027 }
00028 stringCreateClear (buffer,100);
00029 tokens = textStrtokP (str,"|");
00030 for (i = 0; i < arrayMax (tokens); i++) {
00031 stringAppendf (buffer,"%s%s",textItem (tokens,i),i < arrayMax (tokens) - 1 ? "<br>" : "");
00032 }
00033 return string (buffer);
00034 }
00035
00036 char* getPairTypeName( int pairType ) {
00037 switch ( pairType ) {
00038 case GFR_PAIR_TYPE_EXONIC_EXONIC:
00039 return GFR_PAIR_NAME_EXONIC_EXONIC;
00040 case GFR_PAIR_TYPE_EXONIC_INTRONIC:
00041 return GFR_PAIR_NAME_EXONIC_INTRONIC;
00042 case GFR_PAIR_TYPE_EXONIC_JUNCTION:
00043 return GFR_PAIR_NAME_EXONIC_JUNCTION;
00044 case GFR_PAIR_TYPE_INTRONIC_EXONIC:
00045 return GFR_PAIR_NAME_INTRONIC_EXONIC;
00046 case GFR_PAIR_TYPE_INTRONIC_INTRONIC:
00047 return GFR_PAIR_NAME_INTRONIC_INTRONIC;
00048 case GFR_PAIR_TYPE_INTRONIC_JUNCTION:
00049 return GFR_PAIR_NAME_INTRONIC_JUNCTION;
00050 case GFR_PAIR_TYPE_JUNCTION_JUNCTION:
00051 return GFR_PAIR_NAME_JUNCTION_JUNCTION;
00052 case GFR_PAIR_TYPE_JUNCTION_EXONIC:
00053 return GFR_PAIR_NAME_JUNCTION_EXONIC;
00054 case GFR_PAIR_TYPE_JUNCTION_INTRONIC:
00055 return GFR_PAIR_NAME_JUNCTION_INTRONIC;
00056 }
00057 return "NULL";
00058 }
00059 char* getEntryNumber( int number, int pairType, int readNum ) {
00060 int flag=0;
00061 Stringa str=stringCreate(10);
00062 switch ( pairType ) {
00063 case GFR_PAIR_TYPE_EXONIC_EXONIC:
00064 case GFR_PAIR_TYPE_EXONIC_INTRONIC:
00065 case GFR_PAIR_TYPE_INTRONIC_EXONIC:
00066 case GFR_PAIR_TYPE_INTRONIC_INTRONIC:
00067 break;
00068 case GFR_PAIR_TYPE_INTRONIC_JUNCTION:
00069 case GFR_PAIR_TYPE_EXONIC_JUNCTION:
00070 if( readNum==2 ) flag=1;
00071 break;
00072 case GFR_PAIR_TYPE_JUNCTION_EXONIC:
00073 case GFR_PAIR_TYPE_JUNCTION_INTRONIC:
00074 if( readNum==1 ) flag=1;
00075 break;
00076 case GFR_PAIR_TYPE_JUNCTION_JUNCTION:
00077 flag=1;
00078 }
00079 if( flag==1 ) {
00080 int rem = number % 2;
00081 if( rem == 0 )
00082 stringPrintf(str, "%d right", number/2);
00083 else
00084 stringPrintf(str, "left %d", number/2+1);
00085 } else {
00086 stringPrintf(str, "%d", number);
00087 }
00088 return string(str);
00089 }
00090
00091 int main (int argc, char *argv[])
00092 {
00093 FILE* ftmp=NULL;
00094 cgiInit();
00095 cgiHeader("text/html");
00096
00097 if (argc == 3) {
00098 GfrEntry *currGE;
00099 Stringa buffer;
00100 GfrPairCount *currGEPC;
00101 GfrInterRead *currGIR;
00102 int i;
00103
00104 puts ("<html>");
00105 puts ("<head>");
00106 html_printGenericStyleSheet (12);
00107 puts ("<title>geneFusions Details</title>\n");
00108 puts ("</head>");
00109 puts ("<body>");
00110 buffer = stringCreate (100);
00111 stringPrintf (buffer,"%s/%s.gfr",WEB_DATA_DIR,argv[1]);
00112 gfr_init (string (buffer));
00113 while (currGE = gfr_nextEntry ()){
00114 fflush( stdout );
00115 if (!strEqual (currGE->id,argv[2])) {
00116 continue;
00117 }
00118 printf ("<h1>Detailed summary for potential gene fusion candidate</h1><br>");
00119 puts ("<table border=0 cellpadding=10>");
00120 puts ("<tr align=left valign=top>");
00121 puts ("<td width=400>");
00122 puts ("<h2>Summary information</h2><br>");
00123 printf ("<b>Identifier</b>: %s<br><br>\n",currGE->id);
00124 printf ("<b>Number of inter paired-end reads</b>: %d<br><br>\n",currGE->numInter);
00125 printf ("<b>Type</b>: %s<br><br>\n",currGE->fusionType);
00126
00127 stringPrintf(buffer, "%s/GFF/%s.gff", WEB_DATA_DIR,currGE->id);
00128 ftmp = fopen( string(buffer), "r" );
00129 if( ftmp ) {
00130 printf ("<b>Connected Reads</b>: <a href=%s&hgt.customText=%s/GFF/%s.gff target=blank>UCSC connectivity graph</a><br>\n",
00131 htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript1,currGE->startTranscript1 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript2 + UCSC_GENOME_BROWSER_FLANKING_REGION),
00132 WEB_DATA_LINK,currGE->id);
00133 fclose( ftmp );
00134 }
00135
00136 puts ("</td>");
00137 puts ("<td>");
00138 puts ("<h2>Transcript connectivity graph</h2>");
00139 printf ("<img src=%s/IMAGES/%s.jpg alt=geneFusionImage>\n",WEB_DATA_LINK,currGE->id);
00140 puts ("</td>");
00141 puts ("<td>");
00142 puts ("<h2>Transcript connectivity table</h2><br>");
00143 puts ("<table border=0>");
00144 puts ("<tr align=left>");
00145 puts ("<th width=200>Pair Type</th>");
00146 puts ("<th width=200>Entry transcript 1</th>");
00147 puts ("<th width=200>Entry transcript 2</th>");
00148 puts ("<th width=200>Counts</th>");
00149 puts ("</tr>");
00150 fflush( stdout );
00151 for (i = 0; i < arrayMax (currGE->pairCounts); i++) {
00152 currGEPC = arrp (currGE->pairCounts,i,GfrPairCount);
00153 printf ("<tr><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>\n",
00154 getPairTypeName(currGEPC->pairType),
00155 getEntryNumber(currGEPC->number1, currGEPC->pairType, 1),
00156 getEntryNumber(currGEPC->number2, currGEPC->pairType, 2),
00157 currGEPC->count);
00158 }
00159 puts ("</table>");
00160 puts ("</td>");
00161 puts ("</tr>");
00162 puts ("</table>");
00163 puts ("<br>");
00164
00165 puts ("<h2>Transcript information</h2><br>");
00166 puts ("<table border=1 cellpadding=10 width=\"80%\">");
00167 puts ("<tr align=left>");
00168 puts ("<th width=\"20%\"></th>");
00169 puts ("<th><font color='blue'>Transcript 1</font></th>");
00170 puts ("<th><font color='orange'>Transcript 2</font></th>");
00171 puts ("</tr>");
00172 puts ("<tr align=left>");
00173 puts ("<td width=\"20%\"><b>Gene symbol(s)</b></td>");
00174 printf ("<td width=\"30%%\"><font color='blue'>%s</font></td>\n",processString (currGE->geneSymbolTranscript1));
00175 printf ("<td width=\"30%%\"><font color='orange'>%s</font></td>\n",processString (currGE->geneSymbolTranscript2));
00176 puts ("</tr>");
00177 puts ("<tr align=left>");
00178 puts ("<td width=\"20%\"><b>Coordinates</b></td>");
00179 printf ("<td width=\"30%%\">%s:%d-%d</td>\n",currGE->chromosomeTranscript1,currGE->startTranscript1,currGE->endTranscript1);
00180 printf ("<td width=\"30%%\">%s:%d-%d</td>\n",currGE->chromosomeTranscript2,currGE->startTranscript2,currGE->endTranscript2);
00181 puts ("</tr>");
00182 puts ("<tr align=left>");
00183 puts ("<td width=\"20%\"><b>Strand</b></td>");
00184 printf ("<td width=\"30%%\">%c</td>\n",currGE->strandTranscript1);
00185 printf ("<td width=\"30%%\">%c</td>\n",currGE->strandTranscript2);
00186 puts ("</tr>");
00187 puts ("<tr align=left>");
00188 puts ("<td width=\"20%\"><b>Gene description(s)</b></td>");
00189 printf ("<td width=\"30%%\">%s</td>\n",processString (currGE->descriptionTranscript1));
00190 printf ("<td width=\"30%%\">%s</td>\n",processString (currGE->descriptionTranscript2));
00191 puts ("</tr>");
00192 puts ("<tr align=left>");
00193 puts ("<td width=\"20%\"><b>Number of exons</b></td>");
00194 printf ("<td width=\"30%%\">%d</td>\n",currGE->numExonsTranscript1);
00195 printf ("<td width=\"30%%\">%d</td>\n",currGE->numExonsTranscript2);
00196 puts ("</tr>");
00197 puts ("<tr align=left>");
00198 puts ("<td width=\"20%\"><b>Number of intra paired-end reads</b></td>");
00199 printf ("<td width=\"30%%\">%d</td>\n",currGE->numIntra1);
00200 printf ("<td width=\"30%%\">%d</td>\n",currGE->numIntra2);
00201 puts ("</tr>");
00202 puts ("<tr align=left>");
00203 puts ("<td width=\"20%\"><b>Links</b></td>");
00204 printf ("<td width=\"30%%\">[<a href=%s&hgt.customText=%s/BED/%s_1.bed target=blank>UCSC genome browser</a>] [<a href=%s/FASTA/%s_1.fasta>FASTA file</a>]<br></td>\n",
00205 htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript1,currGE->startTranscript1 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript1 + UCSC_GENOME_BROWSER_FLANKING_REGION),
00206 WEB_DATA_LINK,currGE->id,WEB_DATA_LINK,currGE->id);
00207 printf ("<td width=\"30%%\">[<a href=%s&hgt.customText=%s/BED/%s_2.bed target=blank>UCSC genome browser</a>] [<a href=%s/FASTA/%s_2.fasta>FASTA file</a>]<br></td></tr>\n",
00208 htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript2,currGE->startTranscript2 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript2 + UCSC_GENOME_BROWSER_FLANKING_REGION),
00209 WEB_DATA_LINK,currGE->id,WEB_DATA_LINK,currGE->id);
00210
00211 puts ("<tr align=left>");
00212 puts ("<td width=\"20%\"><b>Expression</b></td>");
00213
00214 stringPrintf(buffer, "%s/BGRS/%s_%s.bgr.gz", WEB_DATA_DIR,argv[1],currGE->chromosomeTranscript1);
00215 ftmp = fopen( string(buffer), "r" );
00216 puts("<td width=\"30%\">");
00217 if( ftmp ) {
00218 printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target=blank>Expression %s</a>]",
00219 htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript1,currGE->startTranscript1 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript1 + UCSC_GENOME_BROWSER_FLANKING_REGION),
00220 WEB_DATA_LINK,argv[1],currGE->chromosomeTranscript1,currGE->chromosomeTranscript1);
00221 fclose(ftmp);
00222 }
00223 puts("</td>");
00224
00225 stringPrintf(buffer, "%s/BGRS/%s_%s.bgr.gz", WEB_DATA_DIR,argv[1],currGE->chromosomeTranscript2);
00226 ftmp = fopen( string(buffer), "r" );
00227 puts("<td width=\"30%\">");
00228 if( ftmp ) {
00229 printf ("[<a href=%s&hgt.customText=%s/BGRS/%s_%s.bgr.gz target=blank>Expression %s</a>]",
00230 htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript2,currGE->startTranscript2 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript2 + UCSC_GENOME_BROWSER_FLANKING_REGION),
00231 WEB_DATA_LINK,argv[1],currGE->chromosomeTranscript2,currGE->chromosomeTranscript2);
00232 fclose(ftmp);
00233 }
00234 puts("</td>");
00235 puts("</tr>");
00236 puts ("</table><br><br>");
00237
00238 puts ("<h2>Breakpoint analysis</h2><br>");
00239 puts ("<table border=1 width=\"80%\" cellpadding=10><thead><tr><th>Orientation</th><th>Alignments</th><th colspan=2>Breakpoints</th></tr></thead><tbody>");
00240 puts ("<tr><td>Orientation AB</td>");
00241 if (currGE->strandTranscript1=='+') {
00242 currGE->strandTranscript2=='+' ? stringPrintf(buffer, "AB_trans1F_trans2F") : stringPrintf(buffer, "AB_trans1F_trans2R");
00243 } else if( currGE->strandTranscript1 == '-') {
00244 currGE->strandTranscript2=='+' ? stringPrintf(buffer, "AB_trans1R_trans2F") : stringPrintf(buffer, "AB_trans1R_trans2R");
00245 } else {
00246 die("Strand informatation is not correct (transcript 1): %c", currGE->strandTranscript1);
00247 }
00248 printf ("<td align=center><a href=%s/ALIGNMENTS/%s_AB_breakPointAlignments.txt><img src=%s/IMAGES/%s.png></img> AB</a></td>",WEB_DATA_LINK,currGE->id, WEB_DATA_LINK, string(buffer));
00249 printf ("<td align=center><a href=%s&hgt.customText=%s/WIGS/%s_AB_breakPointsTranscript1.wig target=blank>Breakpoints transcript 1 UCSC Genome Browser</a></td>", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript1,currGE->startTranscript1 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript1 + UCSC_GENOME_BROWSER_FLANKING_REGION),WEB_DATA_LINK,currGE->id);
00250 printf ("<td align=center><a href=%s&hgt.customText=%s/WIGS/%s_AB_breakPointsTranscript2.wig target=blank>Breakpoints transcript 2 UCSC Genome Browser</a></td></tr>", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript2,currGE->startTranscript2 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript2 + UCSC_GENOME_BROWSER_FLANKING_REGION),WEB_DATA_LINK,currGE->id);
00251 fflush(stdout);
00252 puts ("<tr><td>Orientation BA</td>");
00253 if (currGE->strandTranscript1 == '+') {
00254 currGE->strandTranscript2=='+' ? stringPrintf(buffer, "BA_trans1F_trans2F") : stringPrintf(buffer, "BA_trans1F_trans2R");
00255 } else if( currGE->strandTranscript1 == '-') {
00256 currGE->strandTranscript2=='+' ? stringPrintf(buffer, "BA_trans1R_trans2F") : stringPrintf(buffer, "BA_trans1R_trans2R");
00257 } else {
00258 die("Strand informatation is not correct (transcript2): %c", currGE->strandTranscript2);
00259 }
00260 printf ("<td align=center><a href=%s/ALIGNMENTS/%s_BA_breakPointAlignments.txt><img src=%s/IMAGES/%s.png></img> BA</a></td>",WEB_DATA_LINK,currGE->id, WEB_DATA_LINK,string(buffer));
00261 printf ("<td align=center><a href=%s&hgt.customText=%s/WIGS/%s_BA_breakPointsTranscript2.wig target=blank>Breakpoints transcript 2 UCSC Genome Browser</a></td>", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript2,currGE->startTranscript2 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript2 + UCSC_GENOME_BROWSER_FLANKING_REGION),WEB_DATA_LINK,currGE->id);
00262 printf ("<td align=center><a href=%s&hgt.customText=%s/WIGS/%s_BA_breakPointsTranscript1.wig target=blank>Breakpoints transcript 1 UCSC Genome Browser</a></td></tr>", htmlLinker_generateLinkToGenomeBrowserAtUCSC ("hg18","vertebrate","human",currGE->chromosomeTranscript1,currGE->startTranscript1 - UCSC_GENOME_BROWSER_FLANKING_REGION,currGE->endTranscript1 + UCSC_GENOME_BROWSER_FLANKING_REGION),WEB_DATA_LINK,currGE->id);
00263
00264 puts ("</tbody></table>");
00265 puts ("<br><br><br>");
00266 fflush(stdout);
00267
00268
00269 puts ("<h2>Read coordinates</h2><br>");
00270 puts ("<table border=0>");
00271 puts ("<tr align=left>");
00272 puts ("<th width=\"10%\">Pair Type</th>");
00273 puts ("<th width=\"10%\">Entry Transcript 1</th>");
00274 puts ("<th width=\"10%\">Read start transcript 1</th>");
00275 puts ("<th width=\"10%\">Read end transcript 1</th>");
00276 puts ("<th width=\"10%\">Entry Transcript 2</th>");
00277 puts ("<th width=\"10%\">Read start transcript 2</th>");
00278 puts ("<th width=\"10%\">Read end transcript 2</th>");
00279 puts ("</tr>");
00280 for (i = 0; i < arrayMax (currGE->interReads); i++) {
00281 currGIR = arrp (currGE->interReads,i,GfrInterRead);
00282 printf ("<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%s</td><td>%d</td><td>%d</td></tr>\n",
00283 getPairTypeName(currGIR->pairType),
00284 getEntryNumber(currGIR->number1, currGIR->pairType, 1),
00285 currGIR->readStart1,currGIR->readEnd1,
00286 getEntryNumber(currGIR->number2,currGIR->pairType, 2),
00287 currGIR->readStart2,
00288 currGIR->readEnd2);
00289 }
00290 puts ("</table><br><br><br>");
00291 puts ("</body>");
00292 puts ("</html>");
00293 fflush (stdout);
00294 }
00295 }
00296 return 0;
00297 }