00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include "format.h"
00005 #include "array.h"
00006 #include "sqvUtil.h"
00007 #include "sqvWeb.h"
00008
00012 void swapInt ( int *a, int *b)
00013 {
00014 int tmp = *a;
00015 *a = *b;
00016 *b = tmp;
00017 };
00018
00019 int inRegion (int istart, int iend, int rstart, int rend)
00020 {
00021 if ((istart >= rstart && istart <= rend) &&
00022 (iend >= rstart && iend <= rend)) {
00023 return 1;
00024 }
00025 return 0;
00026 }
00027
00028 int inRegions (Array regions, int chromosome, int start, int end)
00029 {
00030 SRegion_t *tmp;
00031 int i;
00032
00033 tmp = arrayp (regions, 0, SRegion_t);
00034 if (tmp->chromosome == 0) {
00035 return 1;
00036 }
00037 else {
00038 for (i = 0; i < arrayMax (regions); i++) {
00039 tmp = arrayp (regions, i, SRegion_t);
00040 if (tmp->chromosome == chromosome && tmp->show) {
00041 if ((start >= tmp->start && start <= tmp->end) &&
00042 (end >= tmp->start && end <= tmp->end)) {
00043 return 1;
00044 }
00045 }
00046 }
00047 return 0;
00048 }
00049 }
00050
00051 int SRegionCmp (SRegion_t *region1, SRegion_t *region2)
00052 {
00053 if (region1->chromosome == region2->chromosome) {
00054 return region1->instance - region2->instance;
00055 } else {
00056 return region1->chromosome - region2->chromosome;
00057 }
00058 }
00059
00060 int sortPosition( PEreads *per1, PEreads *per2) {
00061 int chrnum11, chrnum12, chrnum21, chrnum22;
00062 int start1, start2, chr1, chr2;
00063
00064 if (!strcmp (per1->read1.chromosome+3, "X")) {
00065 chrnum11 = 23;
00066 } else {
00067 if ( !strcmp (per1->read1.chromosome+3, "Y")) {
00068 chrnum11 = 24;
00069 } else {
00070 chrnum11 = atoi (per1->read1.chromosome+3 );
00071 }
00072 }
00073 if (!strcmp (per1->read2.chromosome+3, "X")) {
00074 chrnum12 = 23;
00075 } else {
00076 if ( !strcmp (per1->read2.chromosome+3, "Y")) {
00077 chrnum12 = 24;
00078 } else {
00079 chrnum12 = atoi (per1->read2.chromosome+3 );
00080 }
00081 }
00082 if( chrnum11 == chrnum12 ) {
00083 start1 = MIN( per1->read1.start, per1->read2.start);
00084 chr1 = chrnum11;
00085 } else {
00086 chr1 = MIN (chrnum11, chrnum12 );
00087 if( chrnum11 < chrnum12 )
00088 start1 = per1->read1.start;
00089 else
00090 start1 = per1->read2.start;
00091 }
00092
00093
00094 if (!strcmp (per2->read1.chromosome+3, "X")) {
00095 chrnum21 = 23;
00096 } else {
00097 if ( !strcmp (per2->read1.chromosome+3, "Y")) {
00098 chrnum21 = 24;
00099 } else {
00100 chrnum21 = atoi (per2->read1.chromosome+3 );
00101 }
00102 }
00103 if (!strcmp (per2->read2.chromosome+3, "X")) {
00104 chrnum22 = 23;
00105 } else {
00106 if ( !strcmp (per2->read2.chromosome+3, "Y")) {
00107 chrnum22 = 24;
00108 } else {
00109 chrnum22 = atoi (per2->read2.chromosome+3 );
00110 }
00111 }
00112 if( chrnum21 == chrnum22 ) {
00113 start2 = MIN( per2->read2.start, per2->read2.start);
00114 chr2 = chrnum21;
00115 } else {
00116 chr2 = MIN (chrnum21, chrnum22 );
00117 if( chrnum21 < chrnum22 )
00118 start2 = per2->read1.start;
00119 else
00120 start2 = per2->read2.start;
00121 }
00122
00123 if( chr1 == chr2 )
00124 return start1 - start2;
00125 else
00126 return chr1 - chr2;
00127 }
00128
00129 int PEReadSizeCmp (PEreads *per1, PEreads *per2)
00130 {
00131 int chrnum11, chrnum12, chrnum21, chrnum22;
00132 int instsize1, instsize2;
00133
00134 if (strcmp (per1->read1.chromosome+3, per1->read2.chromosome+3) ||
00135 strcmp (per2->read1.chromosome+3, per2->read2.chromosome+3)) {
00136 if (!strcmp (per1->read1.chromosome+3, "X")) {
00137 chrnum11 = 23;
00138 } else if (!strcmp (per1->read1.chromosome+3, "Y")) {
00139 chrnum11 = 24;
00140 } else {
00141 chrnum11 = atoi (per1->read1.chromosome+3);
00142 }
00143
00144 if (!strcmp (per1->read2.chromosome+3, "X")) {
00145 chrnum12 = 23;
00146 } else if (!strcmp (per1->read2.chromosome+3, "Y")) {
00147 chrnum12 = 24;
00148 } else {
00149 chrnum12 = atoi (per1->read2.chromosome+3);
00150 }
00151
00152 if (!strcmp (per2->read1.chromosome+3, "X")) {
00153 chrnum21 = 23;
00154 } else if (!strcmp (per2->read1.chromosome+3, "Y")) {
00155 chrnum21 = 24;
00156 } else {
00157 chrnum21 = atoi (per2->read1.chromosome+3);
00158 }
00159
00160 if (!strcmp (per2->read2.chromosome+3, "X")) {
00161 chrnum22 = 23;
00162 } else if (!strcmp (per2->read2.chromosome+3, "Y")) {
00163 chrnum22 = 24;
00164 } else {
00165 chrnum22 = atoi (per2->read2.chromosome+3);
00166 }
00167
00168 instsize1 = chrnum12 - chrnum11;
00169 instsize2 = chrnum22 - chrnum21;
00170 }
00171 else {
00172 instsize1 = per1->read1.end - per1->read2.start;
00173 instsize2 = per2->read1.end - per2->read2.start;
00174 }
00175
00176 if (instsize1 < 0)
00177 instsize1 = 0 - instsize1;
00178 if (instsize2 < 0)
00179 instsize2 = 0 - instsize2;
00180
00181 return instsize2 - instsize1;
00182 }
00183
00184 char *getMchrname (int chromosome)
00185 {
00186 if (chromosome < 10) {
00187 return striappend ("mhs0", chromosome, 10);
00188 }
00189 else if (chromosome >= 10 && chromosome < 23) {
00190 return striappend ("mhs", chromosome, 10);
00191 }
00192 else if (chromosome == 23) {
00193 return strdup ("mhs0X");
00194 }
00195 else if (chromosome == 24) {
00196 return strdup ("mhs0Y");
00197 }
00198 else
00199 {
00200 return NULL;
00201 }
00202 }
00203
00204 char *getSchrname (int chromosome)
00205 {
00206 if (chromosome < 10) {
00207 return striappend ("shs0", chromosome, 10);
00208 }
00209 else if (chromosome >= 10 && chromosome < 23) {
00210 return striappend ("shs", chromosome, 10);
00211 }
00212 else if (chromosome == 23) {
00213 return strdup ("shs0X");
00214 }
00215 else if (chromosome == 24) {
00216 return strdup ("shs0Y");
00217 }
00218 else {
00219 return NULL;
00220 }
00221 }
00222
00223 char *getHchrname (int chromosome)
00224 {
00225 if (chromosome <= 22) {
00226 return striappend ("hs", chromosome, 10);
00227 }
00228 else if (chromosome == 23) {
00229 return strdup ("hsX");
00230 }
00231 else if (chromosome == 24) {
00232 return strdup ("hsY");
00233 }
00234 else {
00235 return NULL;
00236 }
00237 }
00238
00239 int getScale (Array regions)
00240 {
00241 int rlength = 0;
00242 SRegion_t *tmp;
00243 int i;
00244
00245 tmp = arrayp(regions, 0, SRegion_t);
00246 if (tmp->chromosome >= 1 && tmp->chromosome <= 24) {
00247 for (i = 0; i < arrayMax(regions); i++) {
00248 tmp = arrayp(regions, i, SRegion_t);
00249 rlength += tmp->end - tmp->start;
00250 }
00251 }
00252
00253 if (rlength > 500000000 || rlength == 0) {
00254
00255 return 1000000;
00256 }
00257 else if (rlength <= 500000000 && rlength > 50000000) {
00258
00259 return 100000;
00260 }
00261 else if (rlength <= 50000000 && rlength > 5000000) {
00262
00263 return 10000;
00264 }
00265 else if (rlength <= 5000000 && rlength > 500000) {
00266
00267 return 1000;
00268 }
00269 else if (rlength <= 500000 && rlength > 50000) {
00270
00271 return 100;
00272 }
00273 else if (rlength <= 50000 && rlength > 5000) {
00274
00275 return 10;
00276 }
00277 else {
00278
00279 return 1;
00280 }
00281 }
00282
00283 int getChrnum (char *chrname)
00284 {
00285 if (!strcmp (chrname, "X")) {
00286 return 23;
00287 }
00288 else if (!strcmp (chrname, "Y")) {
00289 return 24;
00290 }
00291 else {
00292 return atoi (chrname);
00293 }
00294 }
00295
00296
00297 int chrsize(int chromosome)
00298 {
00299 switch (chromosome) {
00300 case 1:
00301 return 247249719;
00302 break;
00303 case 2:
00304 return 242951149;
00305 break;
00306 case 3:
00307 return 199501827;
00308 break;
00309 case 4:
00310 return 191273063;
00311 break;
00312 case 5:
00313 return 180857866;
00314 break;
00315 case 6:
00316 return 170899992;
00317 break;
00318 case 7:
00319 return 158821424;
00320 break;
00321 case 8:
00322 return 146274826;
00323 break;
00324 case 9:
00325 return 140273252;
00326 break;
00327 case 10:
00328 return 135374737;
00329 break;
00330 case 11:
00331 return 134452384;
00332 break;
00333 case 12:
00334 return 132349534;
00335 break;
00336 case 13:
00337 return 114142980;
00338 break;
00339 case 14:
00340 return 106368585;
00341 break;
00342 case 15:
00343 return 100338915;
00344 break;
00345 case 16:
00346 return 88827254;
00347 break;
00348 case 17:
00349 return 78774742;
00350 break;
00351 case 18:
00352 return 76117153;
00353 break;
00354 case 19:
00355 return 63811651;
00356 break;
00357 case 20:
00358 return 62435964;
00359 break;
00360 case 21:
00361 return 46944323;
00362 break;
00363 case 22:
00364 return 49691432;
00365 break;
00366 case 23:
00367 return 154913754;
00368 break;
00369 case 24:
00370 return 57772954;
00371 break;
00372 default:
00373 return 0;
00374 break;
00375 }
00376 }
00377
00378 int putsn (int ntabs, const char *str)
00379 {
00380 int i;
00381 for (i = 0; i < ntabs; i++)
00382 putchar ('\t');
00383 return puts (str);
00384 }
00385
00386
00387 char *striappend(char *str, int value, int base)
00388 {
00389 char *tmp = (char *) malloc((ILEN) * sizeof(*tmp));
00390 char *new;
00391
00392 tmp = itoa(value, tmp, base);
00393 new = (char *) malloc((strlen(tmp) + strlen(str) + 1) * sizeof(*new));
00394
00395 strcpy(new, str);
00396 strcat(new, tmp);
00397
00398 free(tmp);
00399 return new;
00400 }
00401
00402 char *strappend(char *destination, const char *source)
00403 {
00404 destination = (char *) realloc(destination,
00405 (strlen(destination) + strlen(source) + 1)*sizeof(*destination));
00406
00407 strcat(destination, source);
00408 return destination;
00409 }
00410
00411 char *itoa (int value, char *str, int base)
00412 {
00413 int i = 0;
00414 int sign;
00415
00416 if ((sign = value) < 0)
00417 value *= -1;
00418
00419 do {
00420 str[i] = value % 10 + '0';
00421 i++;
00422 } while ((value /= 10) > 0);
00423
00424 if (sign < 0) {
00425 str[i] = '\0';
00426 i++;
00427 }
00428
00429 str[i] = '\0';
00430 reverse(str);
00431 return str;
00432 }
00433
00434 int antoi(const char *str, size_t n)
00435 {
00436 char *tmp = (char *) malloc((n+1) * sizeof(*str));
00437 int i;
00438
00439 tmp = strncpy(tmp, str, n);
00440 tmp[n] = '\0';
00441 i = atoi(tmp);
00442 free(tmp);
00443 return i;
00444 }
00445
00446 void reverse(char *str)
00447 {
00448 char c;
00449 int i, j;
00450 for (i = 0, j = strlen(str)-1; i < j; i++, j--) {
00451 c = str[j];
00452 str[j] = str[i];
00453 str[i] = c;
00454 }
00455 }