#include "arraystats.h" #include "io.h" #include #include #include #include /***************************************************************************************************/ /** read_data.c **********************/ /** read paired sample data file **********************/ /***************************************************************************************************/ int Nheader; /* Number of lines in header */ /* declared here since it will be used by output routine write_llr */ void read_data( FILE *fpin ){ #define MAXGENES 50000 /* maximum no. genes for temporary storage */ #define MAXREPS 100 /* maxiumum no. replicates for temporary storage */ #define Mkeep 1 /* keep only genes with replicates Mkeep or greater */ #define MAXLENGTH 30 /* maximum length of gene name strings */ int i, j, k, l, Norig, col_count; char line[1000000], word[1000], save_line[1000000]; float **Xtemp, **Ytemp; char **yorf_temp, **gene_name_temp; int mstore[MAXGENES], internal2full_temp[MAXGENES] ; int Nkeep=0, mtemp=0; int N_col, Xcol[MAXREPS], Ycol[MAXREPS], Fcol[MAXREPS], index_to_find; int file_position, line_length, nXcols, nYcols, nFcols, npairs; int n_header_col, n_data_col; char index_to_find_string[MAXREPS], full_string[MAXREPS]; char yorf_word[MAXLENGTH], gene_name_word[MAXLENGTH], *token; /* allocate temporary memory */ Xtemp = (float **) calloc( MAXGENES ,sizeof(float*)); Ytemp = (float **) calloc( MAXGENES ,sizeof(float*)); yorf_temp = (char **) calloc( MAXGENES ,sizeof(char*)); gene_name_temp = (char **) calloc( MAXGENES ,sizeof(char*)); for ( i=0 ; i string was not found */ break; } } if ( file_position == line_length && index_to_find == 0 ){ printf("Error: No X0 column in data file\n"); exit(1); } rewind(fpin); file_position = ftell(fpin ); /*************************** find Y fields **********************************/ nYcols=0; for( index_to_find=0 ; index_to_find string was not found */ break; } } if ( file_position == line_length && index_to_find == 0 ){ printf("Error: No Y0 column in data file\n"); exit(1); } rewind(fpin); file_position = ftell(fpin ); if( nXcols != nYcols ){ printf("Error: Numbers of X columns(%d) and Y columns (%d) don't agree\n", nXcols, nYcols ); exit(1); } npairs = nXcols; /******************************* Look for flag, F, fields *****************************************************/ nFcols=0; for( index_to_find=0 ; index_to_find string was not found */ break; } } if ( file_position == line_length && index_to_find == 0 ){ /* did not find F0 string */ nFcols = -1; /* denotes that there are no qualitity flags on the replicates */ } rewind(fpin); file_position = ftell(fpin ); if( (nXcols != nFcols) && (nFcols != -1 )){ printf("Error: Numbers of data pairs (%d) and F columns (%d) don't agree\n", npairs, nFcols ); exit(1); } /*** Print out summary on column detection */ printf("Maximum %d (X,Y) pairs from column headings: ", npairs); for ( i=0 ; i= Mkeep ){ strcpy( yorf_temp[Nkeep], yorf_word ); strcpy( gene_name_temp[Nkeep], gene_name_word ); full2internal[i] = Nkeep; internal2full_temp[Nkeep] = i; mstore[Nkeep] = mtemp ; Nkeep++ ; /* increment kept gene counter */ } else { /* store excluded genes as -1 in indexing array */ full2internal[i] = -1; } } else if ( mtemp >= Mkeep && N_col != -1 ){ /* if N column is used for true no of samples and number of samples >= no required */ mstore[Nkeep] = mtemp; for ( j=0 ; j = 2 ){ t_x = factor * sigma_delta_x / sqrt( (float)(m[internal_index] - 1) ) ; t_y = factor * sigma_delta_y / sqrt( (float)(m[internal_index] - 1) ) ; } else { /* there is one sample */ t_x = factor * sigma_delta_x ; t_y = factor * sigma_delta_y ; } t_x = sqrt( (t_x * t_x + t_y * t_y)/2. ) ; t_y = t_x ; log_ratio_displayed = tempered_log_ratio ( muX[internal_index], muY[internal_index], t_x, t_y , &return_flag ); if ( return_flag == 'T' ) tempered_count++ ; fprintf(fpout, "%10.4f %c\n", log_ratio_displayed, return_flag ) ; } } } max_threshold = factor * ( sqrt(sigma_delta_x*sigma_delta_x + sigma_delta_y*sigma_delta_y) ) / sqrt(2.*3.) ; /* printf("4 sample threshold %f\n", max_threshold ); */ printf("For display in output file, %d log ratios were tempered\n\n", tempered_count); fclose(fpin);fclose(fpout); } /**************************************************************************************************/ /* tempered_log_ratio ***********************/ /**************************************************************************************************/ float tempered_log_ratio ( float x, float y, float xT, float yT , char *flag ){ /* input variable requirement: (x,y,xT,yT) >= 0 */ float minT, xtemp, ytemp, lr_straight, lr_threshold; x += 1.e-10 ; /* to avoid division by zero */ y += 1.e-10 ; if ( xT > 0. && yT > 0. ){ minT = fabs( log10 ( xT / yT ) ) ; } else { minT = 0. ; /* in this case, the straight log ratio will always be returned */ } /* determine value of threshold function at x, y */ xtemp = x ; ytemp = y ; if ( x < xT ) xtemp = xT ; if ( y < yT ) ytemp = yT ; lr_straight = log10 ( x / y ) ; lr_threshold = fabs( log10( xtemp/ytemp ) ) ; if ( lr_threshold < minT ) lr_threshold = minT ; /* printf ( "%f %f %f %f %f %f %f \n \n", minT, xT, yT, x, y, lr_threshold, lr_straight ); */ /* If straight log ratio is more extreme than threshold function value, replace with threshold function value */ /* return flag T to indicate that log ratio has been tempered */ if ( x >= y && lr_straight > lr_threshold ) { *flag = 'T'; return lr_threshold ; } else if ( x >= y && lr_straight <= lr_threshold ) { *flag = '-' ; return lr_straight ; } else if ( x < y && lr_straight < -lr_threshold ) { *flag = 'T' ; return -lr_threshold ; } else if ( x < y && lr_straight >= -lr_threshold ) { *flag = '-' ; return lr_straight ; } else { printf ( "Error: No conditions were met\n"); exit(1); } } /***************************************************************************************************/ /** read_mod.c **********************/ /** read error model **********************/ /***************************************************************************************************/ void read_mod( FILE *fp){ int i; char line[10000]; char word[100]; float dummy; /* skip line */ fgets( line, sizeof(line), fp ); /* read into global covariance */ for (i=1 ; i<=NDIM ;i++ ){ fscanf(fp," %f ", &dummy); cov[i]=(double)dummy; } /* takes care of a rare problem: when model has no variation in */ /* x direction and single gene values have only variation in this */ /* direction and are low we end up outside the allowable search region */ if ( cov[4] == 0 ) cov[4] = 0.0001 ; if ( cov[5] == 0 ) cov[5] = 0.0001 ; /* skip linefeed and line */ fgets( line, sizeof(line), fp ); fscanf(fp, "%s", word ); if ( strcmp( word, "gene" ) == 0 ){ /* found mus in the model file */ fgets( line, sizeof(line), fp ); /* ignore remainder of line */ for (i=0 ; i