table of contents
Genome::Model::Tools::Music::Smg(3pm) | User Contributed Perl Documentation | Genome::Model::Tools::Music::Smg(3pm) |
EOS
); }
sub _doc_authors {
return <<EOS
Qunyuan Zhang, Ph.D.
Cyriac Kandoth, Ph.D.
Nathan D. Dees, Ph.D. EOS }
sub execute {
my $self = shift;
my $gene_mr_file =
$self->gene_mr_file;
my $output_file =
$self->output_file;
my $output_file_detailed =
$output_file . "_detailed";
my $max_fdr =
$self->max_fdr;
my $skip_low_mr_genes =
$self->skip_low_mr_genes;
my $bmr_modifier_file =
$self->bmr_modifier_file;
my $processors =
$self->processors;
# Check on all the input data before starting work print STDERR "Gene mutation rate file not found or is empty: $gene_mr_file\n" unless( -s $gene_mr_file ); print STDERR "BMR modifier file not found or is empty: $bmr_modifier_file\n" unless( !defined $bmr_modifier_file || -s $bmr_modifier_file ); return undef unless( -s $gene_mr_file && ( !defined $bmr_modifier_file || -s $bmr_modifier_file )); # If BMR modifiers were provided, then load them, and create another gene_mr_file with modified BMRs if( defined $bmr_modifier_file ) { my $inBmrModFh = IO::File->new( $bmr_modifier_file ) or die "Couldn't open $bmr_modifier_file. $!\n"; my %bmr_modifier = (); while( my $line = $inBmrModFh->getline ) { next if( $line =~ m/^#/ ); chomp( $line ); my ( $gene, $modifier ) = split( /\t/, $line ); ( $modifier > 0 ) or die "$modifier is an invalid bmr-modifier. Please fix values in $bmr_modifier_file.\n"; $bmr_modifier{$gene} = $modifier; } $inBmrModFh->close; my $new_gene_mr_file = Genome::Sys->create_temp_file_path; ( $new_gene_mr_file ) or die "Couldn't create a temp file. $!"; my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n"; my $outMrFh = IO::File->new( $new_gene_mr_file, ">" ) or die "Couldn't open $new_gene_mr_file. $!\n"; while( my $line = $inMrFh->getline ) { if( $line =~ m/^#/ ) { $outMrFh->print( $line ); next; } chomp( $line ); my ( $gene, $type, $covd_bps, $mut_cnt, $bmr ) = split( /\t/, $line ); $bmr = $bmr * $bmr_modifier{$gene} if( defined $bmr_modifier{$gene} ); $outMrFh->print( "$gene\t$type\t$covd_bps\t$mut_cnt\t$bmr\n" ); } $outMrFh->close; $inMrFh->close; $gene_mr_file = $new_gene_mr_file; } # Collect per-gene mutation rates for reporting in results later my ( %gene_muts, %gene_bps, %mut_classes_hash ); my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n"; while( my $line = $inMrFh->getline ) { next if( $line =~ m/^#/ ); my ( $gene, $type, $covd_bps, $mut_cnt, undef ) = split( /\t/, $line ); # Warn user about cases where there could be fewer covered bps than mutations detected ( $mut_cnt <= $covd_bps ) or warn "More $type seen in $gene than there are bps with sufficient coverage!\n"; if( $type eq "Overall" or $type eq "Indels" or $type eq "Truncations" ) { $gene_muts{$gene}{$type} = $mut_cnt; $gene_bps{$gene} = $covd_bps; $mut_classes_hash{$type} = 1 unless( $type eq "Overall" ); } elsif( $type =~ m/(Transitions|Transversions)$/ ) { $gene_muts{$gene}{SNVs} += $mut_cnt; $mut_classes_hash{SNVs} = 1; } else { die "Unrecognized mutation class in gene-mr-file. $!\n"; } } $inMrFh->close; my @mut_classes = sort keys %mut_classes_hash; # Create a temporary intermediate file to hold the p-values my $pval_file = Genome::Sys->create_temp_file_path; ( $pval_file ) or die "Couldn't create a temp file. $!"; # Call R for Fisher combined test, Likelihood ratio test, and convolution test on each gene my $smg_cmd = "R --slave --args < " . __FILE__ . ".R $gene_mr_file $pval_file smg_test $processors $skip_low_mr_genes"; WIFEXITED( system $smg_cmd ) or croak "Couldn't run: $smg_cmd ($?)"; # Call R for calculating FDR on the p-values calculated in the SMG test my $fdr_cmd = "R --slave --args < " . __FILE__ . ".R $pval_file $output_file_detailed calc_fdr $processors $skip_low_mr_genes"; WIFEXITED( system $fdr_cmd ) or croak "Couldn't run: $fdr_cmd ($?)"; # Parse the R output to identify the SMGs (significant by at least 2 of 3 tests) my $smgFh = IO::File->new( $output_file_detailed ) or die "Couldn't open $output_file_detailed. $!\n"; my ( @newLines, @smgLines ); my $header = "#Gene\t" . join( "\t", @mut_classes ); $header .= "\tTot Muts\tCovd Bps\tMuts pMbp\tP-value FCPT\tP-value LRT\tP-value CT\tFDR FCPT\tFDR LRT\tFDR CT\n"; while( my $line = $smgFh->getline ) { chomp( $line ); if( $line =~ m/^Gene\tp.fisher\tp.lr\tp.convol\tfdr.fisher\tfdr.lr\tfdr.convol$/ ) { push( @newLines, $header ); push( @smgLines, $header ); } else { my ( $gene, @pq_vals ) = split( /\t/, $line ); my ( $p_fcpt, $p_lrt, $p_ct, $q_fcpt, $q_lrt, $q_ct ) = @pq_vals; my @mut_cnts; foreach( @mut_classes ) { # If a mutation count is a fraction, round down the digits after the decimal point push( @mut_cnts, (( $gene_muts{$gene}{$_} =~ m/\./ ) ? sprintf( "%.2f", $gene_muts{$gene}{$_} ) : $gene_muts{$gene}{$_} )); } my $mut_per_mbp = ( $gene_bps{$gene} ? sprintf( "%.2f", ( $gene_muts{$gene}{Overall} / $gene_bps{$gene} * 1000000 )) : 0 ); push( @newLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" ); # If the FDR of at least two of these tests is less than the maximum allowed, we consider it an SMG if(( $q_fcpt <= $max_fdr && $q_lrt <= $max_fdr ) || ( $q_fcpt <= $max_fdr && $q_ct <= $max_fdr ) || ( $q_lrt <= $max_fdr && $q_ct <= $max_fdr )) { push( @smgLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" ); } } } $smgFh->close; # Add per-gene SNV and Indel counts to the detailed R output, and make the header friendlier my $outDetFh = IO::File->new( $output_file_detailed, ">" ) or die "Couldn't open $output_file_detailed. $!\n"; $outDetFh->print( @newLines ); $outDetFh->close; # Do the same for only the genes that we consider SMGs my $outFh = IO::File->new( $output_file, ">" ) or die "Couldn't open $output_file. $!\n"; $outFh->print( @smgLines ); $outFh->close; return 1; }
1;
2020-11-06 | perl v5.30.3 |