table of contents
| Genome::Model::Tools::Music::Smg(3pm) | User Contributed Perl Documentation | Genome::Model::Tools::Music::Smg(3pm) |
EOS
); }
sub _doc_authors {
return <<EOS
Qunyuan Zhang, Ph.D.
Cyriac Kandoth, Ph.D.
Nathan D. Dees, Ph.D. EOS }
sub execute {
my $self = shift;
my $gene_mr_file =
$self->gene_mr_file;
my $output_file =
$self->output_file;
my $output_file_detailed =
$output_file . "_detailed";
my $max_fdr =
$self->max_fdr;
my $skip_low_mr_genes =
$self->skip_low_mr_genes;
my $bmr_modifier_file =
$self->bmr_modifier_file;
my $processors =
$self->processors;
# Check on all the input data before starting work
print STDERR "Gene mutation rate file not found or is empty: $gene_mr_file\n" unless( -s $gene_mr_file );
print STDERR "BMR modifier file not found or is empty: $bmr_modifier_file\n" unless( !defined $bmr_modifier_file || -s $bmr_modifier_file );
return undef unless( -s $gene_mr_file && ( !defined $bmr_modifier_file || -s $bmr_modifier_file ));
# If BMR modifiers were provided, then load them, and create another gene_mr_file with modified BMRs
if( defined $bmr_modifier_file ) {
my $inBmrModFh = IO::File->new( $bmr_modifier_file ) or die "Couldn't open $bmr_modifier_file. $!\n";
my %bmr_modifier = ();
while( my $line = $inBmrModFh->getline ) {
next if( $line =~ m/^#/ );
chomp( $line );
my ( $gene, $modifier ) = split( /\t/, $line );
( $modifier > 0 ) or die "$modifier is an invalid bmr-modifier. Please fix values in $bmr_modifier_file.\n";
$bmr_modifier{$gene} = $modifier;
}
$inBmrModFh->close;
my $new_gene_mr_file = Genome::Sys->create_temp_file_path;
( $new_gene_mr_file ) or die "Couldn't create a temp file. $!";
my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n";
my $outMrFh = IO::File->new( $new_gene_mr_file, ">" ) or die "Couldn't open $new_gene_mr_file. $!\n";
while( my $line = $inMrFh->getline ) {
if( $line =~ m/^#/ ) {
$outMrFh->print( $line );
next;
}
chomp( $line );
my ( $gene, $type, $covd_bps, $mut_cnt, $bmr ) = split( /\t/, $line );
$bmr = $bmr * $bmr_modifier{$gene} if( defined $bmr_modifier{$gene} );
$outMrFh->print( "$gene\t$type\t$covd_bps\t$mut_cnt\t$bmr\n" );
}
$outMrFh->close;
$inMrFh->close;
$gene_mr_file = $new_gene_mr_file;
}
# Collect per-gene mutation rates for reporting in results later
my ( %gene_muts, %gene_bps, %mut_classes_hash );
my $inMrFh = IO::File->new( $gene_mr_file ) or die "Couldn't open $gene_mr_file. $!\n";
while( my $line = $inMrFh->getline ) {
next if( $line =~ m/^#/ );
my ( $gene, $type, $covd_bps, $mut_cnt, undef ) = split( /\t/, $line );
# Warn user about cases where there could be fewer covered bps than mutations detected
( $mut_cnt <= $covd_bps ) or warn "More $type seen in $gene than there are bps with sufficient coverage!\n";
if( $type eq "Overall" or $type eq "Indels" or $type eq "Truncations" ) {
$gene_muts{$gene}{$type} = $mut_cnt;
$gene_bps{$gene} = $covd_bps;
$mut_classes_hash{$type} = 1 unless( $type eq "Overall" );
}
elsif( $type =~ m/(Transitions|Transversions)$/ ) {
$gene_muts{$gene}{SNVs} += $mut_cnt;
$mut_classes_hash{SNVs} = 1;
}
else {
die "Unrecognized mutation class in gene-mr-file. $!\n";
}
}
$inMrFh->close;
my @mut_classes = sort keys %mut_classes_hash;
# Create a temporary intermediate file to hold the p-values
my $pval_file = Genome::Sys->create_temp_file_path;
( $pval_file ) or die "Couldn't create a temp file. $!";
# Call R for Fisher combined test, Likelihood ratio test, and convolution test on each gene
my $smg_cmd = "R --slave --args < " . __FILE__ . ".R $gene_mr_file $pval_file smg_test $processors $skip_low_mr_genes";
WIFEXITED( system $smg_cmd ) or croak "Couldn't run: $smg_cmd ($?)";
# Call R for calculating FDR on the p-values calculated in the SMG test
my $fdr_cmd = "R --slave --args < " . __FILE__ . ".R $pval_file $output_file_detailed calc_fdr $processors $skip_low_mr_genes";
WIFEXITED( system $fdr_cmd ) or croak "Couldn't run: $fdr_cmd ($?)";
# Parse the R output to identify the SMGs (significant by at least 2 of 3 tests)
my $smgFh = IO::File->new( $output_file_detailed ) or die "Couldn't open $output_file_detailed. $!\n";
my ( @newLines, @smgLines );
my $header = "#Gene\t" . join( "\t", @mut_classes );
$header .= "\tTot Muts\tCovd Bps\tMuts pMbp\tP-value FCPT\tP-value LRT\tP-value CT\tFDR FCPT\tFDR LRT\tFDR CT\n";
while( my $line = $smgFh->getline ) {
chomp( $line );
if( $line =~ m/^Gene\tp.fisher\tp.lr\tp.convol\tfdr.fisher\tfdr.lr\tfdr.convol$/ ) {
push( @newLines, $header );
push( @smgLines, $header );
}
else {
my ( $gene, @pq_vals ) = split( /\t/, $line );
my ( $p_fcpt, $p_lrt, $p_ct, $q_fcpt, $q_lrt, $q_ct ) = @pq_vals;
my @mut_cnts;
foreach( @mut_classes ) {
# If a mutation count is a fraction, round down the digits after the decimal point
push( @mut_cnts, (( $gene_muts{$gene}{$_} =~ m/\./ ) ? sprintf( "%.2f", $gene_muts{$gene}{$_} ) : $gene_muts{$gene}{$_} ));
}
my $mut_per_mbp = ( $gene_bps{$gene} ? sprintf( "%.2f", ( $gene_muts{$gene}{Overall} / $gene_bps{$gene} * 1000000 )) : 0 );
push( @newLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" );
# If the FDR of at least two of these tests is less than the maximum allowed, we consider it an SMG
if(( $q_fcpt <= $max_fdr && $q_lrt <= $max_fdr ) || ( $q_fcpt <= $max_fdr && $q_ct <= $max_fdr ) ||
( $q_lrt <= $max_fdr && $q_ct <= $max_fdr )) {
push( @smgLines, join( "\t", $gene, @mut_cnts, $gene_muts{$gene}{Overall}, $gene_bps{$gene}, $mut_per_mbp, @pq_vals ) . "\n" );
}
}
}
$smgFh->close;
# Add per-gene SNV and Indel counts to the detailed R output, and make the header friendlier
my $outDetFh = IO::File->new( $output_file_detailed, ">" ) or die "Couldn't open $output_file_detailed. $!\n";
$outDetFh->print( @newLines );
$outDetFh->close;
# Do the same for only the genes that we consider SMGs
my $outFh = IO::File->new( $output_file, ">" ) or die "Couldn't open $output_file. $!\n";
$outFh->print( @smgLines );
$outFh->close;
return 1;
}
1;
| 2020-11-06 | perl v5.30.3 |