#!/usr/bin/env perl
# PODNAME: prune-tree.pl
# ABSTRACT: Prune tips from TREE files based on id lists

use Modern::Perl '2011';
use autodie;

use File::Basename;
use Getopt::Euclid qw(:vars);
use Smart::Comments;
use Try::Tiny;

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(change_suffix insert_suffix secure_outfile);
use aliased 'Bio::MUST::Core::IdList';
use aliased 'Bio::MUST::Core::Tree';


my $method = $ARGV_from_must ? 'load_lis' : 'load';

IDL:
for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $list = IdList->$method($infile);
    $infile =~ s/$_//xms for @ARGV_in_strip;

    # load Tree (either reference tree or tree associated to idl)
    my $trefile = $ARGV_ref_tree // change_suffix($infile, '.tre');

    my $tree;
    try   { $tree = Tree->load($trefile) }
    catch { warn "Warning: cannot load '$trefile' as a Tree; skipping!\n" };
    next IDL unless $tree;
    ### Pruning tips in: $trefile

    # optionally negate list
    $list = $list->negative_list($tree) if $ARGV_negate_list;

    # prune tree
    $tree->tree->keep_tips( [ map { $_->foreign_id } $list->all_seq_ids ] );

    # create suffix named after filename
    my $outfile = $trefile;
    if ($ARGV_ref_tree) {
        my ($filename) = fileparse($infile, qr{\.[^.]*}xms);
        $outfile = insert_suffix($outfile, "-$filename");
    }
    $outfile = secure_outfile($outfile, $ARGV_out_suffix);
    ### Output tree in: $outfile
    $tree->store($outfile);
}

# TODO: generalize the --ref-tree approach to other programs as this allows
# a dual use (multiple infiles with or without explicit derived file)

__END__

=pod

=head1 NAME

prune-tree.pl - Prune tips from TREE files based on id lists

=head1 VERSION

version 0.212670

=head1 USAGE

    prune-tree.pl <infiles> [optional arguments]

=head1 REQUIRED ARGUMENTS

=over

=item <infiles>

Path to input IDL files [repeatable argument].

=for Euclid: infiles.type: readable
    repeatable

=back

=head1 OPTIONAL ARGUMENTS

=over

=item --in[-strip]=<str>

Substring(s) to strip from infile basenames before attempting to derive other
infile (e.g., TREE files) and outfile names [default: none].

=for Euclid: str.type: string
    repeatable

=item --out[-suffix]=<suffix>

Suffix to append to (possibly stripped) infile basenames for deriving
outfile names [default: none]. When not specified, outfile names are taken
from infiles but original infiles are preserved by being appended a .bak
suffix.

=for Euclid: suffix.type: string

=item --ref-tree=<file>

Path to an optional TREE file (e.g., a supermatrix tree) that will be the
target of the pruning operations [default: none]. Otherwise, each IDL file is
assumed to match a similarly named TREE (.tre) file.

=for Euclid: file.type: readable

=item --from-must

Consider the input file as generated by ed/treeplot [default: no]. Currently,
this switches to the legacy .lis format (instead of the modern .idl format).

=item --negate-list

Interpret the list as a negative list instead of a positive list [default:
no]. This means that seqs corresponding to listed ids are discarded.

=item --version

=item --usage

=item --help

=item --man

Print the usual program information

=back

=head1 AUTHOR

Denis BAURAIN <denis.baurain@uliege.be>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
