#!/usr/bin/env python
"""Benchmark several different operations."""

from time import time
from contextlib import contextmanager
try:
    from cStringIO import StringIO
    BytesIO = StringIO
except ImportError:
    # Python 3
    from io import StringIO, BytesIO

from Bio import Phylo


# --- Timer mini-framework -----------------------------------

def time_once(func_manager):
    """Time a single run of the callable provided by ``func_manager``.

    ``func_manager`` is called with no arguments and must return a context
    manager; entering that context must produce a zero-argument callable.
    Only the call itself is timed -- whatever the context manager does on
    entry and exit falls outside the measured interval.

    Returns the elapsed time in seconds, as a float.
    """
    # Use the benchmarking clock rather than time.time(): the wall clock can
    # be adjusted while we measure and has coarse resolution on some
    # platforms. default_timer exists on both Python 2 and Python 3.
    from timeit import default_timer

    with func_manager() as func:
        t_start = default_timer()
        func()
        t_end = default_timer()
    return t_end - t_start


def median_time(func_manager, n_runs=101):
    """Take the median duration of several timed runs.

    ``func_manager`` is handed unchanged to ``time_once`` for every run.
    ``n_runs`` is the number of runs to time; it defaults to 101, an odd
    count, so the median is a single measured value.

    Returns the median duration in seconds. For an even ``n_runs`` the two
    middle durations are averaged.

    Raises ValueError if ``n_runs`` is less than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")
    durations = sorted(time_once(func_manager) for _i in range(n_runs))
    middle = n_runs // 2
    if n_runs % 2:
        return durations[middle]
    # Even count: no single middle element, so average the two central ones.
    return (durations[middle - 1] + durations[middle]) / 2.0


def handle_manager(func, fname, fmt):
    """Wrap ``func`` so it always runs against an in-memory copy of a file.

    The file ``fname`` is read from disk exactly once, when this function is
    called, and kept in a ``StringIO`` buffer.

    Returns a zero-argument factory. Each call to the factory gives a context
    manager that yields a zero-argument callable running
    ``func(buffer, fmt)``. On leaving the context the buffer is rewound to the
    start, even if the callable raised, so the next run reads it from the
    beginning again.
    """
    with open(fname) as source:
        buffered = StringIO(source.read())

    @contextmanager
    def rewinding_runner():
        def invoke():
            func(buffered, fmt)

        try:
            yield invoke
        finally:
            # Leave the buffer ready for the next run.
            buffered.seek(0)

    return rewinding_runner


def tree_manager(func, fname, fmt):
    """Wrap ``func`` so every run receives a freshly parsed tree.

    The file ``fname`` is read from disk once, when this function is called,
    and kept in a ``StringIO`` buffer.

    Returns a zero-argument factory. Each call to the factory gives a context
    manager that, on entry, rewinds the buffer and parses a new tree from it
    with ``Phylo.read`` in format ``fmt``. It then yields a zero-argument
    callable running ``func(tree)``. The parsing therefore happens before the
    yielded callable is invoked, not as part of it.
    """
    with open(fname) as source:
        buffered = StringIO(source.read())

    @contextmanager
    def fresh_tree_runner():
        # Rewind first: the previous run's parse left the buffer consumed.
        buffered.seek(0)
        fresh_tree = Phylo.read(buffered, fmt)

        def invoke():
            func(fresh_tree)

        yield invoke

    return fresh_tree_runner


# --- Benchmark operations -----------------------------------

def parse_many(handle, fmt):
    """Parse every tree from ``handle`` in format ``fmt``, keeping none.

    The iterator returned by ``Phylo.parse`` is simply run to exhaustion.
    """
    trees = Phylo.parse(handle, fmt)
    for _tree in trees:
        continue

def read_big(handle, fmt):
    """Parse the single tree held in ``handle`` using format ``fmt``."""
    # The parsed tree is not needed afterwards; it is dropped on return.
    _tree = Phylo.read(handle, fmt)

def write_big(tree):
    """Serialize ``tree`` as Newick into a throwaway in-memory text buffer."""
    sink = StringIO()
    Phylo.write(tree, sink, 'newick')

def write_big_xml(tree):
    """Serialize ``tree`` as PhyloXML into a throwaway in-memory byte buffer."""
    sink = BytesIO()
    Phylo.write(tree, sink, 'phyloxml')


def reroot_tree(tree):
    """Re-root ``tree`` on each of its clades, one after another.

    The clades are collected into a list before the first re-rooting, so the
    loop walks a fixed snapshot rather than a live traversal of the tree.
    """
    outgroups = list(tree.find_clades())
    for outgroup in outgroups:
        tree.root_with_outgroup(outgroup)

def collapse_all_lt50(tree):
    """Collapse every non-root internal clade whose confidence is below 50.

    Clades with no confidence value (``None``) are left alone, as is the
    root. A clade is only collapsed if ``tree.find_any`` still locates it at
    the moment it is visited.
    """
    for clade in tree.get_nonterminals():
        if clade == tree.root:
            continue
        # Skip clades that can no longer be found in the tree.
        if not tree.find_any(clade):
            continue
        if clade.confidence is not None and clade.confidence < 50:
            tree.collapse(clade)

def total_branch_length(tree):
    """Compute the tree's total branch length, discarding the result."""
    # Only the cost of the call matters here, not the value it produces.
    _total = tree.total_branch_length()

def ladderize(tree):
    """Call ``tree.ladderize()`` with its default arguments."""
    sort_clades = tree.ladderize
    sort_clades()

def count_terminals(tree):
    """Call ``tree.count_terminals()``, discarding the count it returns."""
    # Only the cost of the call matters here, not the value it produces.
    _count = tree.count_terminals()


# --- Main script --------------------------------------------

# Data files
#
# The files below are opened by bare file name, so they must be present in
# the current working directory when the script runs.

# From: http://github.com/camwebb/tree-of-trees/
# https://raw.github.com/camwebb/tree-of-trees/master/megatrees_other/davies2004.bl.new
EX_MEDIUM = 'davies2004.bl.new'  # 440 terminals

# Davies 2004 copies rerooted at each node
EX_MANY = 'davies-reroot.bl.nwk'    # 816 trees
# ENH: get a Newick file of 1000 bootstrap trees

# From: http://www.evoio.org/wiki/PhylotasticUseCases#Big_Trees
# http://www.evoio.org/wg/evoio/images/3/37/Smith_2011_angiosperms.txt
EX_BIG = 'Smith_2011_angiosperms.txt'    # 55473 terminals

# From phyloxml.org:
# converted from EX_BIG with phylo_converter
EX_BIG_XML = 'Smith_2011_angiosperms.xml'   # 55473 terminals

# Wrappers for running the benchmark operations.
#
# Each entry is a (label, runner) pair. The runner is the factory returned by
# handle_manager (operations that take a file handle and a format) or by
# tree_manager (operations that take an already-parsed tree).
#
# NOTE(review): handle_manager and tree_manager read their data file as soon
# as they are called, so building this tuple loads every file up front, and
# there is no ``if __name__ == "__main__"`` guard -- importing this module
# runs the whole benchmark suite.
benchmarks = (
    ("read_big", handle_manager(read_big, EX_BIG, 'newick')),
    ("read_big_xml", handle_manager(read_big, EX_BIG_XML, 'phyloxml')),
    ("write_big", tree_manager(write_big, EX_BIG, 'newick')),
    # Parsed from the Newick file; only the output format is PhyloXML.
    ("write_big_xml", tree_manager(write_big_xml, EX_BIG, 'newick')),
    ("read_medium", handle_manager(read_big, EX_MEDIUM, 'newick')),
    ("parse_many", handle_manager(parse_many, EX_MANY, 'newick')),
    ("reroot_tree", tree_manager(reroot_tree, EX_MEDIUM, 'newick')),
    ("collapse_all_lt50", tree_manager(collapse_all_lt50, EX_MEDIUM, 'newick')),
    ("total_branch_length", tree_manager(total_branch_length, EX_MEDIUM, 'newick')),
    ("ladderize", tree_manager(ladderize, EX_MEDIUM, 'newick')),
    ("count_terminals", tree_manager(count_terminals, EX_MEDIUM, 'newick')),
)

# Run the benchmarks in table order and print one line per benchmark: the
# label padded to 22 columns, followed by the median duration.
for bm_name, bm_runner in benchmarks:
    bm_time = median_time(bm_runner)
    print(bm_name.ljust(22) + str(bm_time))

