<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2105-4-41</ui>
   <ji>1471-2105</ji>
   <fm>
      <dochead>Research article</dochead>
      <bibl>
         <title>
            <p>The COG database: an updated version includes eukaryotes</p>
         </title>
         <aug>
            <au id="A1" ca="yes">
               <snm>Tatusov</snm>
               <mi>L</mi>
               <fnm>Roman</fnm>
               <insr iid="I1"/>
               <email>tatusov@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A2">
               <snm>Fedorova</snm>
               <mi>D</mi>
               <fnm>Natalie</fnm>
               <insr iid="I1"/>
               <email>fedorova@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A3">
               <snm>Jackson</snm>
               <mi>D</mi>
               <fnm>John</fnm>
               <insr iid="I1"/>
               <email>jjackson@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A4">
               <snm>Jacobs</snm>
               <mi>R</mi>
               <fnm>Aviva</fnm>
               <insr iid="I1"/>
               <email>jacobs@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A5">
               <snm>Kiryutin</snm>
               <fnm>Boris</fnm>
               <insr iid="I1"/>
               <email>kiryutin@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A6">
               <snm>Koonin</snm>
               <mi>V</mi>
               <fnm>Eugene</fnm>
               <insr iid="I1"/>
               <email>koonin@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A7">
               <snm>Krylov</snm>
               <mi>M</mi>
               <fnm>Dmitri</fnm>
               <insr iid="I1"/>
               <email>krylov@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A8">
               <snm>Mazumder</snm>
               <fnm>Raja</fnm>
               <insr iid="I2"/>
               <email>rm285@georgetown.edu</email>
            </au>
            <au id="A9">
               <snm>Mekhedov</snm>
               <mi>L</mi>
               <fnm>Sergei</fnm>
               <insr iid="I1"/>
               <email>mekhedov@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A10">
               <snm>Nikolskaya</snm>
               <mi>N</mi>
               <fnm>Anastasia</fnm>
               <insr iid="I2"/>
               <email>ann2@georgetown.edu</email>
            </au>
            <au id="A11">
               <snm>Rao</snm>
               <fnm>B Sridhar</fnm>
               <insr iid="I1"/>
               <email>rao@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A12">
               <snm>Smirnov</snm>
               <fnm>Sergei</fnm>
               <insr iid="I1"/>
               <email>smirnov@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A13">
               <snm>Sverdlov</snm>
               <mi>V</mi>
               <fnm>Alexander</fnm>
               <insr iid="I1"/>
               <email>asverdlo@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A14">
               <snm>Vasudevan</snm>
               <fnm>Sona</fnm>
               <insr iid="I1"/>
               <email>vasudeva@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A15">
               <snm>Wolf</snm>
               <mi>I</mi>
               <fnm>Yuri</fnm>
               <insr iid="I1"/>
               <email>wolf@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A16">
               <snm>Yin</snm>
               <mi>J</mi>
               <fnm>Jodie</fnm>
               <insr iid="I1"/>
               <email>yin@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A17">
               <snm>Natale</snm>
               <mi>A</mi>
               <fnm>Darren</fnm>
               <insr iid="I2"/>
               <email>dan5@georgetown.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda MD, USA</p>
            </ins>
            <ins id="I2">
               <p>Protein Information Resource, Georgetown University Medical Center, 3900 Reservoir Road, NW, Washington, DC 20007, USA</p>
            </ins>
         </insg>
         <source>BMC Bioinformatics</source>
         <issn>1471-2105</issn>
         <pubdate>2003</pubdate>
         <volume>4</volume>
         <issue>1</issue>
         <fpage>41</fpage>
         <url>http://www.biomedcentral.com/1471-2105/4/41</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">12969510</pubid>
               <pubid idtype="doi">10.1186/1471-2105-4-41</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>20</day>
               <month>5</month>
               <year>2003</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>11</day>
               <month>9</month>
               <year>2003</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>11</day>
               <month>9</month>
               <year>2003</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2003</year>
         <collab>Tatusov et al; licensee BioMed Central Ltd. This is an Open Access article: verbatim copying and redistribution of this article are permitted in all media for any purpose, provided this notice is preserved along with the article's original URL.</collab>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>The availability of multiple, essentially complete genome sequences of prokaryotes and eukaryotes spurred both the demand and the opportunity for the construction of an evolutionary classification of genes from these genomes. Such a classification system based on orthologous relationships between genes appears to be a natural framework for comparative genomics and should facilitate both functional annotation of genomes and large-scale evolutionary studies.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>We describe here a major update of the previously developed system for delineation of Clusters of Orthologous Groups of proteins (COGs) from the sequenced genomes of prokaryotes and unicellular eukaryotes and the construction of clusters of predicted orthologs for 7 eukaryotic genomes, which we named KOGs after eu<ul>k</ul>aryotic <ul>o</ul>rthologous <ul>g</ul>roups. The COG collection currently consists of 138,458 proteins, which form 4873 COGs and comprise 75% of the 185,505 (predicted) proteins encoded in 66 genomes of unicellular organisms. The eu<ul>k</ul>aryotic <ul>o</ul>rthologous <ul>g</ul>roups (KOGs) include proteins from 7 eukaryotic genomes: three animals (the nematode <it>Caenorhabditis elegans</it>, the fruit fly <it>Drosophila melanogaster </it>and <it>Homo sapiens</it>), one plant, <it>Arabidopsis thaliana</it>, two fungi (<it>Saccharomyces cerevisiae </it>and <it>Schizosaccharomyces pombe</it>), and the intracellular microsporidian parasite <it>Encephalitozoon cuniculi</it>. The current KOG set consists of 4852 clusters of orthologs, which include 59,838 proteins, or ~54% of the 110,655 analyzed eukaryotic gene products. Compared to the coverage of the prokaryotic genomes with COGs, a considerably smaller fraction of eukaryotic genes could be included in the KOGs; addition of new eukaryotic genomes is expected to result in a substantial increase in the coverage of eukaryotic genomes with KOGs. Examination of the phyletic patterns of KOGs reveals a conserved core represented in all analyzed species and consisting of ~20% of the KOG set. This conserved portion of the KOG set is much greater than the ubiquitous portion of the COG set (~1% of the COGs). In part, this difference is probably due to the small number of included eukaryotic genomes, but it could also reflect the relative compactness of eukaryotes as a clade and the greater evolutionary stability of eukaryotic genomes.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>The updated collection of orthologous protein sets for prokaryotes and eukaryotes is expected to be a useful platform for functional annotation of newly sequenced genomes, including those of complex eukaryotes, and genome-wide evolutionary studies.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>The rapid accumulation of genome sequences is a major challenge to researchers attempting to extract the maximum functional and evolutionary information from the new genomes. To avoid informational overflow from the constant influx of new genome sequences, a comprehensive evolutionary classification of the genes from all sequenced genomes is required. Such classifications are based on two fundamental notions from evolutionary biology: orthology and paralogy, which describe the two fundamentally different types of homologous relationships between genes <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr></abbrgrp>. Orthologs are homologous genes derived by vertical descent from a single ancestral gene in the last common ancestor of the compared species. Paralogs, in contrast, are homologous genes, which, at some stage of evolution of the respective gene family, have evolved by duplication of an ancestral gene. The notions of orthology and paralogy are intimately linked because, if duplication(s) occurred after the speciation event that separated the compared species, orthology becomes a relationship between sets of paralogs (co-orthologs), rather than individual genes. A classic case of the interplay between orthologous and paralogous relationships is seen in the globin family: all animal globins, including myoglobin, are paralogs, but they are all co-orthologs of the plant leghemoglobin(s) <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>.</p>
         <p>Deciphering orthologous and paralogous relationships among genes is critical for both the functional and the evolutionary aspects of comparative genomics <abbrgrp><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr></abbrgrp>. Orthologs typically occupy the same functional niche in different species, whereas paralogs tend to evolve toward functional diversification. Therefore, robustness of genome annotation depends on accurate identification of orthologs. Similarly, knowing which homologous genes are orthologs and which are paralogs is required for constructing evolutionary scenarios involving, along with vertical inheritance, lineage-specific gene loss and horizontal gene transfer.</p>
         <p>In principle, identification of orthologs requires phylogenetic analysis of entire families of homologous proteins, which is expected to isolate orthologous protein sets in distinct clades <abbrgrp><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr></abbrgrp>. However, on the scale of complete genomes, such analysis is both extremely labor-intensive and error-prone due to the inherent artifacts of phylogenetic tree construction. Therefore shortcuts have been developed by introducing the notion of a genome-specific best hit (BeT). A BeT is the protein in a target genome, which is most similar to a given protein from the query genome <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr></abbrgrp>. The underlying premise is that orthologs are more similar to each other than they are to any other protein from the respective genomes. In multiple-genome comparisons, pairs of potential orthologs identified via BeTs can be joined to form clusters of orthologs represented in all or a subset of the analyzed genomes <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B11">11</abbr></abbrgrp>. This approach to the identification of orthologous protein sets meets with two obvious complications. Firstly, many proteins belong to lineage-specific expansions, i.e., have evolved via duplication(s) after the divergence of the compared species <abbrgrp><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>. In these cases, deciphering (co)orthologous relationships can be a hard task and clusters of orthologs that include such expansions should be treated with particular caution. The second complication is caused by the fact that many proteins exist in multidomain forms encoded by a single gene in some species and as products of two or more stand-alone genes in others. In protein clustering, multidomain proteins may connect distinct clusters of orthologs resulting in artifactual lumping.</p>
         <p>The approach to the identification of orthologous protein sets based on clustering of consistent BeTs has been implemented in the collection of Clusters of Orthologous Groups (COGs) of proteins <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B15">15</abbr></abbrgrp>. The COG construction protocol included an automatic procedure for detecting candidate sets of orthologs, manual splitting of multidomain proteins into the component domains, and subsequent manual curation and annotation. The COGs started with 6 prokaryotic genomes and one genome of a unicellular eukaryote, yeast <it>Saccharomyces cerevisiae </it><abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Subsequent updates increased the number of prokaryotic genomes in the COGs to 43 <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>. The procedure for COG construction required that each COG included proteins from at least three sufficiently distant species. This conservative approach notwithstanding, ~60 to ~85% of the proteins encoded in prokaryotic genomes were included in the COGs.</p>
         <p>The COG system, which includes the COGNITOR program for adding new members to COGs (RLT, unpublished results), has become a widely used tool for computational genomics. The most important applications of the COGs are functional annotation of newly sequenced genomes <abbrgrp><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr><abbr bid="B20">20</abbr></abbrgrp> and genome-wide evolutionary analyses <abbrgrp><abbr bid="B21">21</abbr><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>.</p>
         <p>Here, we present a major update to the COGs, with over 63 sequenced prokaryotic genomes and three genomes of unicellular eukaryotes now included. Furthermore, the COG system is extended to complex, multicellular eukaryotes by constructing clusters of probable orthologs, which we named KOGs (eu<ul>k</ul>aryotic <ul>o</ul>rthologous <ul>g</ul>roups) for 7 sequenced genomes of animals, fungi, microsporidia, and plants.</p>
      </sec>
      <sec>
         <st>
            <p>Results and discussion</p>
         </st>
         <sec>
            <st>
               <p>Update of the COGs</p>
            </st>
            <p>To add a new species to the COG system, the annotated protein sequences from the respective genome were compared to the proteins in the COG database by using the BLAST program and assigned to pre-existing COGs by using the COGNITOR program (see Materials and Methods). The genomes of prokaryotes and unicellular eukaryotes that have been sequenced since the latest update of the COGs were added one at a time. At each step, the proteins that remained unassigned after manual validation of the COGNITOR results were subjected to the COG construction procedure in order to identify new COGs that could be formed thanks to the addition of the analyzed genome. The resulting COG assignments for 63 prokaryotic genomes and three genomes of unicellular eukaryotes are quantified in Table <tblr tid="T1">1</tblr>. The addition of new species leads to incremental increase in the COG coverage for each of the included prokaryotic genomes. The highest coverage now achieved is for <it>Buchnera sp</it>. (99%) and the lowest coverage is for <it>Borrelia burgdorferi </it>(43%). Each of these organisms is a special case. <it>Buchnera </it>is a highly degraded endosymbiont, which evolved from a relatively recent common ancestor with <it>E. coli </it>but apparently lost the great majority of genes, retaining &#8211; almost exclusively &#8211; conserved, essential ones <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>, whereas <it>Borrelia </it>has numerous plasmids that mostly encode poorly conserved genes <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>. Probably more telling is the observation that, for most free-living prokaryotes, ~80% of the genes belong to COGs and there is no appreciable dependence between the number of genes in a genome and the COG coverage (Table <tblr tid="T1">1</tblr>). 
Given that most genomes encode a substantial fraction (up to 10%) of fast-evolving, non-globular proteins <abbrgrp><abbr bid="B28">28</abbr></abbrgrp> and other poorly conserved proteins (e.g., remnants of prophages) as well, these findings seem to suggest that the COG coverage of most genomes is approaching saturation.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Coverage of unicellular organisms in COGs</p>
               </caption>
               <tblbdy cols="4">
                  <r>
                     <c ca="left">
                        <p>Species</p>
                     </c>
                     <c ca="center">
                        <p>Number of annotated proteins</p>
                     </c>
                     <c ca="center">
                        <p>Number (and percentage) of proteins in COGs</p>
                     </c>
                     <c ca="center">
                        <p>Number of COGs that include the given species</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="center" cspan="3">
                        <p>
                           <b>Bacteria</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c cspan="4" ca="center">
                        <p>Proteobacteria (Gram-negative)</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Agrobacterium tumefaciens</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5299</p>
                     </c>
                     <c ca="center">
                        <p>4398 (83%)</p>
                     </c>
                     <c ca="center">
                        <p>1978</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Brucella melitensis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3198</p>
                     </c>
                     <c ca="center">
                        <p>2678 (84%)</p>
                     </c>
                     <c ca="center">
                        <p>1654</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Caulobacter crescentus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3737</p>
                     </c>
                     <c ca="center">
                        <p>2958 (79%)</p>
                     </c>
                     <c ca="center">
                        <p>1734</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mesorhizobium loti</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>7275</p>
                     </c>
                     <c ca="center">
                        <p>5653 (78%)</p>
                     </c>
                     <c ca="center">
                        <p>2175</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Sinorhizobium meliloti</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>6205</p>
                     </c>
                     <c ca="center">
                        <p>5207 (84%)</p>
                     </c>
                     <c ca="center">
                        <p>2084</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Rickettsia conorii</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1374</p>
                     </c>
                     <c ca="center">
                        <p>891 (65%)</p>
                     </c>
                     <c ca="center">
                        <p>733</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Rickettsia prowazekii</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>835</p>
                     </c>
                     <c ca="center">
                        <p>727 (87%)</p>
                     </c>
                     <c ca="center">
                        <p>647</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Buchnera sp</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>574</p>
                     </c>
                     <c ca="center">
                        <p>567 (99%)</p>
                     </c>
                     <c ca="center">
                        <p>559</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Escherichia coli K12</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4279</p>
                     </c>
                     <c ca="center">
                        <p>3623 (85%)</p>
                     </c>
                     <c ca="center">
                        <p>2131</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Escherichia coli O157:H7</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5324</p>
                     </c>
                     <c ca="center">
                        <p>4050 (76%)</p>
                     </c>
                     <c ca="center">
                        <p>2190</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Escherichia coli O157:H7 EDL933</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5361</p>
                     </c>
                     <c ca="center">
                        <p>4023 (75%)</p>
                     </c>
                     <c ca="center">
                        <p>2200</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Salmonella typhi</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4553</p>
                     </c>
                     <c ca="center">
                        <p>3724 (82%)</p>
                     </c>
                     <c ca="center">
                        <p>2167</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Yersinia pestis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4083</p>
                     </c>
                     <c ca="center">
                        <p>3341 (82%)</p>
                     </c>
                     <c ca="center">
                        <p>1993</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Haemophilus influenzae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1714</p>
                     </c>
                     <c ca="center">
                        <p>1597 (93%)</p>
                     </c>
                     <c ca="center">
                        <p>1317</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Pasteurella multocida</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2015</p>
                     </c>
                     <c ca="center">
                        <p>1829 (91%)</p>
                     </c>
                     <c ca="center">
                        <p>1455</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Vibrio cholerae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3463</p>
                     </c>
                     <c ca="center">
                        <p>2929 (85%)</p>
                     </c>
                     <c ca="center">
                        <p>1918</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Pseudomonas aeruginosa</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5567</p>
                     </c>
                     <c ca="center">
                        <p>4660 (84%)</p>
                     </c>
                     <c ca="center">
                        <p>2243</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Xylella fastidiosa</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2832</p>
                     </c>
                     <c ca="center">
                        <p>1740 (61%)</p>
                     </c>
                     <c ca="center">
                        <p>1310</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Neisseria meningitidis MC58</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2079</p>
                     </c>
                     <c ca="center">
                        <p>1561 (75%)</p>
                     </c>
                     <c ca="center">
                        <p>1255</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Neisseria meningitidis Z2491</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2065</p>
                     </c>
                     <c ca="center">
                        <p>1573 (76%)</p>
                     </c>
                     <c ca="center">
                        <p>1260</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Ralstonia solanacearum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>5116</p>
                     </c>
                     <c ca="center">
                        <p>3931 (77%)</p>
                     </c>
                     <c ca="center">
                        <p>2018</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Campylobacter jejuni</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1634</p>
                     </c>
                     <c ca="center">
                        <p>1328 (81%)</p>
                     </c>
                     <c ca="center">
                        <p>1093</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Helicobacter pylori 26695</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1576</p>
                     </c>
                     <c ca="center">
                        <p>1127 (72%)</p>
                     </c>
                     <c ca="center">
                        <p>920</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Helicobacter pylori J99</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1491</p>
                     </c>
                     <c ca="center">
                        <p>1106 (74%)</p>
                     </c>
                     <c ca="center">
                        <p>921</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Low-GC Gram-positive bacteria</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Bacillus halodurans</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4066</p>
                     </c>
                     <c ca="center">
                        <p>3149 (77%)</p>
                     </c>
                     <c ca="center">
                        <p>1744</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Bacillus subtilis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4112</p>
                     </c>
                     <c ca="center">
                        <p>3125 (76%)</p>
                     </c>
                     <c ca="center">
                        <p>1771</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Clostridium acetobutylicum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3848</p>
                     </c>
                     <c ca="center">
                        <p>2879 (75%)</p>
                     </c>
                     <c ca="center">
                        <p>1549</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Lactococcus lactis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2267</p>
                     </c>
                     <c ca="center">
                        <p>1798 (79%)</p>
                     </c>
                     <c ca="center">
                        <p>1208</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Listeria innocua</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3043</p>
                     </c>
                     <c ca="center">
                        <p>2428 (80%)</p>
                     </c>
                     <c ca="center">
                        <p>1522</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycoplasma genitalium</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>484</p>
                     </c>
                     <c ca="center">
                        <p>385 (80%)</p>
                     </c>
                     <c ca="center">
                        <p>362</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycoplasma pneumoniae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>689</p>
                     </c>
                     <c ca="center">
                        <p>431 (63%)</p>
                     </c>
                     <c ca="center">
                        <p>383</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycoplasma pulmonis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>782</p>
                     </c>
                     <c ca="center">
                        <p>514 (66%)</p>
                     </c>
                     <c ca="center">
                        <p>426</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Ureaplasma urealyticum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>614</p>
                     </c>
                     <c ca="center">
                        <p>418 (68%)</p>
                     </c>
                     <c ca="center">
                        <p>378</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Staphylococcus aureus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2625</p>
                     </c>
                     <c ca="center">
                        <p>2071 (79%)</p>
                     </c>
                     <c ca="center">
                        <p>1419</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Streptococcus pneumoniae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2094</p>
                     </c>
                     <c ca="center">
                        <p>1586 (76%)</p>
                     </c>
                     <c ca="center">
                        <p>1105</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Streptococcus pyogenes</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1697</p>
                     </c>
                     <c ca="center">
                        <p>1356 (80%)</p>
                     </c>
                     <c ca="center">
                        <p>1030</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Actinobacteria</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Corynebacterium glutamicum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3040</p>
                     </c>
                     <c ca="center">
                        <p>2162 (71%)</p>
                     </c>
                     <c ca="center">
                        <p>1339</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycobacterium tuberculosis H37Rv</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3927</p>
                     </c>
                     <c ca="center">
                        <p>2843 (72%)</p>
                     </c>
                     <c ca="center">
                        <p>1450</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycobacterium tuberculosis CDC1551</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4187</p>
                     </c>
                     <c ca="center">
                        <p>2756 (66%)</p>
                     </c>
                     <c ca="center">
                        <p>1434</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Mycobacterium leprae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1605</p>
                     </c>
                     <c ca="center">
                        <p>1180 (74%)</p>
                     </c>
                     <c ca="center">
                        <p>927</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Hyperthermophilic bacteria</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Aquifex aeolicus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1560</p>
                     </c>
                     <c ca="center">
                        <p>1349 (86%)</p>
                     </c>
                     <c ca="center">
                        <p>1088</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Thermotoga maritima</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1858</p>
                     </c>
                     <c ca="center">
                        <p>1565 (84%)</p>
                     </c>
                     <c ca="center">
                        <p>1167</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Cyanobacteria</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Synechocystis sp.</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3167</p>
                     </c>
                     <c ca="center">
                        <p>2346 (74%)</p>
                     </c>
                     <c ca="center">
                        <p>1427</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Nostoc sp.</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>6129</p>
                     </c>
                     <c ca="center">
                        <p>3832 (63%)</p>
                     </c>
                     <c ca="center">
                        <p>1673</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Other bacteria</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Borrelia burgdorferi</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1638</p>
                     </c>
                     <c ca="center">
                        <p>701 (43%)</p>
                     </c>
                     <c ca="center">
                        <p>577</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Treponema pallidum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1036</p>
                     </c>
                     <c ca="center">
                        <p>737 (71%)</p>
                     </c>
                     <c ca="center">
                        <p>639</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Chlamydia trachomatis</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>895</p>
                     </c>
                     <c ca="center">
                        <p>644 (72%)</p>
                     </c>
                     <c ca="center">
                        <p>587</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Chlamydophila pneumoniae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1054</p>
                     </c>
                     <c ca="center">
                        <p>667 (63%)</p>
                     </c>
                     <c ca="center">
                        <p>603</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Deinococcus radiodurans</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>3182</p>
                     </c>
                     <c ca="center">
                        <p>2322 (73%)</p>
                     </c>
                     <c ca="center">
                        <p>1495</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Fusobacterium nucleatum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2067</p>
                     </c>
                     <c ca="center">
                        <p>1556 (75%)</p>
                     </c>
                     <c ca="center">
                        <p>1143</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>
                           <b>Archaea</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Euryarchaeota</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Archaeoglobus fulgidus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2420</p>
                     </c>
                     <c ca="center">
                        <p>1953 (81%)</p>
                     </c>
                     <c ca="center">
                        <p>1244</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Methanocaldococcus jannaschii</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1758</p>
                     </c>
                     <c ca="center">
                        <p>1448 (82%)</p>
                     </c>
                     <c ca="center">
                        <p>1117</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Methanothermobacter thermautotrophicus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1873</p>
                     </c>
                     <c ca="center">
                        <p>1500 (80%)</p>
                     </c>
                     <c ca="center">
                        <p>1123</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Methanopyrus kandleri</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1691</p>
                     </c>
                     <c ca="center">
                        <p>1253 (74%)</p>
                     </c>
                     <c ca="center">
                        <p>1022</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Methanosarcina acetivorans</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4540</p>
                     </c>
                     <c ca="center">
                        <p>3142 (69%)</p>
                     </c>
                     <c ca="center">
                        <p>1462</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Pyrococcus abyssi</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1769</p>
                     </c>
                     <c ca="center">
                        <p>1506 (85%)</p>
                     </c>
                     <c ca="center">
                        <p>1065</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Pyrococcus horikoshii</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1801</p>
                     </c>
                     <c ca="center">
                        <p>1425 (79%)</p>
                     </c>
                     <c ca="center">
                        <p>1019</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Thermoplasma acidophilum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1482</p>
                     </c>
                     <c ca="center">
                        <p>1261 (85%)</p>
                     </c>
                     <c ca="center">
                        <p>890</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Thermoplasma volcanium</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1499</p>
                     </c>
                     <c ca="center">
                        <p>1277 (85%)</p>
                     </c>
                     <c ca="center">
                        <p>900</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Halobacterium sp.</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2622</p>
                     </c>
                     <c ca="center">
                        <p>1809 (69%)</p>
                     </c>
                     <c ca="center">
                        <p>1109</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>Crenarchaeota</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Aeropyrum pernix</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1840</p>
                     </c>
                     <c ca="center">
                        <p>1236 (67%)</p>
                     </c>
                     <c ca="center">
                        <p>947</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Pyrobaculum aerophilum</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2605</p>
                     </c>
                     <c ca="center">
                        <p>1529 (59%)</p>
                     </c>
                     <c ca="center">
                        <p>1015</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Sulfolobus solfataricus</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>2977</p>
                     </c>
                     <c ca="center">
                        <p>2207 (74%)</p>
                     </c>
                     <c ca="center">
                        <p>1084</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center" cspan="4">
                        <p>
                           <b>Eukaryota</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Saccharomyces cerevisiae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>6338</p>
                     </c>
                     <c ca="center">
                        <p>3012 (48%)</p>
                     </c>
                     <c ca="center">
                        <p>1299</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Schizosaccharomyces pombe</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>4979</p>
                     </c>
                     <c ca="center">
                        <p>2774 (56%)</p>
                     </c>
                     <c ca="center">
                        <p>1282</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Encephalitozoon cuniculi</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>1996</p>
                     </c>
                     <c ca="center">
                        <p>1105 (55%)</p>
                     </c>
                     <c ca="center">
                        <p>696</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <p>The COGs are accompanied by a phyletic pattern search tool, i.e., a Web-based tool that allows the user to select COGs with a desired pattern of presence-absence of species. Using the phyletic pattern search tool, one can classify the COGs by the representation of the major lineages of unicellular life forms (Fig. <figr fid="F1">1</figr>). This breakdown of the updated COGs emphasizes the important trend noticed previously <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B15">15</abbr></abbrgrp>: only a minuscule fraction (~1%) of the COGs are ubiquitous, and even the COGs that are present in all bacteria or in all archaea represent a small minority. Furthermore, many COGs show scattered distribution, which appears to reflect rampant lineage-specific gene loss and horizontal gene transfer, which are typical of prokaryotic evolution <abbrgrp><abbr bid="B29">29</abbr><abbr bid="B30">30</abbr><abbr bid="B31">31</abbr></abbrgrp>.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Phyletic patterns of COGs</p>
               </caption>
               <text>
                  <p><b>Phyletic patterns of COGs</b>. <it>All</it>, represented in all unicellular organisms included in the COG system; <it>All archaea, All bacteria, All eukaryotes</it>, represented in each species from the respective domain of life (and possibly in some species from other domains); <it>All bacteria except the smallest</it>, represented in all bacteria except, possibly, parasites with small genomes (mycoplasma, chlamydia, rickettsia, and spirochetes).</p>
               </text>
               <graphic file="1471-2105-4-41-1"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Construction of KOGs for 7 sequenced eukaryotic genomes</p>
            </st>
            <p>Eukaryotic KOGs were constructed from annotated proteins encoded in the genomes of three animals (<it>Homo sapiens </it><abbrgrp><abbr bid="B32">32</abbr></abbrgrp>, the fruit fly <it>Drosophila melanogaster </it><abbrgrp><abbr bid="B33">33</abbr></abbrgrp>, and the nematode <it>Caenorhabditis elegans</it>) <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>, the green plant <it>Arabidopsis thaliana </it>(thale cress) <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>, two fungi (budding yeast <it>Saccharomyces cerevisiae </it><abbrgrp><abbr bid="B36">36</abbr></abbrgrp> and fission yeast <it>Schizosaccharomyces pombe </it><abbrgrp><abbr bid="B37">37</abbr></abbrgrp>), and the microsporidian <it>Encephalitozoon cuniculi </it><abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. The basic procedure for KOG construction was the same as the procedure previously employed for prokaryotic genomes (Refs. <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B15">15</abbr></abbrgrp> and see Materials and Methods). Given the abundance of multidomain architectures among eukaryotic proteins and the fact that apparent orthologs often differ in domain composition <abbrgrp><abbr bid="B32">32</abbr><abbr bid="B39">39</abbr></abbrgrp>, the protocol based on the BeT analysis was amended with domain identification using the RPS-BLAST program <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. Proteins assigned to a KOG by the initial KOG construction procedure were kept in that KOG without splitting them into individual domains if they shared a common core of domains. In addition, proteins that consisted solely of widespread, "promiscuous" domains (e.g., SH2, SH3, WD40 repeats or TPR repeats) and did not show clear-cut orthologous relationships were assigned to Fuzzy Orthologous Groups (FOGs). In addition to KOGs and FOGs, we also identified provisional clusters of orthologs represented in two genomes (TWOGs) by detecting bi-directional BeTs between proteins not included in KOGs or FOGs and assigning additional members by examination of the BLAST search outputs. Finally, lineage-specific expansions (LSEs) of paralogs among the proteins from each genome not included in KOGs, FOGs, and TWOGs were detected by using the clustering procedure described previously <abbrgrp><abbr bid="B14">14</abbr></abbrgrp> accompanied by a newly developed procedure for finding tight protein clusters (BK and RLT, unpublished results). The construction of TWOGs and LSEs involved more extensive case-by-case evaluation than the KOG construction due to the lack of well-established procedures to generate these types of clusters; nevertheless, these clusters should be considered preliminary until further validation.</p>
            <p>Table <tblr tid="T2">2</tblr> shows the assignment of the proteins from each of the analyzed eukaryotic species to KOGs. Unlike the situation with prokaryotic COGs (Table <tblr tid="T1">1</tblr>), the fraction of proteins assigned to KOGs tends to decrease with increasing genome size of the analyzed eukaryotic species, from the maximum of ~74% for fission yeast <it>Schizosaccharomyces pombe</it>, the second smallest genome (for reasons that remain unclear, the smallest genome, that of the microsporidian <it>Encephalitozoon cuniculi</it>, had only 61% of the proteins included in KOGs) to ~49% for the largest, human genome (Table <tblr tid="T2">2</tblr>).</p>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Representation of the 7 analyzed eukaryotic species in KOGs</p>
               </caption>
               <tblbdy cols="5">
                  <r>
                     <c ca="left">
                        <p>Species</p>
                     </c>
                     <c ca="center">
                        <p>Symbol</p>
                     </c>
                     <c ca="center">
                        <p>Number of annotated proteins</p>
                     </c>
                     <c ca="center" cspan="2">
                        <p>Number of proteins in KOGs (%)</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="5">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Arabidopsis thaliana</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>A</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>25,749</p>
                     </c>
                     <c ca="center">
                        <p>13,531</p>
                     </c>
                     <c ca="center">
                        <p>53%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Caenorhabditis elegans</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>C</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>20,275</p>
                     </c>
                     <c ca="center">
                        <p>10,393</p>
                     </c>
                     <c ca="center">
                        <p>51%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Drosophila melanogaster</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>D</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>13,468</p>
                     </c>
                     <c ca="center">
                        <p>8,321</p>
                     </c>
                     <c ca="center">
                        <p>62%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Homo sapiens</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>H</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>37,840</p>
                     </c>
                     <c ca="center">
                        <p>18,714</p>
                     </c>
                     <c ca="center">
                        <p>49%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Saccharomyces cerevisiae</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>Y</p>
                     </c>
                     <c ca="center">
                        <p>6,338</p>
                     </c>
                     <c ca="center">
                        <p>3,971</p>
                     </c>
                     <c ca="center">
                        <p>63%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Schizosaccharomyces pombe</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>P</p>
                     </c>
                     <c ca="center">
                        <p>4,989</p>
                     </c>
                     <c ca="center">
                        <p>3,692</p>
                     </c>
                     <c ca="center">
                        <p>74%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <it>Encephalitozoon cuniculi</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>E</p>
                     </c>
                     <c ca="center">
                        <p>1,996</p>
                     </c>
                     <c ca="center">
                        <p>1,216</p>
                     </c>
                     <c ca="center">
                        <p>61%</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Total</b>
                        </p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="center">
                        <p>
                           <b>110,655</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>59,838</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>54%</b>
                        </p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <p>Compared to prokaryotes, a considerably smaller fraction of eukaryotic genes could be included into KOGs (Tables <tblr tid="T1">1</tblr> and <tblr tid="T2">2</tblr>). Thus, the apparent difference in coverage with highly conserved clusters of orthologs (C/KOGs) between prokaryotes and eukaryotes, particularly complex ones, is probably due to the relatively small number of eukaryotic genomes included in this analysis and is expected to level off with the growth of the eukaryotic genome collection. This view is compatible with the observed dependence of the KOG coverage on the number of genes (Table <tblr tid="T1">1</tblr>), which suggests that the KOGs are still far from saturation.</p>
            <p>Examination of the phyletic patterns of KOGs points to the existence of a conserved eukaryotic gene core as well as substantial diversity (Fig. <figr fid="F2">2</figr>); this clearly resembles the evolutionary pattern seen previously during the analysis of archaeal COGs <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>. The genes represented in each of the 7 analyzed genomes comprise ~20% of the KOG set and approximately the same number of KOGs includes 6 species, with the exception of the microsporidian. The prevalence of the latter pattern is not surprising given that microsporidia are intracellular parasites with minimal metabolic capabilities and a dramatically reduced genome <abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. The next largest group consists of animal-specific KOGs, which, again, could be expected because animals are the only lineage of complex eukaryotes that is represented by more than one species in the analyzed set of genomes. However, a notable observation is that ~30% of the KOGs have "odd" phyletic patterns, e.g., are represented in one animal, one plant and one fungal species (Fig. <figr fid="F2">2</figr>).</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Phyletic patterns of KOGs</p>
               </caption>
               <text>
                  <p><b>Phyletic patterns of KOGs</b>. <it>All</it>, include representatives from each of the 7 analyzed species; <it>All-Ec</it>, include representatives from each of 6 species other than <it>Encephalitozoon cuniculi</it>; <it>All animals</it>, include representatives from three animal genomes only; <it>All fungi</it>, include representatives from two fungal genomes only.</p>
               </text>
               <graphic file="1471-2105-4-41-2"/>
            </fig>
            <p>To illustrate the typical composition of a KOG, some of the problems that tend to emerge with their construction, and possible biological implications, we briefly discuss here KOG3378, which includes proteins already mentioned above as a typical case of paralogy and orthology, namely, the globins (Fig. <figr fid="F3">3</figr>). Globins are small (typically, between 140 and 150 amino acid residues) and relatively poorly conserved proteins. As a consequence, the initial, automatic procedure for KOG construction produced a candidate KOG consisting of only 3 proteins from 3 species: <it>S. cerevisiae </it>YGR234w, its ortholog from <it>S. pombe </it>SPAC869.02c, and human neuroglobin Hs10864065. The remaining proteins were brought into the KOG manually, as the result of examination of BLAST search outputs, focused on the conservation of the globin-specific sequence motifs. The final KOG is represented in 6 of the 7 analyzed eukaryotic species, with the sole exception of <it>E. cuniculi </it>(Fig. <figr fid="F3">3</figr>). The most remarkable aspect of this KOG is the apparent independent proliferation of genes for globins and globin-like proteins in vertebrates (represented here by humans): 11 paralogs, and nematodes (<it>C. elegans</it>): 24 paralogs (CE23430 and CE23431 are parts of the same gene). Strictly speaking, to demonstrate that these expansions are, indeed, independent, rather than ancestral, complete phylogenetic analysis is required, which is a difficult task given the low sequence conservation in many members of the KOGs. However, the presence of only one globin homolog in <it>D. melanogaster </it>is best compatible with the hypothesis of lineage-specific expansion because, regardless of the exact topology of the animal phylogenetic tree <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>, the alternative to this hypothesis would involve massive loss of globin-like genes in insects. 
Furthermore, this hypothesis is also compatible with the topology of the crude similarity dendrogram, which accompanies the KOG and in which the majority of human and nematode members form distinct clusters (Fig. <figr fid="F3">3</figr>). Thus, at this stage, the most likely, conservative interpretation of the evolutionary relationship between vertebrate and nematode globins is that they comprise co-orthologous sets and are legitimately included in the same KOG. Similarly, the two paralogous leghemoglobins from <it>A. thaliana </it>should be considered co-orthologous to the human and <it>C. elegans </it>paralogous sets.</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>An example of a complex eukaryotic KOG: globins and related hemoproteins</p>
               </caption>
               <text>
                  <p><b>An example of a complex eukaryotic KOG: globins and related hemoproteins</b>. The systematic protein names of the KOG members are listed under each species. To the left of the KOG proper is the similarity dendrogram produced from the BLAST scores between the KOG members. This is a crude clustering, which should not be construed as a phylogenetic tree.</p>
               </text>
               <graphic file="1471-2105-4-41-3"/>
            </fig>
            <p>The functions of human globins and globin homologs, primarily in oxygen delivery to different tissues, at different developmental stages have been studied in great detail <abbrgrp><abbr bid="B43">43</abbr></abbrgrp>. In contrast, the dramatic proliferation of globin-like proteins in the nematode <it>C. elegans</it>, while noticed, in part, in previous work <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>, is not well understood. To our knowledge, KOG3378 is the most complete current representation of this lineage-specific expansion of globin-like paralogs; the experimental study of these genes is expected to reveal novel aspects of invertebrate physiology.</p>
            <p>Another notable observation comes from the analysis of the yeast members of KOG3378. A BLAST search of the non-redundant protein sequence database (NCBI, NIH, Bethesda) and examination of the domain composition of the <it>S. cerevisiae </it>protein YGR234w show that this protein (named flavohemoglobin) consists of the globin domain fused to a flavodoxin reductase domain and is highly similar to a variety of oxidoreductases from several bacterial species and some lower eukaryotes (e.g., slime molds and other protists), which have the same domain composition (<abbrgrp><abbr bid="B45">45</abbr></abbrgrp> and data not shown). The <it>S. pombe </it>flavohemoglobin belongs to the same protein family but is not the closest relative of the <it>S. cerevisiae </it>flavohemoglobin (data not shown). These observations strongly suggest that the yeast flavohemoglobin genes have been acquired from bacteria via horizontal gene transfer and hence have an evolutionary history that is distinct and independent from those of plant and animal globins. Notably, the second member of this KOG from <it>S. cerevisiae</it>, YNL234w, is not at all a close paralog of the flavohemoglobins. The only identifiable domain in this large protein is the globin domain, which is most similar to vertebrate neuroglobins. These observations illustrate an important general point to be kept in mind when perusing the KOGs: although a given set of proteins may have been legitimately brought together in the same KOG in the context of eukaryotic genome comparison, on some occasions, different KOG members have different evolutionary trajectories.</p>
         </sec>
         <sec>
            <st>
               <p>Prokaryotic and eukaryotic orthologous gene sets: evolutionary connections and functional differences</p>
            </st>
            <p>The two sets of orthologous genes overlap because the three species of unicellular eukaryotes were included in both sets; the proteins from these species obviously form connections between prokaryotic orthologous sets (COGs) and eukaryotic orthologous sets (KOGs). Such connections, suggestive of orthologous relationships, were established between 1253 COGs, each of which included at least one protein from a unicellular eukaryote (not counting COGs that consisted exclusively of eukaryotic proteins), and 2000 eukaryotic KOGs. The greater number of eukaryotic KOGs involved in this relationship is due to the fact that, on many occasions, several proteins from unicellular eukaryotes that are part of the same COG have their distinct orthologs in other eukaryotes and, accordingly, belong to several KOGs. Only relatively small fractions of the prokaryotic COGs (27% of the COGs that include at least one prokaryotic species) and eukaryotic KOGs (34% of the KOGs and TWOGs) comprised sets of putative orthologs represented in both prokaryotes and eukaryotes. This emphasizes the distinction between the repertoires of genes that are conserved in prokaryotes and in eukaryotes and the considerable amount of innovation in both groups of organisms. However, these numbers give the lower bound of the shared clusters of orthologs because some of the KOGs are not represented in the relatively small genomes of unicellular eukaryotes, primarily due to gene loss in the latter, but have prokaryotic counterparts.</p>
            <p>Functional annotation of the detected orthologous clusters is one of the crucial and most labor-consuming aspects of the C/KOG analysis. Given the well-known inaccuracy of the currently available schemes for automatic annotation (e.g., Refs. <abbrgrp><abbr bid="B5">5</abbr><abbr bid="B18">18</abbr></abbrgrp>, and references therein), no attempt was made to fully automate the C/KOG annotation; instead, assignments were made on a case by case basis through a combination of published data on C/KOG members and their homologs, protein domain analysis and different types of context analysis, particularly phyletic patterns and, in prokaryotes, conservation of gene strings which comprise putative operons <abbrgrp><abbr bid="B46">46</abbr><abbr bid="B47">47</abbr><abbr bid="B48">48</abbr></abbrgrp>. Figure <figr fid="F4">4</figr> shows the distribution of known and predicted protein functions for the prokaryotic COGs (i.e., the subset of the COGs obtained by subtraction from the COG collection of those COGs that included solely unicellular eukaryotes) and the eukaryotic KOGs. The difference between prokaryotic and eukaryotic clusters of orthologs is obvious in that the latter are substantially enriched in proteins involved in signal transduction and intracellular trafficking; certain functional categories, such as cytoskeleton formation and chromatin dynamics, were unique to eukaryotes. In contrast, metabolic and transport functions were relatively more prominent among the prokaryotic COGs (Fig. <figr fid="F4">4</figr>).</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Functional classification of prokaryotic (COGs) and eukaryotic (KOGs) clusters of orthologs</p>
               </caption>
               <text>
                  <p><b>Functional classification of prokaryotic (COGs) and eukaryotic (KOGs) clusters of orthologs</b>. Designations of functional categories: A, RNA processing and modification (not used for prokaryotic COGs), B, chromatin structure and dynamics, C, energy production and conversion, D, cell cycle control and mitosis, E, amino acid metabolism and transport, F, nucleotide metabolism and transport, G, carbohydrate metabolism and transport, H, coenzyme metabolism, I, lipid metabolism, J, translation, K, transcription, L, replication and repair, M, cell wall/membrane/envelope biogenesis, N, Cell motility, O, post-translational modification, protein turnover, chaperone functions, P, Inorganic ion transport and metabolism, Q, secondary metabolites biosynthesis, transport and catabolism, T, signal transduction, U, intracellular trafficking and secretion, Y, nuclear structure (not applicable to prokaryotic COGs), Z, cytoskeleton (not applicable to prokaryotic COGs); R, general functional prediction only (typically, prediction of biochemical activity), S, function unknown. The numbers were obtained after subtracting the COGs that consisted entirely of proteins from unicellular eukaryotes from the COG collection.</p>
               </text>
               <graphic file="1471-2105-4-41-4"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Using phyletic patterns to examine gene function and evolution</p>
            </st>
            <p>Phyletic pattern search can be employed for preliminary assessment of specific functional and evolutionary hypotheses. With the increased number of included genomes and enhanced capabilities of the phyletic pattern search tool, this analysis becomes particularly informative. Below we discuss straightforward examples of its use. Figure <figr fid="F5">5a</figr> shows the results of querying the COG database for COGs that are represented in the microsporidian parasite <it>Encephalitozoon cuniculi </it>but not in the two yeast species. Given the dramatic genome reduction seen in the microsporidium <abbrgrp><abbr bid="B38">38</abbr></abbrgrp>, it is not unexpected that the query retrieves only a small set of 13 COGs. This phyletic pattern can be explained either by loss of ancestral eukaryotic genes in yeast or by acquisition of genes by <it>E. cuniculi </it>via horizontal gene transfer. At least in some cases, further examination of the phyletic patterns of the retrieved COGs suggests the most likely scenario. Thus, COGs 1078, 1258, 1690, and 2263 are represented in all archaea, but are either missing in bacteria or are present in a minority of species (Fig. <figr fid="F5">5a</figr>). Therefore these COGs most likely are part of the ancestral archaeo-eukaryotic heritage <abbrgrp><abbr bid="B49">49</abbr></abbrgrp> and might have been lost in yeasts; the respective proteins are known or predicted to be involved in translation or RNA modification, which is compatible with this evolutionary scenario. In contrast, COG3202 seems to be a likely case of horizontal gene transfer. Remarkably, the proteins in this COG are ADP/ATP translocases, which seem to be a hallmark of intracellular parasitism (or symbiosis) allowing the respective organisms to tap into the ATP supplies of the host cell <abbrgrp><abbr bid="B50">50</abbr><abbr bid="B51">51</abbr></abbrgrp>. Indeed, this COG is shared by the eukaryotic (<it>E. 
cuniculi</it>) and bacterial (Chlamydia and Rickettsia) intracellular parasites, the only exception being a diverged member of the COG found in the plant pathogenic bacterium <it>Xylella fastidiosa </it>(Fig. <figr fid="F5">5a</figr>).</p>
            <fig id="F5">
               <title>
                  <p>Figure 5</p>
               </title>
               <caption>
                  <p>Examples of phyletic pattern search</p>
               </caption>
               <text>
                  <p><b>Examples of phyletic pattern search. </b>(A) COGs represented in <it>Encephalitozoon cuniculi </it>but missing in the two yeasts. (B) COGs represented in <it>Yersinia pestis </it>but not in other Proteobacteria or eukaryotes. The sets of species included in COGs are color-coded as follows (from left to right): yellow, archaea; purple, eukaryotes; green, miscellaneous bacteria, including hyperthermophiles, cyanobacteria, <it>Fusobacterium</it>, and <it>Deinococcus</it>; dark yellow, actinobacteria; turquoise, low-GC Gram-positive bacteria (except for mycoplasmas); light blue, Gamma-proteobacteria; dark-blue, Beta- and Epsilon-proteobacteria; dark gray, Alpha-proteobacteria; green, chlamydia and spirochetes; dark green, mycoplasmas. The functional categories, designated as in Fig. <figr fid="F4">4</figr>, are also color-coded.</p>
               </text>
               <graphic file="1471-2105-4-41-5"/>
            </fig>
            <p>The second case in point that we consider here is a search for COGs, which are represented in the causative agent of plague, <it>Yersinia pestis </it><abbrgrp><abbr bid="B52">52</abbr></abbrgrp>, but not in other Proteobacteria (the taxon to which <it>Y. pestis </it>belongs) or eukaryotes; this query retrieves 7 COGs (Fig. <figr fid="F5">5b</figr>). These genes probably have been acquired by <it>Y. pestis </it>via horizontal gene transfer. On a more practical note, some of these genes could be potential targets for highly selective anti-bacterial agents. It is noticeable that three of these genes are predicted to be involved in cell wall metabolism (COGs 2152, 2401, and 3867), whereas the functions of others remain uncharacterized.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Conclusions</p>
         </st>
         <p>The collection of COGs from prokaryotes and unicellular eukaryotes was substantially amended to include 66 species and eukaryotic orthologous groups (KOGs) for 7 species were constructed. The prokaryotic COG system already covers most of the globular proteins encoded in bacterial and archaeal genomes. Eukaryotic KOGs include a lower fraction of the encoded proteins but this difference is expected to level off with the growth of the eukaryotic genome collection. The eukaryotic KOG analysis revealed a substantial conserved core of eukaryotic genes as well as major lineage-specific variations. Lineage-specific expansion of paralogous families within the KOGs and expansion of families that do not have orthologs in other compared genomes make major contributions to the eukaryotic gene repertoire. Only a minority of eukaryotic KOGs have readily detectable prokaryotic counterparts and the same holds for prokaryotic COGs, emphasizing the extent of innovation in both the eukaryotic and prokaryotic divisions of life. The wide scatter of the phyletic patterns among the KOGs testifies to the importance of lineage-specific gene loss in the evolution of eukaryotic genomes.</p>
         <p>The current collection of eukaryotic KOGs includes 7 genomes whose sequences had been available as of July 1, 2002. Manual correction and annotation of KOGs is a labor-intensive process, which precluded immediate inclusion of the genomes of the mouse <abbrgrp><abbr bid="B53">53</abbr></abbrgrp>, fugu fish <abbrgrp><abbr bid="B54">54</abbr></abbrgrp>, mosquito <abbrgrp><abbr bid="B55">55</abbr></abbrgrp>, the urochordate <it>Ciona intestinalis </it><abbrgrp><abbr bid="B56">56</abbr></abbrgrp>, and the malarial parasite <it>Plasmodium falciparum </it><abbrgrp><abbr bid="B57">57</abbr></abbrgrp>, which have become available since that date. However, once the basic system is established, it is expected that inclusion of these and other newly sequenced genomes in the KOG system proceeds at a greater pace.</p>
         <p>The C/KOG system can be employed for functional annotation of genes from new genomes by using the COGNITOR program and for research into genome evolution. The utility of the system for both of these purposes should increase progressively with the inclusion of new genomes, particularly those of early-branching eukaryotes.</p>
      </sec>
      <sec>
         <st>
            <p>Methods</p>
         </st>
         <sec>
            <st>
               <p>Protein sets for new genomes</p>
            </st>
            <p>The protein sets for all newly included bacterial and archaeal genomes, the yeasts <it>Saccharomyces cerevisiae </it>and <it>Schizosaccharomyces pombe</it>, the microsporidian <it>Encephalitozoon cuniculi</it>, the thale cress <it>Arabidopsis thaliana</it>, and the fruit fly <it>Drosophila melanogaster </it>were extracted from the Genome division of the NCBI (NIH, Bethesda). The protein sequences for the nematode <it>Caenorhabditis elegans </it>were from the WormPep67 database, the sequences for <it>Homo sapiens </it>were from the NCBI build 30.</p>
         </sec>
         <sec>
            <st>
               <p>Addition of new genomes to the COGs</p>
            </st>
            <p>The new genomes were added to the COGs by using the COGNITOR program, with the results validated manually, essentially as described previously <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B15">15</abbr></abbrgrp>. After the completion of the validation process, the remaining proteins were subject to the COG construction procedure, in order to detect new COGs that could not be formed without the added genomes; the validation and annotation steps were repeated with the newly detected COGs.</p>
         </sec>
         <sec>
            <st>
               <p>Sequence analysis, construction and annotation of KOGs</p>
            </st>
            <p>The construction of KOGs followed the previously outlined strategy based on sets of consistent BeTs <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B15">15</abbr></abbrgrp>, but included additional steps that reflected specific features of eukaryotic proteins. Briefly, the procedure was as follows. 1. Detection and masking of widespread, typically repetitive domains, which was performed by using the RPS-BLAST program and the PSSMs for the respective domains from the CDD collection <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. These domains, namely, PPR (pfam01535), WD40 (pfam00400), IG (pfam00047), IGc1, Igv, IG_like, RRM (pfam00076), ANK (pfam00023), myosin tail (pfam01576), Fn3 (pfam00041), CA, (IG), ANK, kelch (pfam01344), OAD_kelch, SH3 (pfam00018), intermediate filaments (pfam00038), C2H2 finger (pfam00096), PDZ (pfam00595), POZ (pfam00651), PH (pfam00169), ZnF-C4 (pfam00105), spectrin (pfam00435), Sushi (pfam00084), TPR (pfam00017), BTB, LRR_CC, LY, ARM, SH2, and CH, were detected and masked prior to applying the COG construction procedure. Masking these domains was required to ensure the robust classification of the eukaryotic orthologous clusters with the KOG detection procedure because hits between these common, "promiscuous" domains resulted in spurious lumping of numerous non-orthologous proteins. 2. All-against-all comparison of protein sequences from the analyzed genomes by using the gapped BLAST program <abbrgrp><abbr bid="B58">58</abbr></abbrgrp>, with filtering for low sequence complexity regions performed using the SEG program <abbrgrp><abbr bid="B59">59</abbr></abbrgrp>. 3. Detection of triangles of mutually consistent, genome-specific best hits (BeTs). 4. Merging triangles with a common side to form crude, preliminary KOGs. 5. Case by case analysis of each candidate KOG. 
This analysis serves to eliminate the false-positives that are incorporated in the KOGs during the automatic steps and included, primarily, examination of the domain composition of KOG members, which was determined using the RPS-BLAST program and the CDD collection of position-specific scoring matrices (PSSMs) for individual domains <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. Generally, proteins were kept in the same KOG when they shared a conserved core domain architecture. However, in cases when KOGs were artificially bridged by multidomain proteins, the latter were split into individual domains (or arrays of domains) and steps (1)-(4) were repeated with these sequences; this results in the assignment of individual domains to KOGs in accordance with their distinct evolutionary affinities. 6. Assignment of proteins containing promiscuous domains. In cases when a sequence assigned to a KOG contained one or more masked promiscuous domains, these domains were restored and became part of the respective KOG. Proteins containing promiscuous domains but not assigned to any KOG were classified in Fuzzy Orthologous Groups (FOGs) named after the respective domains. 7. Examination of large KOGs, which included multiple members from all or several of the compared genomes by using phylogenetic trees, cluster analysis with the BLASTCLUST program <url>ftp://ftp.ncbi.nih.gov/blast/</url>, comparison of domain architectures, and visual inspection of alignments; as a result, some of these protein sets were split into two or more smaller ones that were included in the final set of KOGs.</p>
            <p>The KOGs were annotated on the basis of the annotations available through GenBank and other public databases, which were critically assessed against the primary literature. For proteins that are currently annotated as "hypothetical" or "unknown", iterative sequence similarity searches with the PSI-BLAST program <abbrgrp><abbr bid="B58">58</abbr></abbrgrp>, the results of the RPS-BLAST searches, additional domain architecture analysis performed by using the SMART system <abbrgrp><abbr bid="B60">60</abbr></abbrgrp>, and comparison to the COG database by using the COGNITOR program (RLT, unpublished results) were employed to identify distant homologs with experimentally characterized functions and/or structures. The known and predicted functions of KOGs were classified into 23 categories (see legend to Fig. <figr fid="F4">4</figr>); these were modified from the functional classification previously employed for prokaryotic COGs <abbrgrp><abbr bid="B15">15</abbr></abbrgrp> by including several specific eukaryotic categories.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Availability of the results</p>
         </st>
         <p>The updated version of the COGs for unicellular organisms and the eukaryotic KOGs are accessible at <url>http://www.ncbi.nlm.nih.gov/COG/</url> and via ftp at <url>ftp://ftp.ncbi.nih.gov/pub/COG/</url>.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We thank L. Aravind, David Lipman, Kira Makarova and Wei Yang for useful discussions, and Igor Garkavtsev for his contributions at the initial stages of the KOG project.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Distinguishing homologous from analogous proteins</p>
            </title>
            <aug>
               <au>
                  <snm>Fitch</snm>
                  <fnm>WM</fnm>
               </au>
            </aug>
            <source>Systematic Zoology</source>
            <pubdate>1970</pubdate>
            <volume>19</volume>
            <fpage>99</fpage>
            <lpage>106</lpage>
            <xrefbib>
               <pubid idtype="pmpid">5449325</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Homology: a personal view on some of the problems</p>
            </title>
            <aug>
               <au>
                  <snm>Fitch</snm>
                  <fnm>WM</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2000</pubdate>
            <volume>16</volume>
            <fpage>227</fpage>
            <lpage>231</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10782117</pubid>
                  <pubid idtype="doi">10.1016/S0168-9525(00)02005-9</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Gene families: the taxonomy of protein paralogs and chimeras</p>
            </title>
            <aug>
               <au>
                  <snm>Henikoff</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Greene</snm>
                  <fnm>EA</fnm>
               </au>
               <au>
                  <snm>Pietrokovski</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Attwood</snm>
                  <fnm>TK</fnm>
               </au>
               <au>
                  <snm>Hood</snm>
                  <fnm>L</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>1997</pubdate>
            <volume>278</volume>
            <fpage>609</fpage>
            <lpage>614</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9381171</pubid>
                  <pubid idtype="doi">10.1126/science.278.5338.609</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Orthology, paralogy and proposed classification for paralog subtypes</p>
            </title>
            <aug>
               <au>
                  <snm>Sonnhammer</snm>
                  <fnm>EL</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2002</pubdate>
            <volume>18</volume>
            <fpage>619</fpage>
            <lpage>620</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12446146</pubid>
                  <pubid idtype="doi">10.1016/S0168-9525(02)02793-2</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Assessing annotation transfer for genomics: quantifying the relations between protein sequence, structure and function through traditional and probabilistic scores</p>
            </title>
            <aug>
               <au>
                  <snm>Wilson</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Kreychman</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Gerstein</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>J Mol Biol</source>
            <pubdate>2000</pubdate>
            <volume>297</volume>
            <fpage>233</fpage>
            <lpage>249</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10704319</pubid>
                  <pubid idtype="doi">10.1006/jmbi.2000.3550</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>A phylogenomic approach to microbial evolution</p>
            </title>
            <aug>
               <au>
                  <snm>Sicheritz-Ponten</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Andersson</snm>
                  <fnm>SG</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2001</pubdate>
            <volume>29</volume>
            <fpage>545</fpage>
            <lpage>552</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11139625</pubid>
                  <pubid idtype="doi">10.1093/nar/29.2.545</pubid>
                  <pubid idtype="pmcid">29656</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>RIO: Analyzing proteomes by automated phylogenomics using resampled inference of orthologs</p>
            </title>
            <aug>
               <au>
                  <snm>Zmasek</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Eddy</snm>
                  <fnm>SR</fnm>
               </au>
            </aug>
            <source>BMC Bioinformatics</source>
            <pubdate>2002</pubdate>
            <volume>3</volume>
            <fpage>14</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12028595</pubid>
                  <pubid idtype="doi">10.1186/1471-2105-3-14</pubid>
                  <pubid idtype="pmcid">116988</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>Automated ortholog inference from phylogenetic trees and calculation of orthology reliability</p>
            </title>
            <aug>
               <au>
                  <snm>Storm</snm>
                  <fnm>CE</fnm>
               </au>
               <au>
                  <snm>Sonnhammer</snm>
                  <fnm>EL</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2002</pubdate>
            <volume>18</volume>
            <fpage>92</fpage>
            <lpage>99</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11836216</pubid>
                  <pubid idtype="doi">10.1093/bioinformatics/18.1.92</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>A genomic perspective on protein families</p>
            </title>
            <aug>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Lipman</snm>
                  <fnm>DJ</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>1997</pubdate>
            <volume>278</volume>
            <fpage>631</fpage>
            <lpage>637</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9381173</pubid>
                  <pubid idtype="doi">10.1126/science.278.5338.631</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Measuring genome evolution</p>
            </title>
            <aug>
               <au>
                  <snm>Huynen</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1998</pubdate>
            <volume>95</volume>
            <fpage>5849</fpage>
            <lpage>5856</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9600883</pubid>
                  <pubid idtype="doi">10.1073/pnas.95.11.5849</pubid>
                  <pubid idtype="pmcid">34486</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>Gene content phylogeny of herpesviruses</p>
            </title>
            <aug>
               <au>
                  <snm>Montague</snm>
                  <fnm>MG</fnm>
               </au>
               <au>
                  <snm>Hutchison</snm>
                  <fnm>CA</fnm>
                  <suf>3rd</suf>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2000</pubdate>
            <volume>97</volume>
            <fpage>5334</fpage>
            <lpage>5339</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10805793</pubid>
                  <pubid idtype="doi">10.1073/pnas.97.10.5334</pubid>
                  <pubid idtype="pmcid">25829</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Lineage-specific gene expansions in bacterial and archaeal genomes</p>
            </title>
            <aug>
               <au>
                  <snm>Jordan</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Spouge</snm>
                  <fnm>JL</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2001</pubdate>
            <volume>11</volume>
            <fpage>555</fpage>
            <lpage>565</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11282971</pubid>
                  <pubid idtype="doi">10.1101/gr.GR-1660R</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Automatic clustering of orthologs and in-paralogs from pairwise species comparisons</p>
            </title>
            <aug>
               <au>
                  <snm>Remm</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Storm</snm>
                  <fnm>CE</fnm>
               </au>
               <au>
                  <snm>Sonnhammer</snm>
                  <fnm>EL</fnm>
               </au>
            </aug>
            <source>J Mol Biol</source>
            <pubdate>2001</pubdate>
            <volume>314</volume>
            <fpage>1041</fpage>
            <lpage>1052</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11743721</pubid>
                  <pubid idtype="doi">10.1006/jmbi.2000.5197</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>The role of lineage-specific gene family expansion in the evolution of eukaryotes</p>
            </title>
            <aug>
               <au>
                  <snm>Lespinet</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2002</pubdate>
            <volume>12</volume>
            <fpage>1048</fpage>
            <lpage>1059</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12097341</pubid>
                  <pubid idtype="doi">10.1101/gr.174302</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>The COG database: new developments in phylogenetic classification of proteins from complete genomes</p>
            </title>
            <aug>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Natale</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Garkavtsev</snm>
                  <fnm>IV</fnm>
               </au>
               <au>
                  <snm>Tatusova</snm>
                  <fnm>TA</fnm>
               </au>
               <au>
                  <snm>Shankavaram</snm>
                  <fnm>UT</fnm>
               </au>
               <au>
                  <snm>Rao</snm>
                  <fnm>BS</fnm>
               </au>
               <au>
                  <snm>Kiryutin</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Galperin</snm>
                  <fnm>MY</fnm>
               </au>
               <au>
                  <snm>Fedorova</snm>
                  <fnm>ND</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2001</pubdate>
            <volume>29</volume>
            <fpage>22</fpage>
            <lpage>28</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11125040</pubid>
                  <pubid idtype="doi">10.1093/nar/29.1.22</pubid>
                  <pubid idtype="pmcid">29819</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>The complete genome of hyperthermophile Methanopyrus kandleri AV19 and monophyly of archaeal methanogens</p>
            </title>
            <aug>
               <au>
                  <snm>Slesarev</snm>
                  <fnm>AI</fnm>
               </au>
               <au>
                  <snm>Mezhevaya</snm>
                  <fnm>KV</fnm>
               </au>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Polushin</snm>
                  <fnm>NN</fnm>
               </au>
               <au>
                  <snm>Shcherbinina</snm>
                  <fnm>OV</fnm>
               </au>
               <au>
                  <snm>Shakhova</snm>
                  <fnm>VV</fnm>
               </au>
               <au>
                  <snm>Belova</snm>
                  <fnm>GI</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Natale</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Rogozin</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Stetter</snm>
                  <fnm>KO</fnm>
               </au>
               <au>
                  <snm>Malykh</snm>
                  <fnm>AG</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Kozyavkin</snm>
                  <fnm>SA</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2002</pubdate>
            <volume>99</volume>
            <fpage>4644</fpage>
            <lpage>4649</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11930014</pubid>
                  <pubid idtype="doi">10.1073/pnas.032671499</pubid>
                  <pubid idtype="pmcid">123701</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Genome annotation using clusters of orthologous groups of proteins (COGs) &#8211; towards understanding the first genome of a Crenarchaeon</p>
            </title>
            <aug>
               <au>
                  <snm>Natale</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Shankavaram</snm>
                  <fnm>UT</fnm>
               </au>
               <au>
                  <snm>Galperin</snm>
                  <fnm>MY</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2001</pubdate>
            <volume>5</volume>
            <fpage>RESEARCH0009</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">15027</pubid>
                  <pubid idtype="pmpid" link="fulltext">11178258</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Genome sequence and comparative analysis of the solvent-producing bacterium Clostridium acetobutylicum</p>
            </title>
            <aug>
               <au>
                  <snm>Nolling</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Breton</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Omelchenko</snm>
                  <fnm>MV</fnm>
               </au>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Zeng</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Gibson</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>HM</fnm>
               </au>
               <au>
                  <snm>Dubois</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Qiu</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hitti</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Sabathe</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Doucette-Stamm</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Soucaille</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Daly</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Bennett</snm>
                  <fnm>GN</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>DR</fnm>
               </au>
            </aug>
            <source>J Bacteriol</source>
            <pubdate>2001</pubdate>
            <volume>183</volume>
            <fpage>4823</fpage>
            <lpage>4838</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11466286</pubid>
                  <pubid idtype="doi">10.1128/JB.183.16.4823-4838.2001</pubid>
                  <pubid idtype="pmcid">99537</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Complete genome sequence of Salmonella enterica serovar Typhimurium LT2</p>
            </title>
            <aug>
               <au>
                  <snm>McClelland</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sanderson</snm>
                  <fnm>KE</fnm>
               </au>
               <au>
                  <snm>Spieth</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Clifton</snm>
                  <fnm>SW</fnm>
               </au>
               <au>
                  <snm>Latreille</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Courtney</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Porwollik</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ali</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Dante</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Du</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Hou</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Layman</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Leonard</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Nguyen</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Scott</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Holmes</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Grewal</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Mulvaney</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Ryan</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Sun</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Florea</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Miller</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Stoneking</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Nhan</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Waterston</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Wilson</snm>
                  <fnm>RK</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2001</pubdate>
            <volume>413</volume>
            <fpage>852</fpage>
            <lpage>856</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11677609</pubid>
                  <pubid idtype="doi">10.1038/35101614</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Genome of the extremely radiation-resistant bacterium Deinococcus radiodurans viewed from the perspective of comparative genomics</p>
            </title>
            <aug>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Minton</snm>
                  <fnm>KW</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Daly</snm>
                  <fnm>MJ</fnm>
               </au>
            </aug>
            <source>Microbiol Mol Biol Rev</source>
            <pubdate>2001</pubdate>
            <volume>65</volume>
            <fpage>44</fpage>
            <lpage>79</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11238985</pubid>
                  <pubid idtype="doi">10.1128/MMBR.65.1.44-79.2001</pubid>
                  <pubid idtype="pmcid">99018</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>Constant relative rate of protein evolution and detection of functional diversification among bacterial, archaeal and eukaryotic proteins</p>
            </title>
            <aug>
               <au>
                  <snm>Jordan</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Kondrashov</snm>
                  <fnm>FA</fnm>
               </au>
               <au>
                  <snm>Rogozin</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2001</pubdate>
            <volume>2</volume>
            <fpage>RESEARCH0053</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11790256</pubid>
                  <pubid idtype="doi">10.1186/gb-2001-2-12-research0053</pubid>
                  <pubid idtype="pmcid">64838</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Genes linked by fusion events are generally of the same functional category: A systematic analysis of 30 microbial genomes</p>
            </title>
            <aug>
               <au>
                  <snm>Yanai</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Derti</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>DeLisi</snm>
                  <fnm>C</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2001</pubdate>
            <volume>98</volume>
            <fpage>7940</fpage>
            <lpage>7945</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11438739</pubid>
                  <pubid idtype="doi">10.1073/pnas.141236298</pubid>
                  <pubid idtype="pmcid">35447</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Genome evolution at the genus level: comparison of three complete genomes of hyperthermophilic archaea</p>
            </title>
            <aug>
               <au>
                  <snm>Lecompte</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Ripp</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Puzos-Barbe</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Duprat</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Heilig</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Dietrich</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Thierry</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Poch</snm>
                  <fnm>O</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2001</pubdate>
            <volume>11</volume>
            <fpage>981</fpage>
            <lpage>993</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11381026</pubid>
                  <pubid idtype="doi">10.1101/gr.GR1653R</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Horizontal gene transfer in prokaryotes: quantification and classification</p>
            </title>
            <aug>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
            </aug>
            <source>Annu Rev Microbiol</source>
            <pubdate>2001</pubdate>
            <volume>55</volume>
            <fpage>709</fpage>
            <lpage>742</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11544372</pubid>
                  <pubid idtype="doi">10.1146/annurev.micro.55.1.709</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Essential genes are more evolutionarily conserved than are nonessential genes in bacteria</p>
            </title>
            <aug>
               <au>
                  <snm>Jordan</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Rogozin</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2002</pubdate>
            <volume>12</volume>
            <fpage>962</fpage>
            <lpage>968</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12045149</pubid>
                  <pubid idtype="doi">10.1101/gr.87702</pubid>
                  <!-- Article published online before print in May 2002 -->
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Genome evolution in bacterial endosymbionts of insects</p>
            </title>
            <aug>
               <au>
                  <snm>Wernegreen</snm>
                  <fnm>JJ</fnm>
               </au>
            </aug>
            <source>Nat Rev Genet</source>
            <pubdate>2002</pubdate>
            <volume>3</volume>
            <fpage>850</fpage>
            <lpage>861</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12415315</pubid>
                  <pubid idtype="doi">10.1038/nrg931</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Borrelia genomes in the year 2000</p>
            </title>
            <aug>
               <au>
                  <snm>Casjens</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>J Mol Microbiol Biotechnol</source>
            <pubdate>2000</pubdate>
            <volume>2</volume>
            <fpage>401</fpage>
            <lpage>410</lpage>
            <xrefbib>
               <pubid idtype="pmpid">11075912</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Comparison of archaeal and bacterial genomes: computer analysis of protein sequences predicts novel functions and suggests a chimeric origin for the archaea</p>
            </title>
            <aug>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Mushegian</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Galperin</snm>
                  <fnm>MY</fnm>
               </au>
               <au>
                  <snm>Walker</snm>
                  <fnm>DR</fnm>
               </au>
            </aug>
            <source>Mol Microbiol</source>
            <pubdate>1997</pubdate>
            <volume>25</volume>
            <fpage>619</fpage>
            <lpage>637</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1046/j.1365-2958.1997.4821861.x</pubid>
                  <pubid idtype="pmpid">9379893</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p>Genomes in flux: the evolution of archaeal and proteobacterial gene content</p>
            </title>
            <aug>
               <au>
                  <snm>Snel</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Huynen</snm>
                  <fnm>MA</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2002</pubdate>
            <volume>12</volume>
            <fpage>17</fpage>
            <lpage>25</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11779827</pubid>
                  <pubid idtype="doi">10.1101/gr.176501</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>Genome trees constructed using five different approaches suggest new major bacterial clades</p>
            </title>
            <aug>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Rogozin</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Grishin</snm>
                  <fnm>NV</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>BMC Evol Biol</source>
            <pubdate>2001</pubdate>
            <volume>1</volume>
            <fpage>8</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11734060</pubid>
                  <pubid idtype="doi">10.1186/1471-2148-1-8</pubid>
                  <pubid idtype="pmcid">60490</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <title>
               <p>Algorithms for computing parsimonious evolutionary scenarios for genome evolution, the last universal common ancestor and dominance of horizontal gene transfer in the evolution of prokaryotes</p>
            </title>
            <aug>
               <au>
                  <snm>Mirkin</snm>
                  <fnm>BG</fnm>
               </au>
               <au>
                  <snm>Fenner</snm>
                  <fnm>TI</fnm>
               </au>
               <au>
                  <snm>Galperin</snm>
                  <fnm>MY</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>BMC Evol Biol</source>
            <pubdate>2003</pubdate>
            <volume>3</volume>
            <fpage>2</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12515582</pubid>
                  <pubid idtype="doi">10.1186/1471-2148-3-2</pubid>
                  <pubid idtype="pmcid">149225</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B32">
            <title>
               <p>Initial sequencing and analysis of the human genome</p>
            </title>
            <aug>
               <au>
                  <snm>Lander</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Linton</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Birren</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Nusbaum</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Zody</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Baldwin</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Devon</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Dewar</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Doyle</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>FitzHugh</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Funke</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gage</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Harris</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Heaford</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Howland</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kann</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Lehoczky</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>LeVine</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>McEwan</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>McKernan</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Meldrim</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Mesirov</snm>
                  <fnm>JP</fnm>
               </au>
               <au>
                  <snm>Miranda</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Morris</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Naylor</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Raymond</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Rosetti</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Santos</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Sheridan</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sougnez</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Stange-Thomann</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Stojanovic</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Subramanian</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Wyman</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Rogers</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Sulston</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Ainscough</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Beck</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Bentley</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Burton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Clee</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Carter</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Coulson</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Deadman</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Deloukas</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Dunham</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Dunham</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Durbin</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>French</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Grafham</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Gregory</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hubbard</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Humphray</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hunt</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Lloyd</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>McMurray</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Matthews</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Mercer</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Milne</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Mullikin</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Mungall</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Plumb</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Ross</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Shownkeen</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Sims</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Waterston</snm>
                  <fnm>RH</fnm>
               </au>
               <au>
                  <snm>Wilson</snm>
                  <fnm>RK</fnm>
               </au>
               <au>
                  <snm>Hillier</snm>
                  <fnm>LW</fnm>
               </au>
               <au>
                  <snm>McPherson</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Marra</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Mardis</snm>
                  <fnm>ER</fnm>
               </au>
               <au>
                  <snm>Fulton</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Chinwalla</snm>
                  <fnm>AT</fnm>
               </au>
               <au>
                  <snm>Pepin</snm>
                  <fnm>KH</fnm>
               </au>
               <au>
                  <snm>Gish</snm>
                  <fnm>WR</fnm>
               </au>
               <au>
                  <snm>Chissoe</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Wendl</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Delehaunty</snm>
                  <fnm>KD</fnm>
               </au>
               <au>
                  <snm>Miner</snm>
                  <fnm>TL</fnm>
               </au>
               <au>
                  <snm>Delehaunty</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kramer</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>Cook</snm>
                  <fnm>LL</fnm>
               </au>
               <au>
                  <snm>Fulton</snm>
                  <fnm>RS</fnm>
               </au>
               <au>
                  <snm>Johnson</snm>
                  <fnm>DL</fnm>
               </au>
               <au>
                  <snm>Minx</snm>
                  <fnm>PJ</fnm>
               </au>
               <au>
                  <snm>Clifton</snm>
                  <fnm>SW</fnm>
               </au>
               <au>
                  <snm>Hawkins</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Branscomb</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Predki</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Richardson</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Wenning</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Slezak</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Doggett</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Cheng</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Olsen</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Lucas</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Elkin</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Uberbacher</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Frazier</snm>
                  <fnm>M</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2001</pubdate>
            <volume>409</volume>
            <fpage>860</fpage>
            <lpage>921</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11237011</pubid>
                  <pubid idtype="doi">10.1038/35057062</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B33">
            <title>
               <p>The genome sequence of Drosophila melanogaster</p>
            </title>
            <aug>
               <au>
                  <snm>Adams</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Celniker</snm>
                  <fnm>SE</fnm>
               </au>
               <au>
                  <snm>Holt</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Evans</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Gocayne</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Amanatides</snm>
                  <fnm>PG</fnm>
               </au>
               <au>
                  <snm>Scherer</snm>
                  <fnm>SE</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>PW</fnm>
               </au>
               <au>
                  <snm>Hoskins</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Galle</snm>
                  <fnm>RF</fnm>
               </au>
               <etal/>
            </aug>
            <source>Science</source>
            <pubdate>2000</pubdate>
            <volume>287</volume>
            <fpage>2185</fpage>
            <lpage>2195</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10731132</pubid>
                  <pubid idtype="doi">10.1126/science.287.5461.2185</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B34">
            <title>
               <p>Genome sequence of the nematode C. elegans: a platform for investigating biology</p>
            </title>
            <aug>
               <au>
                  <cnm>The C. elegans Sequencing Consortium</cnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>1998</pubdate>
            <volume>282</volume>
            <fpage>2012</fpage>
            <lpage>2018</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9851916</pubid>
                  <pubid idtype="doi">10.1126/science.282.5396.2012</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B35">
            <title>
               <p>Analysis of the genome sequence of the flowering plant Arabidopsis thaliana</p>
            </title>
            <aug>
               <au>
                  <cnm>The Arabidopsis Genome Initiative</cnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2000</pubdate>
            <volume>408</volume>
            <fpage>796</fpage>
            <lpage>815</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11130711</pubid>
                  <pubid idtype="doi">10.1038/35048692</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B36">
            <title>
               <p>Life with 6000 genes</p>
            </title>
            <aug>
               <au>
                  <snm>Goffeau</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Barrell</snm>
                  <fnm>BG</fnm>
               </au>
               <au>
                  <snm>Bussey</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>RW</fnm>
               </au>
               <au>
                  <snm>Dujon</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Feldmann</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Galibert</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Hoheisel</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Jacq</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Johnston</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Louis</snm>
                  <fnm>EJ</fnm>
               </au>
               <au>
                  <snm>Mewes</snm>
                  <fnm>HW</fnm>
               </au>
               <au>
                  <snm>Murakami</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Philippsen</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Tettelin</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Oliver</snm>
                  <fnm>SG</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>1996</pubdate>
            <volume>274</volume>
            <fpage>563</fpage>
            <lpage>567</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">8849441</pubid>
                  <pubid idtype="doi">10.1126/science.274.5287.546</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B37">
            <title>
               <p>The genome sequence of Schizosaccharomyces pombe</p>
            </title>
            <aug>
               <au>
                  <snm>Wood</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Gwilliam</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Rajandream</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Lyne</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Lyne</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Stewart</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sgouros</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Peat</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Hayles</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Baker</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Basham</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Bowman</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Brooks</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chillingworth</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Churcher</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Collins</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Connor</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Cronin</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Feltwell</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Fraser</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Gentles</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Goble</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Hamlin</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Harris</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hidalgo</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Hodgson</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Holroyd</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hornsby</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Howarth</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Huckle</snm>
                  <fnm>EJ</fnm>
               </au>
               <au>
                  <snm>Hunt</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Jagels</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>James</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Leather</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>McDonald</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>McLean</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Mooney</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Moule</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Mungall</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Murphy</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Niblett</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Odell</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Oliver</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>O'Neil</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Pearson</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Quail</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Rabbinowitsch</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Rutherford</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Rutter</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Saunders</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Seeger</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Sharp</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Skelton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Simmonds</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Squares</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Squares</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Stevens</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Taylor</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Taylor</snm>
                  <fnm>RG</fnm>
               </au>
               <au>
                  <snm>Tivey</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Walsh</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Warren</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Whitehead</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Woodward</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Volckaert</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Aert</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Robben</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Grymonprez</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Weltjens</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Vanstreels</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Rieger</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Schafer</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Muller-Auer</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Gabel</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Fuchs</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Dusterhoft</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Fritz</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Holzer</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Moestl</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hilbert</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Borzym</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Langer</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Beck</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Lehrach</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Reinhardt</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Pohl</snm>
                  <fnm>TM</fnm>
               </au>
               <au>
                  <snm>Eger</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Zimmermann</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Wedler</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Wambutt</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Purnelle</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Goffeau</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Cadieu</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Dreano</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Gloux</snm>
                  <fnm>S</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2002</pubdate>
            <volume>415</volume>
            <fpage>871</fpage>
            <lpage>880</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11859360</pubid>
                  <pubid idtype="doi">10.1038/nature724</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B38">
            <title>
               <p>Genome sequence and gene compaction of the eukaryote parasite Encephalitozoon cuniculi</p>
            </title>
            <aug>
               <au>
                  <snm>Katinka</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Duprat</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Cornillot</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Metenier</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Thomarat</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Prensier</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Barbe</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Peyretaillade</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Brottier</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Wincker</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Delbac</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>El Alaoui</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Peyret</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Saurin</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Gouy</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Weissenbach</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Vivares</snm>
                  <fnm>CP</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2001</pubdate>
            <volume>414</volume>
            <fpage>450</fpage>
            <lpage>453</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11719806</pubid>
                  <pubid idtype="doi">10.1038/35106579</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B39">
            <title>
               <p>The impact of comparative genomics on our understanding of evolution</p>
            </title>
            <aug>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Kondrashov</snm>
                  <fnm>AS</fnm>
               </au>
            </aug>
            <source>Cell</source>
            <pubdate>2000</pubdate>
            <volume>101</volume>
            <fpage>573</fpage>
            <lpage>576</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">10892642</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B40">
            <title>
               <p>CDD: a curated Entrez database of conserved domain alignments</p>
            </title>
            <aug>
               <au>
                  <snm>Marchler-Bauer</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Anderson</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>DeWeese-Scott</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Fedorova</snm>
                  <fnm>ND</fnm>
               </au>
               <au>
                  <snm>Geer</snm>
                  <fnm>LY</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hurwitz</snm>
                  <fnm>DI</fnm>
               </au>
               <au>
                  <snm>Jackson</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Jacobs</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Lanczycki</snm>
                  <fnm>CJ</fnm>
               </au>
               <au>
                  <snm>Liebert</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Madej</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Marchler</snm>
                  <fnm>GH</fnm>
               </au>
               <au>
                  <snm>Mazumder</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Nikolskaya</snm>
                  <fnm>AN</fnm>
               </au>
               <au>
                  <snm>Panchenko</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Rao</snm>
                  <fnm>BS</fnm>
               </au>
               <au>
                  <snm>Shoemaker</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>Simonyan</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Song</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Thiessen</snm>
                  <fnm>PA</fnm>
               </au>
               <au>
                  <snm>Vasudevan</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Yamashita</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Yin</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Bryant</snm>
                  <fnm>SH</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2003</pubdate>
            <volume>31</volume>
            <fpage>383</fpage>
            <lpage>387</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12520028</pubid>
                  <pubid idtype="doi">10.1093/nar/gkg087</pubid>
                  <pubid idtype="pmcid">165534</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B41">
            <title>
               <p>Comparative genomics of the Archaea (Euryarchaeota): evolution of conserved protein families, the stable core, and the variable shell</p>
            </title>
            <aug>
               <au>
                  <snm>Makarova</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Galperin</snm>
                  <fnm>MY</fnm>
               </au>
               <au>
                  <snm>Grishin</snm>
                  <fnm>NV</fnm>
               </au>
               <au>
                  <snm>Tatusov</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>1999</pubdate>
            <volume>9</volume>
            <fpage>608</fpage>
            <lpage>628</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">10413400</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B42">
            <title>
               <p>The origin and evolution of model organisms</p>
            </title>
            <aug>
               <au>
                  <snm>Hedges</snm>
                  <fnm>SB</fnm>
               </au>
            </aug>
            <source>Nat Rev Genet</source>
            <pubdate>2002</pubdate>
            <volume>3</volume>
            <fpage>838</fpage>
            <lpage>849</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12415314</pubid>
                  <pubid idtype="doi">10.1038/nrg929</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B43">
            <title>
               <p>Neuroglobin and cytoglobin. Fresh blood for the vertebrate globin family</p>
            </title>
            <aug>
               <au>
                  <snm>Pesce</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Bolognesi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Bocedi</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Ascenzi</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Dewilde</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Moens</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Hankeln</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Burmester</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>EMBO Rep</source>
            <pubdate>2002</pubdate>
            <volume>3</volume>
            <fpage>1146</fpage>
            <lpage>1151</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12475928</pubid>
                  <pubid idtype="doi">10.1093/embo-reports/kvf248</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B44">
            <title>
               <p>Extracting protein alignment models from the sequence database</p>
            </title>
            <aug>
               <au>
                  <snm>Neuwald</snm>
                  <fnm>AF</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Lipman</snm>
                  <fnm>DJ</fnm>
               </au>
               <au>
                  <snm>Lawrence</snm>
                  <fnm>CE</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>1997</pubdate>
            <volume>25</volume>
            <fpage>1665</fpage>
            <lpage>1677</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9108146</pubid>
                  <pubid idtype="doi">10.1093/nar/25.9.1665</pubid>
                  <pubid idtype="pmcid">146639</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B45">
            <title>
               <p>Nitric-oxide dioxygenase activity and function of flavohemoglobins. Sensitivity to nitric oxide and carbon monoxide inhibition</p>
            </title>
            <aug>
               <au>
                  <snm>Gardner</snm>
                  <fnm>PR</fnm>
               </au>
               <au>
                  <snm>Gardner</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Martin</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Dou</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Olson</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Riggs</snm>
                  <fnm>AF</fnm>
               </au>
            </aug>
            <source>J Biol Chem</source>
            <pubdate>2000</pubdate>
            <volume>275</volume>
            <fpage>31581</fpage>
            <lpage>31587</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10922365</pubid>
                  <pubid idtype="doi">10.1074/jbc.M004141200</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B46">
            <title>
               <p>Gene and context: integrative approaches to genome analysis</p>
            </title>
            <aug>
               <au>
                  <snm>Huynen</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Snel</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Adv Protein Chem</source>
            <pubdate>2000</pubdate>
            <volume>54</volume>
            <fpage>345</fpage>
            <lpage>379</lpage>
         </bibl>
         <bibl id="B47">
            <title>
               <p>Exploitation of gene context</p>
            </title>
            <aug>
               <au>
                  <snm>Huynen</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Snel</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Lathe</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Curr Opin Struct Biol</source>
            <pubdate>2000</pubdate>
            <volume>10</volume>
            <fpage>366</fpage>
            <lpage>370</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10851194</pubid>
                  <pubid idtype="doi">10.1016/S0959-440X(00)00098-1</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B48">
            <title>
               <p>Genome alignment, evolution of prokaryotic genome organization and prediction of gene function using genomic context</p>
            </title>
            <aug>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Rogozin</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Kondrashov</snm>
                  <fnm>AS</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2001</pubdate>
            <volume>11</volume>
            <fpage>356</fpage>
            <lpage>372</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11230160</pubid>
                  <pubid idtype="doi">10.1101/gr.GR-1619R</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B49">
            <title>
               <p>Archaeal genomics: an overview</p>
            </title>
            <aug>
               <au>
                  <snm>Olsen</snm>
                  <fnm>GJ</fnm>
               </au>
               <au>
                  <snm>Woese</snm>
                  <fnm>CR</fnm>
               </au>
            </aug>
            <source>Cell</source>
            <pubdate>1997</pubdate>
            <volume>89</volume>
            <fpage>991</fpage>
            <lpage>994</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">9215619</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B50">
            <title>
               <p>Non-mitochondrial ATP transport</p>
            </title>
            <aug>
               <au>
                  <snm>Winkler</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Neuhaus</snm>
                  <fnm>HE</fnm>
               </au>
            </aug>
            <source>Trends Biochem Sci</source>
            <pubdate>1999</pubdate>
            <volume>24</volume>
            <fpage>64</fpage>
            <lpage>68</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10098400</pubid>
                  <pubid idtype="doi">10.1016/S0968-0004(98)01334-6</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B51">
            <title>
               <p>Rickettsiae and Chlamydiae: evidence of horizontal gene transfer and gene exchange</p>
            </title>
            <aug>
               <au>
                  <snm>Wolf</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Aravind</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>1999</pubdate>
            <volume>15</volume>
            <fpage>173</fpage>
            <lpage>175</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">10322483</pubid>
                  <pubid idtype="doi">10.1016/S0168-9525(99)01704-7</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B52">
            <title>
               <p>Genome sequence of Yersinia pestis, the causative agent of plague</p>
            </title>
            <aug>
               <au>
                  <snm>Parkhill</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Wren</snm>
                  <fnm>BW</fnm>
               </au>
               <au>
                  <snm>Thomson</snm>
                  <fnm>NR</fnm>
               </au>
               <au>
                  <snm>Titball</snm>
                  <fnm>RW</fnm>
               </au>
               <au>
                  <snm>Holden</snm>
                  <fnm>MT</fnm>
               </au>
               <au>
                  <snm>Prentice</snm>
                  <fnm>MB</fnm>
               </au>
               <au>
                  <snm>Sebaihia</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>James</snm>
                  <fnm>KD</fnm>
               </au>
               <au>
                  <snm>Churcher</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Mungall</snm>
                  <fnm>KL</fnm>
               </au>
               <au>
                  <snm>Baker</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Basham</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Bentley</snm>
                  <fnm>SD</fnm>
               </au>
               <au>
                  <snm>Brooks</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Cerdeno-Tarraga</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Chillingworth</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Cronin</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Davies</snm>
                  <fnm>RM</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Dougan</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Feltwell</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Hamlin</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Holroyd</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Jagels</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Karlyshev</snm>
                  <fnm>AV</fnm>
               </au>
               <au>
                  <snm>Leather</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Moule</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Oyston</snm>
                  <fnm>PC</fnm>
               </au>
               <au>
                  <snm>Quail</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Rutherford</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Simmonds</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Skelton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Stevens</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Whitehead</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Barrell</snm>
                  <fnm>BG</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2001</pubdate>
            <volume>413</volume>
            <fpage>523</fpage>
            <lpage>527</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">11586360</pubid>
                  <pubid idtype="doi">10.1038/35097083</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B53">
            <title>
               <p>Initial sequencing and comparative analysis of the mouse genome</p>
            </title>
            <aug>
               <au>
                  <snm>Waterston</snm>
                  <fnm>RH</fnm>
               </au>
               <au>
                  <snm>Lindblad-Toh</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Birney</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Rogers</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Abril</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Agarwal</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Agarwala</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Ainscough</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Alexandersson</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>An</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Antonarakis</snm>
                  <fnm>SE</fnm>
               </au>
               <au>
                  <snm>Attwood</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Baertsch</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Bailey</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Barlow</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Beck</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Berry</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Birren</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Bloom</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Botcherby</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Bray</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Brent</snm>
                  <fnm>MR</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>DG</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>SD</fnm>
               </au>
               <au>
                  <snm>Bult</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Burton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Butler</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Campbell</snm>
                  <fnm>RD</fnm>
               </au>
               <au>
                  <snm>Carninci</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Cawley</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chiaromonte</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Chinwalla</snm>
                  <fnm>AT</fnm>
               </au>
               <au>
                  <snm>Church</snm>
                  <fnm>DM</fnm>
               </au>
               <au>
                  <snm>Clamp</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Clee</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Collins</snm>
                  <fnm>FS</fnm>
               </au>
               <au>
                  <snm>Cook</snm>
                  <fnm>LL</fnm>
               </au>
               <au>
                  <snm>Copley</snm>
                  <fnm>RR</fnm>
               </au>
               <au>
                  <snm>Coulson</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Couronne</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Cuff</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Curwen</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Cutts</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Daly</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>David</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Davies</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Delehaunty</snm>
                  <fnm>KD</fnm>
               </au>
               <au>
                  <snm>Deri</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Dermitzakis</snm>
                  <fnm>ET</fnm>
               </au>
               <au>
                  <snm>Dewey</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Dickens</snm>
                  <fnm>NJ</fnm>
               </au>
               <au>
                  <snm>Diekhans</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Dodge</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Dubchak</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Dunn</snm>
                  <fnm>DM</fnm>
               </au>
               <au>
                  <snm>Eddy</snm>
                  <fnm>SR</fnm>
               </au>
               <au>
                  <snm>Elnitski</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Emes</snm>
                  <fnm>RD</fnm>
               </au>
               <au>
                  <snm>Eswara</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Eyras</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Felsenfeld</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Fewell</snm>
                  <fnm>GA</fnm>
               </au>
               <au>
                  <snm>Flicek</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Foley</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Frankel</snm>
                  <fnm>WN</fnm>
               </au>
               <au>
                  <snm>Fulton</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Fulton</snm>
                  <fnm>RS</fnm>
               </au>
               <au>
                  <snm>Furey</snm>
                  <fnm>TS</fnm>
               </au>
               <au>
                  <snm>Gage</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Gibbs</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Glusman</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Gnerre</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Goldman</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Goodstadt</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Grafham</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Graves</snm>
                  <fnm>TA</fnm>
               </au>
               <au>
                  <snm>Green</snm>
                  <fnm>ED</fnm>
               </au>
               <au>
                  <snm>Gregory</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Guigo</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Guyer</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Hardison</snm>
                  <fnm>RC</fnm>
               </au>
               <au>
                  <snm>Haussler</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hayashizaki</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Hillier</snm>
                  <fnm>LW</fnm>
               </au>
               <au>
                  <snm>Hinrichs</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Hlavina</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Holzer</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Hsu</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Hua</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Hubbard</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Hunt</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Jackson</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Jaffe</snm>
                  <fnm>DB</fnm>
               </au>
               <au>
                  <snm>Johnson</snm>
                  <fnm>LS</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>TA</fnm>
               </au>
               <au>
                  <snm>Joy</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kamal</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Karlsson</snm>
                  <fnm>EK</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2002</pubdate>
            <volume>420</volume>
            <fpage>520</fpage>
            <lpage>562</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12466850</pubid>
                  <pubid idtype="doi">10.1038/nature01262</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B54">
            <title>
               <p>Whole-genome shotgun assembly and analysis of the genome of Fugu rubripes</p>
            </title>
            <aug>
               <au>
                  <snm>Aparicio</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chapman</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Stupka</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Putnam</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Chia</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Dehal</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Christoffels</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Rash</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hoon</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Smit</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Gelpke</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Roach</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Oh</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ho</snm>
                  <fnm>IY</fnm>
               </au>
               <au>
                  <snm>Wong</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Detter</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Verhoef</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Predki</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Tay</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Lucas</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Richardson</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Clark</snm>
                  <fnm>MS</fnm>
               </au>
               <au>
                  <snm>Edwards</snm>
                  <fnm>YJ</fnm>
               </au>
               <au>
                  <snm>Doggett</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Zharkikh</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Tavtigian</snm>
                  <fnm>SV</fnm>
               </au>
               <au>
                  <snm>Pruss</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Barnstead</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Evans</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Baden</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Powell</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Glusman</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Rowen</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Hood</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Tan</snm>
                  <fnm>YH</fnm>
               </au>
               <au>
                  <snm>Elgar</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Hawkins</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Venkatesh</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Rokhsar</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Brenner</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2002</pubdate>
            <volume>297</volume>
            <fpage>1301</fpage>
            <lpage>1310</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12142439</pubid>
                  <pubid idtype="doi">10.1126/science.1072104</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B55">
            <title>
               <p>The genome sequence of the malaria mosquito Anopheles gambiae</p>
            </title>
            <aug>
               <au>
                  <snm>Holt</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Subramanian</snm>
                  <fnm>GM</fnm>
               </au>
               <au>
                  <snm>Halpern</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sutton</snm>
                  <fnm>GG</fnm>
               </au>
               <au>
                  <snm>Charlab</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Nusskern</snm>
                  <fnm>DR</fnm>
               </au>
               <au>
                  <snm>Wincker</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Clark</snm>
                  <fnm>AG</fnm>
               </au>
               <au>
                  <snm>Ribeiro</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Wides</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Salzberg</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Loftus</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Yandell</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Majoros</snm>
                  <fnm>WH</fnm>
               </au>
               <au>
                  <snm>Rusch</snm>
                  <fnm>DB</fnm>
               </au>
               <au>
                  <snm>Lai</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Kraft</snm>
                  <fnm>CL</fnm>
               </au>
               <au>
                  <snm>Abril</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Anthouard</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Arensburger</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Atkinson</snm>
                  <fnm>PW</fnm>
               </au>
               <au>
                  <snm>Baden</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>de Berardinis</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Baldwin</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Benes</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Biedler</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Blass</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Bolanos</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Boscus</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Barnstead</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Cai</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Center</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Chatuverdi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Christophides</snm>
                  <fnm>GK</fnm>
               </au>
               <au>
                  <snm>Chrystal</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Clamp</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Cravchik</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Curwen</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Dana</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Delcher</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Dew</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Evans</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Flanigan</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Grundschober-Freimoser</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Friedli</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Gu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Guan</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Guigo</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Hillenmeyer</snm>
                  <fnm>ME</fnm>
               </au>
               <au>
                  <snm>Hladun</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Hogan</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Hong</snm>
                  <fnm>YS</fnm>
               </au>
               <au>
                  <snm>Hoover</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Jaillon</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Ke</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Kodira</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Kokoza</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Koutsos</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Letunic</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Levitsky</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Liang</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Lobo</snm>
                  <fnm>NF</fnm>
               </au>
               <au>
                  <snm>Lopez</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Malek</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>McIntosh</snm>
                  <fnm>TC</fnm>
               </au>
               <au>
                  <snm>Meister</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Miller</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Mobarry</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Mongin</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Murphy</snm>
                  <fnm>SD</fnm>
               </au>
               <au>
                  <snm>O'Brochta</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Pfannkoch</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Qi</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Regier</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Remington</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Shao</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Sharakhova</snm>
                  <fnm>MV</fnm>
               </au>
               <au>
                  <snm>Sitter</snm>
                  <fnm>CD</fnm>
               </au>
               <au>
                  <snm>Shetty</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Strong</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Sun</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Thomasova</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Ton</snm>
                  <fnm>LQ</fnm>
               </au>
               <au>
                  <snm>Topalis</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Tu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Unger</snm>
                  <fnm>MF</fnm>
               </au>
               <au>
                  <snm>Walenz</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Woodford</snm>
                  <fnm>KJ</fnm>
               </au>
               <au>
                  <snm>Wortman</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Yao</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Zdobnov</snm>
                  <fnm>EM</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Zhao</snm>
                  <fnm>Q</fnm>
               </au>
               <etal/>
            </aug>
            <source>Science</source>
            <pubdate>2002</pubdate>
            <volume>298</volume>
            <fpage>129</fpage>
            <lpage>149</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12364791</pubid>
                  <pubid idtype="doi">10.1126/science.1076181</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B56">
            <title>
               <p>The draft genome of Ciona intestinalis: insights into chordate and vertebrate origins</p>
            </title>
            <aug>
               <au>
                  <snm>Dehal</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Satou</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Campbell</snm>
                  <fnm>RK</fnm>
               </au>
               <au>
                  <snm>Chapman</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Degnan</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>De Tomaso</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Davidson</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Di Gregorio</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Gelpke</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Goodstein</snm>
                  <fnm>DM</fnm>
               </au>
               <au>
                  <snm>Harafuji</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Hastings</snm>
                  <fnm>KE</fnm>
               </au>
               <au>
                  <snm>Ho</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Hotta</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Kawashima</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Lemaire</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Martinez</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Meinertzhagen</snm>
                  <fnm>IA</fnm>
               </au>
               <au>
                  <snm>Necula</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Nonaka</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Putnam</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Rash</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Saiga</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Satake</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Terry</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Yamada</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>HG</fnm>
               </au>
               <au>
                  <snm>Awazu</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Azumi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Boore</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Branno</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chin-Bow</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>DeSantis</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Doyle</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Francino</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Keys</snm>
                  <fnm>DN</fnm>
               </au>
               <au>
                  <snm>Haga</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hayashi</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hino</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Imai</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Inaba</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Kano</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Kobayashi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Kobayashi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>BI</fnm>
               </au>
               <au>
                  <snm>Makabe</snm>
                  <fnm>KW</fnm>
               </au>
               <au>
                  <snm>Manohar</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Matassi</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Medina</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Mochizuki</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Mount</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Morishita</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Miura</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Nakayama</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nishizaka</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Nomoto</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Ohta</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Oishi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Rigoutsos</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Sano</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sasaki</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sasakura</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Shoguchi</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Shin-i</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Spagnuolo</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Stainier</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Suzuki</snm>
                  <fnm>MM</fnm>
               </au>
               <au>
                  <snm>Tassy</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Takatori</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Tokuoka</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Yagi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Yoshizaki</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Wada</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Hyatt</snm>
                  <fnm>PD</fnm>
               </au>
               <au>
                  <snm>Larimer</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Detter</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Doggett</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Glavina</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Hawkins</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Richardson</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Lucas</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Kohara</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Levine</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Satoh</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Rokhsar</snm>
                  <fnm>DS</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2002</pubdate>
            <volume>298</volume>
            <fpage>2157</fpage>
            <lpage>2167</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12481130</pubid>
                  <pubid idtype="doi">10.1126/science.1080049</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B57">
            <title>
               <p>Genome sequence of the human malaria parasite Plasmodium falciparum</p>
            </title>
            <aug>
               <au>
                  <snm>Gardner</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Hall</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Fung</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Berriman</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Hyman</snm>
                  <fnm>RW</fnm>
               </au>
               <au>
                  <snm>Carlton</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Pain</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nelson</snm>
                  <fnm>KE</fnm>
               </au>
               <au>
                  <snm>Bowman</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Paulsen</snm>
                  <fnm>IT</fnm>
               </au>
               <au>
                  <snm>James</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Eisen</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Rutherford</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Salzberg</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Craig</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kyes</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chan</snm>
                  <fnm>MS</fnm>
               </au>
               <au>
                  <snm>Nene</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Shallom</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Suh</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Peterson</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Angiuoli</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Pertea</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Allen</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Selengut</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Haft</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Mather</snm>
                  <fnm>MW</fnm>
               </au>
               <au>
                  <snm>Vaidya</snm>
                  <fnm>AB</fnm>
               </au>
               <au>
                  <snm>Martin</snm>
                  <fnm>DM</fnm>
               </au>
               <au>
                  <snm>Fairlamb</snm>
                  <fnm>AH</fnm>
               </au>
               <au>
                  <snm>Fraunholz</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Roos</snm>
                  <fnm>DS</fnm>
               </au>
               <au>
                  <snm>Ralph</snm>
                  <fnm>SA</fnm>
               </au>
               <au>
                  <snm>McFadden</snm>
                  <fnm>GI</fnm>
               </au>
               <au>
                  <snm>Cummings</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Subramanian</snm>
                  <fnm>GM</fnm>
               </au>
               <au>
                  <snm>Mungall</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Venter</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Carucci</snm>
                  <fnm>DJ</fnm>
               </au>
               <au>
                  <snm>Hoffman</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Newbold</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>RW</fnm>
               </au>
               <au>
                  <snm>Fraser</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Barrell</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2002</pubdate>
            <volume>419</volume>
            <fpage>498</fpage>
            <lpage>511</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">12368864</pubid>
                  <pubid idtype="doi">10.1038/nature01097</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B58">
            <title>
               <p>Gapped BLAST and PSI-BLAST: a new generation of protein database search programs</p>
            </title>
            <aug>
               <au>
                  <snm>Altschul</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Madden</snm>
                  <fnm>TL</fnm>
               </au>
               <au>
                  <snm>Schaffer</snm>
                  <fnm>AA</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Miller</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Lipman</snm>
                  <fnm>DJ</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>1997</pubdate>
            <volume>25</volume>
            <fpage>3389</fpage>
            <lpage>3402</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9254694</pubid>
                  <pubid idtype="doi">10.1093/nar/25.17.3389</pubid>
                  <pubid idtype="pmcid">146917</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B59">
            <title>
               <p>Analysis of compositionally biased regions in sequence databases</p>
            </title>
            <aug>
               <au>
                  <snm>Wootton</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Federhen</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>Methods Enzymol</source>
            <pubdate>1996</pubdate>
            <volume>266</volume>
            <fpage>554</fpage>
            <lpage>571</lpage>
            <xrefbib>
               <pubid idtype="pmpid">8743706</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B60">
            <title>
               <p>SMART, a simple modular architecture research tool: identification of signaling domains</p>
            </title>
            <aug>
               <au>
                  <snm>Schultz</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Milpetz</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Ponting</snm>
                  <fnm>CP</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1998</pubdate>
            <volume>95</volume>
            <fpage>5857</fpage>
            <lpage>5864</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">9600884</pubid>
                  <pubid idtype="doi">10.1073/pnas.95.11.5857</pubid>
                  <pubid idtype="pmcid">34487</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>
