<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2229-8-18</ui>
   <ji>1471-2229</ji>
   <fm>
      <dochead>Research article</dochead>
      <bibl>
         <title>
            <p>Characterization of paralogous protein families in rice</p>
         </title>
         <aug>
            <au id="A1">
               <snm>Lin</snm>
               <fnm>Haining</fnm>
               <insr iid="I1"/>
               <insr iid="I3"/>
               <insr iid="I7"/>
               <email>linha@msu.edu</email>
            </au>
            <au id="A2">
               <snm>Ouyang</snm>
               <fnm>Shu</fnm>
               <insr iid="I1"/>
               <email>souyang@jcvi.org</email>
            </au>
            <au id="A3">
               <snm>Egan</snm>
               <fnm>Amy</fnm>
               <insr iid="I1"/>
               <insr iid="I6"/>
               <email>aegan@som.umaryland.edu</email>
            </au>
            <au id="A4">
               <snm>Nobuta</snm>
               <fnm>Kan</fnm>
               <insr iid="I2"/>
               <email>nobuta@dbi.udel.edu</email>
            </au>
            <au id="A5">
               <snm>Haas</snm>
               <mi>J</mi>
               <fnm>Brian</fnm>
               <insr iid="I1"/>
               <insr iid="I5"/>
               <email>bhaas@broad.mit.edu</email>
            </au>
            <au id="A6">
               <snm>Zhu</snm>
               <fnm>Wei</fnm>
               <insr iid="I1"/>
               <email>weizhu365@hotmail.com</email>
            </au>
            <au id="A7">
               <snm>Gu</snm>
               <fnm>Xun</fnm>
               <insr iid="I3"/>
               <insr iid="I4"/>
               <email>xgu@iastate.edu</email>
            </au>
            <au id="A8">
               <snm>Silva</snm>
               <mi>C</mi>
               <fnm>Joana</fnm>
               <insr iid="I1"/>
               <insr iid="I6"/>
               <email>jcsilva@som.umaryland.edu</email>
            </au>
            <au id="A9">
               <snm>Meyers</snm>
               <mi>C</mi>
               <fnm>Blake</fnm>
               <insr iid="I2"/>
               <email>meyers@dbi.udel.edu</email>
            </au>
            <au id="A10" ca="yes">
               <snm>Buell</snm>
               <fnm>C Robin</fnm>
               <insr iid="I1"/>
               <insr iid="I7"/>
               <email>Buell@msu.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>The Institute for Genomic Research, 9712 Medical Center Dr., Rockville, MD 20850, USA and J. Craig Venter Institute, 9704 Medical Center Dr., Rockville, MD 20850, USA</p>
            </ins>
            <ins id="I2">
               <p>Department of Plant and Soil Sciences &amp; Delaware Biotechnology Institute, University of Delaware, Newark, DE 19711, USA</p>
            </ins>
            <ins id="I3">
               <p>Department of Genetics, Development, and Cell Biology, Iowa State University, Ames, IA 50011, USA</p>
            </ins>
            <ins id="I4">
               <p>Center for Bioinformatics and Biological Statistics, Iowa State University, Ames, IA 50011, USA</p>
            </ins>
            <ins id="I5">
               <p>The Broad Institute, 7 Cambridge Center, Cambridge, MA 02142, USA</p>
            </ins>
            <ins id="I6">
               <p>Institute for Genome Sciences &amp; Department of Microbiology &amp; Immunology, University of Maryland, Baltimore, School of Medicine, HSF-II, Rm S-445, 20 Penn St., Baltimore, MD 21201, USA</p>
            </ins>
            <ins id="I7">
               <p>Department of Plant Biology, Michigan State University, 166 Plant Biology Building, East Lansing, MI 48824, USA</p>
            </ins>
         </insg>
         <source>BMC Plant Biology</source>
         <issn>1471-2229</issn>
         <pubdate>2008</pubdate>
         <volume>8</volume>
         <issue>1</issue>
         <fpage>18</fpage>
         <url>http://www.biomedcentral.com/1471-2229/8/18</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">18284697</pubid>
               <pubid idtype="doi">10.1186/1471-2229-8-18</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>15</day>
               <month>5</month>
               <year>2007</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>19</day>
               <month>2</month>
               <year>2008</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>19</day>
               <month>2</month>
               <year>2008</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2008</year>
         <collab>Lin et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>High gene numbers in plant genomes reflect polyploidy and major gene duplication events. <it>Oryza sativa</it>, cultivated rice, is a diploid monocotyledonous species with a ~390 Mb genome that has undergone segmental duplication of a substantial portion of its genome. This, coupled with other genetic events such as tandem duplications, has resulted in a substantial number of its genes, and resulting proteins, occurring in paralogous families.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>Using a computational pipeline that utilizes Pfam and novel protein domains, we characterized paralogous families in rice and compared these with paralogous families in the model dicotyledonous diploid species, <it>Arabidopsis thaliana</it>. Arabidopsis, which has undergone genome duplication as well, has a substantially smaller genome (~120 Mb) and gene complement compared to rice. Overall, 53% and 68% of the non-transposable element-related rice and Arabidopsis proteins could be classified into paralogous protein families, respectively. Singleton and paralogous family genes differed substantially in their likelihood of encoding a protein of known or putative function; 26% and 66% of singleton genes compared to 73% and 96% of the paralogous family genes encode a known or putative protein in rice and Arabidopsis, respectively. Furthermore, a major skew in the distribution of specific gene function was observed; a total of 17 Gene Ontology categories in both rice and Arabidopsis were statistically significant in their differential distribution between paralogous family and singleton proteins. In contrast to mammalian organisms, we found that duplicated genes in rice and Arabidopsis tend to have more alternative splice forms. Using data from Massively Parallel Signature Sequencing, we show that a significant portion of the duplicated genes in rice show divergent expression although a correlation between sequence divergence and correlation of expression could be seen in very young genes.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>Collectively, these data suggest that while co-regulation and conserved function are present in some paralogous protein family members, evolutionary pressures have resulted in functional divergence with differential expression patterns.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification type="bmc" subtype="user_supplied_xml" id="endnote"/>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Gene duplication is a major contributor to genetic novelty and proteomic complexity. Evolutionary pressures on duplicated genes differ from single copy (singleton) genes and several models have been proposed for the evolutionary fate of duplicated genes. In the non/neofunctionalization model, one of the duplicated genes becomes a pseudogene through the accumulation of deleterious mutations although on a rare occasion, it may acquire a new function <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. In the subfunctionalization model <abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr></abbrgrp>, duplicated genes adopt a subset of functions of the ancestral gene. Functional redundancy of duplicated genes has been shown to increase the robustness of biological systems <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>.</p>
         <p>Gene duplication occurs frequently in plants, in the form of segmental duplication, tandem duplication, or whole genome duplication <abbrgrp><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>. Genome duplication has been reported in rice (<it>Oryza sativa</it>), an important agricultural species and model species for the grass family (Poaceae) <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr></abbrgrp>. Depending on the methods, parameters, and genome assemblies used, 15% to 62% <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr></abbrgrp> of the rice genome underwent one round of large-scale segmental duplication that occurred approximately 70 Million Years Ago (MYA) <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr><abbr bid="B18">18</abbr></abbrgrp>. A more recent duplication, on the short arms of chromosomes 11 and 12, occurred approximately 5&#8211;8 MYA <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B20">20</abbr></abbrgrp>. With respect to tandem duplications, depending on the parameters utilized, 14&#8211;29% of rice genes occur in tandem <abbrgrp><abbr bid="B21">21</abbr></abbrgrp>. Paralogous families, composed of tandemly and segmentally duplicated genes, have been studied to a limited extent in rice, typically in a comparative context with the finished genome of the dicotyledonous plant species, <it>Arabidopsis thaliana </it><abbrgrp><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr><abbr bid="B26">26</abbr><abbr bid="B27">27</abbr></abbrgrp>. 
To date, only limited genome-wide analyses of paralogous protein families have been reported in rice <abbrgrp><abbr bid="B28">28</abbr><abbr bid="B29">29</abbr></abbrgrp>. In Horan <it>et al</it>. <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>, Arabidopsis and rice proteins were co-clustered using Pfam domain-based or BLASTP-based similarity clustering which allowed for the clustering of proteins into families common between these two model species and for the identification of proteins that were species-specific.</p>
         <p>In this study, we classified proteins from the predicted rice proteome into paralogous protein families using a computational pipeline that utilizes both Pfam and BLASTP-based novel domains <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. While the focus in our study was analysis of the rice paralogous families, for comparative purposes, we performed a similar classification with the predicted Arabidopsis proteome to compare and contrast paralogous family composition and features in two model species which represent two major divisions of the angiosperms, monocots and dicots. In rice, we characterized alternative splicing, functional classification of paralogous family proteins, expression patterns, and duplication age and compared these data to those observed in single copy proteins. A parallel analysis of alternative splicing and functional domain composition of paralogous family proteins was performed with Arabidopsis to compare and contrast with the findings in rice. To highlight our observations, we examined in depth two rice protein families, prolamin and Bowman-Birk inhibitor. This study provides a comprehensive analysis of rice paralogous families in parallel with a comparative analysis in Arabidopsis thereby providing novel insight into paralogous gene family evolution in these two model plant species.</p>
      </sec>
      <sec>
         <st>
            <p>Results and Discussion</p>
         </st>
         <sec>
            <st>
               <p>Classification of paralogous protein families in rice and Arabidopsis</p>
            </st>
            <p>A total of 3,865 paralogous protein families containing 21,998 proteins were identified [see Additional file <supplr sid="S1">1</supplr>] from the 42,653 total non-transposable element (TE)-related proteins predicted in the rice genome, leaving 20,655 putative singleton proteins encoded by single copy genes. On average, a rice family contained six family members, ranging in size from two to 214 family members (Fig. <figr fid="F1">1</figr>). A total of 11 paralogous protein families with more than one hundred member proteins were identified in rice which encoded proteins such as zinc finger proteins, protein kinases, Myb-like proteins, and transducins [see Additional file <supplr sid="S2">2</supplr>], similar to the largest protein families reported in Arabidopsis <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. Paralogous protein family genes of rice were distributed throughout the genome and within chromosomes in a pattern similar to the singleton genes [see Additional file <supplr sid="S3">3A</supplr>]. Although paralogous protein family genes were more frequently located in the euchromatic regions, this was consistent with previous reports that non-TE-related genes are found more prevalently in euchromatic regions. A comparison of segmentally duplicated genes with the paralogous protein family genes suggested that our classification pipeline was robust. Of the 2,403 segmentally duplicated gene pairs within 163 segmentally duplicated blocks, 1,570 duplicated gene pairs (65%) were classified in the same paralogous protein family. For the remainder of the segmentally duplicated genes, 175 pairs (7%) were classified in different paralogous protein families and 268 (11%) had one gene classified in a paralogous protein family and the other gene classified as a singleton. We observed that 390 segmentally duplicated gene pairs (16%) were not included in any paralogous protein family. 
Note that in our computational pipeline, four or more members were required to define a BLASTP-based domain. Consequently, a single pair of segmentally duplicated genes alone is insufficient to define a BLASTP-based domain. The lack of 100% correspondence between segmental duplication and paralogous family classification may be due to the acquisition of new domain(s) or loss of existing domain(s) within one of the duplicated genes because, in our computational pipeline, only proteins with identical domain composition were classified into the same paralogous protein family. Alternatively, the difference could be due to the different classification methods employed in the two analyses. For example, LOC_Os08g37350 and LOC_Os09g28940 are segmentally duplicated genes from chromosomes 8 and 9, respectively. These two protein sequences had a 56% identity over 70% of the length of the longer sequence and were within a segmentally duplicated block of 43 collinear gene pairs. LOC_Os08g37350 has two Pfam domains (PF00443: Ubiquitin carboxyl-terminal hydrolase; PF01753: MYND finger) while LOC_Os09g28940 has only one Pfam domain (PF00443: Ubiquitin carboxyl-terminal hydrolase). As a consequence, these loci were classified in two different paralogous families (LOC_Os08g37350 is classified in Family 1545; LOC_Os09g28940 is in Family 3650). In a second example, LOC_Os11g03210 and LOC_Os12g02960 are from a segmental duplication event involving chromosomes 11 and 12 which includes 160 collinear gene pairs. LOC_Os11g03210 has a single Pfam domain (PF02798: Glutathione S-transferase, N-terminal domain) and thus is classified in Family 3362 while LOC_Os12g02960 is classified as a singleton because, although it has two Pfam domains (PF02798: Glutathione S-transferase, N-terminal domain; PF00043: Glutathione S-transferase, C-terminal domain), no other protein has exactly the same domain profile. 
Note that in our computational pipeline, a paralogous family must have at least two members with identical domain profiles. In a third example, segmentally duplicated genes LOC_Os01g41900 and LOC_Os05g51160 are from chromosomes 1 and 5. These two genes were derived from full length cDNAs (FLcDNAs) and had a 59% identity over approximately three-quarters of the longer protein sequence. LOC_Os01g41900 has two Pfam domains (PF00249: Myb-like DNA-binding domain and PF00098: Zinc knuckle) while LOC_Os05g51160 has only a single Pfam domain (PF00249: Myb-like DNA-binding domain). As a consequence, they were classified in different families, Family 1452 and Family 3863, respectively. Manual inspection of these three sets of loci revealed that they were correctly annotated and that the lack of clustering into a single paralogous family could not be attributed to incorrect structural annotation, which is another potential cause for lack of 100% correspondence between segmentally duplicated genes and paralogous families.</p>
            <suppl id="S1">
               <title>
                  <p>Additional File 1</p>
               </title>
               <text>
                  <p>Putative paralogous protein families within the rice genome.</p>
               </text>
               <file name="1471-2229-8-18-S1.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <suppl id="S2">
               <title>
                  <p>Additional File 2</p>
               </title>
               <text>
                  <p>Rice paralogous protein families with more than one hundred member proteins.</p>
               </text>
               <file name="1471-2229-8-18-S2.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <suppl id="S3">
               <title>
                  <p>Additional File 3</p>
               </title>
               <text>
                  <p>Distribution of non-transposable element-related genes in rice and Arabidopsis. In panel A, the 12 rice chromosomes are shown with paralogous gene family members plotted in blue while single copy genes are plotted in red. Segmentally duplicated blocks are indicated in green and centromeres are denoted by a white box. In panel B, the five Arabidopsis chromosomes are shown with paralogous gene family members plotted in blue while single copy genes are plotted in red.</p>
               </text>
               <file name="1471-2229-8-18-S3.tiff">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Size distribution of paralogous protein families in rice and Arabidopsis</p>
               </caption>
               <text>
                  <p>Size distribution of paralogous protein families in rice and Arabidopsis. The exact number of families is listed above the bars.</p>
               </text>
               <graphic file="1471-2229-8-18-1"/>
            </fig>
            <p>A parallel construction of paralogous protein families in Arabidopsis identified 3,092 paralogous protein families (18,183 proteins) and 8,636 single copy genes from a total of 26,819 protein coding genes from TAIR7 release <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>. A similar size distribution of Arabidopsis protein families was observed, ranging from two to 182 (Fig. <figr fid="F1">1</figr>). In Arabidopsis, the largest families encode Myb-like proteins, zinc finger proteins, and protein kinases, consistent with what has been reported previously <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. Arabidopsis paralogous protein family genes were distributed similarly to singleton genes and were more frequently located in the euchromatic regions [see Additional file <supplr sid="S3">3B</supplr>].</p>
         </sec>
         <sec>
            <st>
               <p>Function of paralogous protein families in rice and Arabidopsis</p>
            </st>
            <p>We examined the functional annotation of paralogous family and singleton proteins. A total of 21,403 and 23,081 genes were annotated as encoding known or putative proteins in rice and Arabidopsis, respectively, due to strong similarity with proteins with a known function or the presence of Pfam domains above the trusted cutoff. Genes with no known or putative function can be supported by experimental transcript evidence (i.e., encode an "expressed protein") or are predicted solely by an <it>ab initio </it>gene finder and lack expression support as well as sequence similarity to known proteins with the exception of other hypothetical proteins (i.e., encode a "hypothetical protein"). In rice, a total of 6,913 genes encode expressed proteins as shown by experimental transcript evidence from Expressed Sequence Tags (ESTs), FLcDNAs, Massively Parallel Signature Sequencing <abbrgrp><abbr bid="B32">32</abbr></abbrgrp>, Serial Analysis of Gene Expression, and/or proteomic data <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>. In Arabidopsis, 2,270 genes encode expressed proteins as shown by experimental transcript evidence in the form of ESTs and/or cDNAs (see Methods). The remaining 14,337 rice genes <abbrgrp><abbr bid="B33">33</abbr></abbrgrp> and 1,468 Arabidopsis genes (see Methods) encode hypothetical proteins. A majority of rice paralogous family genes (73%) encode either a known or putative protein (Fig. <figr fid="F2">2</figr>). The remaining rice paralogous family genes encode expressed proteins (9%) and hypothetical proteins (18%). In contrast, rice singletons had a larger portion of hypothetical genes (50%) and a smaller portion of genes with a known or putative function (26%). Even though Arabidopsis overall has a smaller number of genes with unknown function than rice, a similar bias of genes with a known or putative function in paralogous family genes was observed in a parallel analysis in Arabidopsis (Fig. <figr fid="F2">2</figr>).</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Functional classification of paralogous family and singleton proteins in rice and Arabidopsis</p>
               </caption>
               <text>
                  <p>Functional classification of paralogous family and singleton proteins in rice and Arabidopsis.</p>
               </text>
               <graphic file="1471-2229-8-18-2"/>
            </fig>
            <p>Using Plant GOSlim annotations <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>, we compared the function of the proteins within rice paralogous families to that in the singletons. Within the 26 molecular function GOSlim categories identified in our analyses, rice paralogous protein families showed different patterns from singletons in a number of GOSlim categories (Fig. <figr fid="F3">3A</figr>). Although the relative abundance of each GOSlim category varied with the size of the rice paralogous family, no obvious correlation was observed (Fig. <figr fid="F3">3A</figr>). For each category, a two-tailed two-sample binomial test was performed by comparing the abundance of that category in rice paralogous families with that in the singletons. Multiple testing was corrected using the Benjamini and Hochberg false discovery rate control at a level of 0.05 <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>. The statistical test revealed a substantial enrichment of 12 categories in rice paralogous family proteins including transcription factor activity, hydrolase activity, DNA binding, and transporter activity while a substantial reduction was seen in five categories including receptor activity, nucleotide binding and carbohydrate binding (Table <tblr tid="T1">1</tblr>). A similar skew in GOSlim categories was observed in a parallel analysis in Arabidopsis (Table <tblr tid="T2">2</tblr> &amp; Fig. <figr fid="F3">3B</figr>), consistent with a previous report in Arabidopsis <abbrgrp><abbr bid="B36">36</abbr></abbrgrp> that non-random loss and retention of paralogous genes with different functions occurred after gene duplication.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Two-sample binomial tests for GOSlim assignments of paralogous family and singleton proteins in rice</p>
               </caption>
               <tblbdy cols="4">
                  <r>
                     <c ca="left">
                        <p>
                           <b>GOSlim assignment<sup>a</sup></b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Singletons (%)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Paralogous genes (%)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>P-value<sup>d</sup></b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Binding, other<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>3.3</p>
                     </c>
                     <c ca="center">
                        <p>6.5</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Carbohydrate binding<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>2.7</p>
                     </c>
                     <c ca="center">
                        <p>0.6</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>DNA binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>4.8</p>
                     </c>
                     <c ca="center">
                        <p>8.0</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Hydrolase activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>7.8</p>
                     </c>
                     <c ca="center">
                        <p>12.7</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Kinase activity<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>16.0</p>
                     </c>
                     <c ca="center">
                        <p>6.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nucleotide binding<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>13.4</p>
                     </c>
                     <c ca="center">
                        <p>4.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Protein binding, other<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>14.2</p>
                     </c>
                     <c ca="center">
                        <p>9.5</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Receptor activity<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>2.3</p>
                     </c>
                     <c ca="center">
                        <p>0.4</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transcription factor activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>4.3</p>
                     </c>
                     <c ca="center">
                        <p>9.3</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Catalytic activity, other<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>8.7</p>
                     </c>
                     <c ca="center">
                        <p>12.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Structural molecule activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>2.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Oxygen binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>1.9</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transcription regulator activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>1.1</p>
                     </c>
                     <c ca="center">
                        <p>2.3</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transporter activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>5.0</p>
                     </c>
                     <c ca="center">
                        <p>7.0</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Lipid binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.4</p>
                     </c>
                     <c ca="center">
                        <p>1.1</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Molecular function, other<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.1</p>
                     </c>
                     <c ca="center">
                        <p>0.4</p>
                     </c>
                     <c ca="center">
                        <p>0.001</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Enzyme regulator activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.5</p>
                     </c>
                     <c ca="center">
                        <p>0.9</p>
                     </c>
                     <c ca="center">
                        <p>0.008</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Motor activity</p>
                     </c>
                     <c ca="center">
                        <p>0.5</p>
                     </c>
                     <c ca="center">
                        <p>0.3</p>
                     </c>
                     <c ca="center">
                        <p>0.051</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transferase activity</p>
                     </c>
                     <c ca="center">
                        <p>7.0</p>
                     </c>
                     <c ca="center">
                        <p>7.7</p>
                     </c>
                     <c ca="center">
                        <p>0.095</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Receptor binding</p>
                     </c>
                     <c ca="center">
                        <p>0.0</p>
                     </c>
                     <c ca="center">
                        <p>0.1</p>
                     </c>
                     <c ca="center">
                        <p>0.137</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>RNA binding</p>
                     </c>
                     <c ca="center">
                        <p>1.8</p>
                     </c>
                     <c ca="center">
                        <p>2.1</p>
                     </c>
                     <c ca="center">
                        <p>0.369</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Translation factor activity, nucleic acid binding</p>
                     </c>
                     <c ca="center">
                        <p>0.5</p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>0.353</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Signal transducer activity</p>
                     </c>
                     <c ca="center">
                        <p>1.0</p>
                     </c>
                     <c ca="center">
                        <p>0.9</p>
                     </c>
                     <c ca="center">
                        <p>0.43</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Chromatin binding</p>
                     </c>
                     <c ca="center">
                        <p>0.3</p>
                     </c>
                     <c ca="center">
                        <p>0.2</p>
                     </c>
                     <c ca="center">
                        <p>0.465</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nucleic acid binding, other</p>
                     </c>
                     <c ca="center">
                        <p>1.8</p>
                     </c>
                     <c ca="center">
                        <p>1.9</p>
                     </c>
                     <c ca="center">
                        <p>0.882</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nuclease activity</p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>0.888</p>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p><sup>a </sup>GOSlim assignment classifications were performed as described in the Materials and Methods.</p>
                  <p><sup>b </sup>Enrichment of GOSlim annotations in paralogous protein families compared to singletons.</p>
                  <p><sup>c </sup>Reduction of GOSlim annotations in paralogous protein families compared to singletons.</p>
                  <p><sup>d </sup>Benjamini and Hochberg correction for multiple testing.</p>
               </tblfn>
            </tbl>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Two-sample binomial tests for GOSlim assignments of paralogous family and singleton proteins in Arabidopsis</p>
               </caption>
               <tblbdy cols="4">
                  <r>
                     <c ca="left">
                        <p>
                           <b>GOSlim assignment<sup>a</sup></b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Singletons (%)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Paralogous genes (%)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>P-value<sup>d</sup></b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Hydrolase activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>7.5</p>
                     </c>
                     <c ca="center">
                        <p>12.6</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Kinase activity<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>10.4</p>
                     </c>
                     <c ca="center">
                        <p>5.5</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nucleotide binding<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>10.2</p>
                     </c>
                     <c ca="center">
                        <p>4.6</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Protein binding, other<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>12.9</p>
                     </c>
                     <c ca="center">
                        <p>8.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transcription factor activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>4.2</p>
                     </c>
                     <c ca="center">
                        <p>9.0</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Receptor activity<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>1.9</p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>DNA binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>4.1</p>
                     </c>
                     <c ca="center">
                        <p>7.2</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Oxygen binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.1</p>
                     </c>
                     <c ca="center">
                        <p>1.4</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Receptor binding<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.5</p>
                     </c>
                     <c ca="center">
                        <p>0.1</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-5</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Carbohydrate binding<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>0.3</p>
                     </c>
                     <c ca="center">
                        <p>&lt;1e-3</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Lipid binding<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.3</p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>0.001</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Structural molecule activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>1.6</p>
                     </c>
                     <c ca="center">
                        <p>2.5</p>
                     </c>
                     <c ca="center">
                        <p>0.002</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Enzyme regulator activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>1.4</p>
                     </c>
                     <c ca="center">
                        <p>0.005</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Molecular function, other<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>1.8</p>
                     </c>
                     <c ca="center">
                        <p>2.5</p>
                     </c>
                     <c ca="center">
                        <p>0.011</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transporter activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>5.0</p>
                     </c>
                     <c ca="center">
                        <p>6.0</p>
                     </c>
                     <c ca="center">
                        <p>0.019</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nucleic acid binding, other<sup>c</sup></p>
                     </c>
                     <c ca="center">
                        <p>2.6</p>
                     </c>
                     <c ca="center">
                        <p>2.0</p>
                     </c>
                     <c ca="center">
                        <p>0.027</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Motor activity<sup>b</sup></p>
                     </c>
                     <c ca="center">
                        <p>0.2</p>
                     </c>
                     <c ca="center">
                        <p>0.5</p>
                     </c>
                     <c ca="center">
                        <p>0.03</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transferase activity</p>
                     </c>
                     <c ca="center">
                        <p>5.3</p>
                     </c>
                     <c ca="center">
                        <p>6.1</p>
                     </c>
                     <c ca="center">
                        <p>0.053</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>RNA binding</p>
                     </c>
                     <c ca="center">
                        <p>1.5</p>
                     </c>
                     <c ca="center">
                        <p>1.9</p>
                     </c>
                     <c ca="center">
                        <p>0.099</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Binding, other</p>
                     </c>
                     <c ca="center">
                        <p>12.3</p>
                     </c>
                     <c ca="center">
                        <p>11.3</p>
                     </c>
                     <c ca="center">
                        <p>0.102</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Signal transducer activity</p>
                     </c>
                     <c ca="center">
                        <p>1.0</p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>0.132</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Catalytic activity, other</p>
                     </c>
                     <c ca="center">
                        <p>12.4</p>
                     </c>
                     <c ca="center">
                        <p>11.7</p>
                     </c>
                     <c ca="center">
                        <p>0.244</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Transcription regulator activity</p>
                     </c>
                     <c ca="center">
                        <p>1.3</p>
                     </c>
                     <c ca="center">
                        <p>1.5</p>
                     </c>
                     <c ca="center">
                        <p>0.743</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Chromatin binding</p>
                     </c>
                     <c ca="center">
                        <p>0.2</p>
                     </c>
                     <c ca="center">
                        <p>0.1</p>
                     </c>
                     <c ca="center">
                        <p>0.803</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Translation factor activity, nucleic acid binding</p>
                     </c>
                     <c ca="center">
                        <p>0.6</p>
                     </c>
                     <c ca="center">
                        <p>0.6</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Nuclease activity</p>
                     </c>
                     <c ca="center">
                        <p>0.7</p>
                     </c>
                     <c ca="center">
                        <p>0.8</p>
                     </c>
                     <c ca="center">
                        <p>1</p>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p><sup>a </sup>GOSlim assignment classifications were performed as described in the Materials and Methods.</p>
                  <p><sup>b </sup>Enrichment of GOSlim annotations in paralogous protein families compared to singletons.</p>
                  <p><sup>c </sup>Reduction of GOSlim annotations in paralogous protein families compared to singletons.</p>
                  <p><sup>d </sup>Benjamini and Hochberg correction for multiple testing.</p>
               </tblfn>
            </tbl>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>GOSlim assignment of A) rice paralogous families and singletons, B) Arabidopsis paralogous families and singletons</p>
               </caption>
               <text>
                  <p>GOSlim assignment of A) rice paralogous families and singletons, B) Arabidopsis paralogous families and singletons. The paralogous protein families are further classified by family size.</p>
               </text>
               <graphic file="1471-2229-8-18-3"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Paralogous protein family genes tend to have more alternative isoforms than singletons</p>
            </st>
            <p>Alternative splicing has been regarded as a mechanism to increase genetic novelty. In the rice genome, 6,253 non-TE-related genes have evidence of alternative splicing (see Methods) and we used this set of genes to examine alternative splicing in singleton versus paralogous protein family genes. The percentage of alternative splicing in single copy genes is 2,094/20,655 = 10.1%, while that in paralogous family genes is 4,159/21,998 = 18.9%; a statistically significant difference (<it>&#967;</it><sup>2 </sup>test, P &lt; 1e-5). To remove any bias due to genes that lack transcript evidence, we restricted our analysis to genes with EST and/or FLcDNA evidence. The percentage of alternative splicing in singletons is 2,094/8,619 = 24.3%, while that in paralogous protein family genes is 4,159/14,072 = 29.6%; a statistically significant difference (<it>&#967;</it><sup>2 </sup>test, P &lt; 1e-5). We further restricted our analysis to high confidence genes whose structures were completely supported by ESTs and/or FLcDNAs. The percentage of alternative splicing in singletons increases to 1,826/5,964 = 30.6%, while that in paralogous protein family genes increases to 3,765/11,235 = 33.5%; a statistically significant difference (<it>&#967;</it><sup>2 </sup>test, P &lt; 1e-3).</p>
            <p>To confirm that our observation was not restricted to rice, we performed a parallel analysis with Arabidopsis. Using data on alternative splicing as provided with the TAIR7 release (see Methods), the percentage of alternative splicing in Arabidopsis single copy genes is 943/8,636 = 9.8%, while that in paralogous protein family genes is 2,856/18,183 = 15.7%. This difference is also statistically significant (<it>&#967;</it><sup>2 </sup>test, P &lt; 1e-5), similar to that observed in rice. Restricting the analysis to only those Arabidopsis genes with EST and/or cDNA support as provided in the TAIR7 release revealed that the percentage of alternative splicing in singletons is 942/6,663 = 14.1%, while that in paralogous family genes is 2,852/15,369 = 18.6%; a statistically significant difference (<it>&#967;</it><sup>2 </sup>test, P &lt; 1e-5). Our findings are contradictory to previous reports in model animal species, in which duplicated genes tend to have fewer alternatively spliced isoforms, thereby supporting the 'function-sharing model' that alternative splicing and gene duplication are two mechanisms that are complementary with respect to proteomic function diversity <abbrgrp><abbr bid="B37">37</abbr><abbr bid="B38">38</abbr></abbrgrp>. Our results suggest that plants may employ both gene duplication and alternative splicing as mechanisms to increase proteomic complexity.</p>
         </sec>
         <sec>
            <st>
               <p>Age of paralogous protein families in rice</p>
            </st>
            <p>While there are previous reports on gene duplication in rice <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr></abbrgrp>, they utilized alternative assemblies and annotation datasets of the rice genome. To provide information on the age of paralogous families identified in this study, we estimated the age of a paralogous family from the maximum value of the distribution of pairwise <it>d</it><sub>S </sub>calculated among all members of that protein family (see Methods). We found that the origin of most paralogous families dates back more than 115 million years (MY), the point at which synonymous sites are saturated and dating becomes unreliable (<it>d</it><sub>S </sub>~1.5) [see Additional file <supplr sid="S4">4A</supplr>]. Among protein families for which the maximum pairwise <it>d</it><sub>S </sub>value is less than 1.5, the distribution of maximum <it>d</it><sub>S </sub>is fairly flat, with the exception of a recent peak at <it>d</it><sub>S </sub>between 0 and 0.1 [see Additional file <supplr sid="S4">4B</supplr>]. This suggests that paralogous families have been arising at a relatively constant pace within the past 115 MY, but that a burst of duplication took place within the last 7.5 MY. Alternatively, paralogous families arise at a rate similar to that observed for the first few million years, but about 2/3 of them revert to single-gene status soon thereafter, accounting for the quick decline after the first 7.5 MY. The fairly constant number of older paralogous families could be due either to selective constraints maintaining the elevated copy number or to the loss of paralogs being dependent on sequence similarity, such that after ~10% sequence divergence, paralog loss is negligible. Finally, for each family we identified the largest peak below 1.5 (if there was one) in the distribution of all pairwise <it>d</it><sub>S </sub>values. 
The distribution of this peak value across all families is bimodal [see Additional file <supplr sid="S5">5</supplr>], and it confirms the presence of a large number of recently duplicated genes (0 &#8804; <it>d</it><sub>S </sub>&lt; 0.1). In addition, the peak at 0.7 &#8804; <it>d</it><sub>S </sub>&#8804; 1 most likely results from the large-scale segmental duplication event that occurred ~70 MYA.</p>
            <suppl id="S4">
               <title>
                  <p>Additional File 4</p>
               </title>
               <text>
                  <p>The age distribution of rice paralogous protein families. <b>A) </b>an expanded view of the age distribution. <b>B) </b>the enlarged distribution of rice paralogous protein families with largest <it>d</it><sub>S </sub>&#8804; 1.5.</p>
               </text>
               <file name="1471-2229-8-18-S4.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <suppl id="S5">
               <title>
                  <p>Additional File 5</p>
               </title>
               <text>
                  <p>Distribution of modal values under <it>d</it><sub>S </sub>&#8804; 1.5 across rice paralogous protein families. Of all 3,865 paralogous protein families, 2,388 showed a peak under 1.5 in the distribution of all pairwise <it>d</it><sub>S </sub>values and are plotted.</p>
               </text>
               <file name="1471-2229-8-18-S5.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
         </sec>
         <sec>
            <st>
               <p>Expression of paralogous protein families in rice</p>
            </st>
            <p>We further examined the expression patterns of the paralogous families using MPSS data from 18 libraries <abbrgrp><abbr bid="B32">32</abbr></abbrgrp>. MPSS tags were searched against our release 4 pseudomolecules and cDNA sequences of all annotated gene models to ensure that all MPSS tags would be identified even if they spanned the intron(s). We found 11,619 genes within the paralogous protein families that were associated with unique, reliable, and significant MPSS tags, which are referred to as MPSS-qualifying genes.</p>
            <p>Suitable summary statistics of correlation for expression divergence of a gene family can be found in Gu <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> and Gu <it>et al</it>. <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>, though microarray data were the primary focus in these studies. To be concise, we restricted our analysis of expression correlation in the libraries and tissues to paralogous families with exactly two MPSS-qualifying genes (674 protein families). To measure the expression correlation, the Pearson's Correlation Coefficient (<it>r</it>) of their expression was computed for each pair of MPSS-qualifying genes from each of the 674 protein families across all 18 MPSS libraries. It is important to note that we excluded MPSS tags which mapped to multiple locations, as most of these are likely to match to closely-related paralogs and could have confounded our analyses. We employed the method used by Blanc and Wolfe <abbrgrp><abbr bid="B36">36</abbr></abbrgrp> to determine a minimum cutoff value for Pearson's Correlation Coefficient (<it>r</it>) to classify two duplicated genes as having divergent expression. Basically, a total of 10,000 gene pairs were generated by random shuffling of the singleton genes and the Pearson's Correlation Coefficient (<it>r</it>) was calculated similarly for each pair. Ninety-five percent of the randomly shuffled gene pairs had a correlation value <it>r </it>&lt; 0.59. As randomly shuffled gene pairs should have divergent function and expression patterns, we utilized <it>r </it>&lt; 0.59 as an indicator of divergent expression. Our results show that the expression correlation value (<it>r</it>) of the paralogous protein family genes ranged from -0.6 to 1.0, although the majority of the gene pairs had little correlation, with <it>r </it>peaking between -0.2 and 0, similar to that observed with the singletons (Fig. <figr fid="F4">4</figr>). 
Using the correlation cutoff (<it>r </it>= 0.59), a total of 598 (89%) paralogous protein families with two MPSS-qualifying genes exhibited divergent expression patterns, consistent with what has been reported in Arabidopsis <abbrgrp><abbr bid="B36">36</abbr></abbrgrp> and in yeast in which more than 80% of the older duplicated gene pairs (<it>d</it><sub>S </sub>&gt; 1.5) showed divergence in expression <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>.</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Histogram of Pearson's Correlation Coefficients of expression (<it>r</it>) of rice paralogous protein families with exactly two MPSS-qualifying genes</p>
               </caption>
               <text>
                  <p>Histogram of Pearson's Correlation Coefficients of expression (<it>r</it>) of rice paralogous protein families with exactly two MPSS-qualifying genes.</p>
               </text>
               <graphic file="1471-2229-8-18-4"/>
            </fig>
            <p>To gain a better understanding of the expression patterns of paralogous protein family members in different organs/tissues, we classified the 18 MPSS libraries <abbrgrp><abbr bid="B32">32</abbr></abbrgrp> into four groups by organs/tissues: roots, leaves, reproductive organs/tissues, and "other tissues". Within the 674 paralogous families with exactly two MPSS-qualifying genes, 239, 168, 223, and 200 paralogous families had only a single member of the pair expressed in roots, leaves, reproductive organs/tissues, and "other tissues", respectively, which demonstrated their diverged expression patterns, and possible tissue-specific expression. To further examine the tissue-specific or stress-induced expression patterns of paralogous protein family members, we calculated the Preferential Expression Measure (PEM) for each of the 1,348 genes from the 674 paralogous families (see Methods) in the 18 MPSS libraries. The PEM shows the base-10 log of ratio of the observed expression level in a given tissue/treatment to the expected expression level assuming uniform expression across all tissues/treatments. A PEM value of 1 means the observed expression level in a given tissue/treatment is 10 times that of expected and indicates strong tissue specific expression. For each gene, tissue(s) with a stringent cutoff of PEM &#8805; 1 were compared with the other member of the duplicated gene pair. A total of 375 (375/674 = 55.6%) of the paralogous families showed little tissue-specific expression as none of the associated PEMs had a value equal to or greater than 1. Two hundred ninety-nine families showed strong tissue specific expression patterns; 19 families were preferentially expressed in the same tissue or treatment, 49 families were preferentially expressed in different tissues or treatments, and 231 families had only one of the duplicated genes with preferential tissue-specific expression.</p>
            <p>We further examined the correlation between expression divergence and sequence divergence. For each family, we calculated the Pearson's Correlation Coefficient (<it>r</it>) for all possible pairs of the MPSS-qualifying genes to measure expression divergence. We then used <it>d</it><sub>S </sub>as a proxy of divergence time for each gene pair. We restricted our analysis to <it>d</it><sub>S </sub>&#8804; 1.5 so that the synonymous sites are not saturated. The Pearson's Correlation Coefficient (<it>r</it>) values were plotted against the <it>d</it><sub>S </sub>values for each interval of 0.1 to gain better resolution. That is, we plotted them separately for gene pairs with 0 &lt;<it>d</it><sub>S </sub>&#8804; 0.1, 0.1 &lt;<it>d</it><sub>S </sub>&#8804; 0.2, 0.2 &lt;<it>d</it><sub>S </sub>&#8804; 0.3, and so on. We found no correlation between <it>d</it><sub>S </sub>and correlation of expression except for gene pairs with 0 &lt;<it>d</it><sub>S </sub>&#8804; 0.1 (R = 0.33, P &lt; 1e-4) where duplicated genes were relatively young [see Additional file <supplr sid="S6">6</supplr>]. The number of non-synonymous substitutions per site (<it>dN</it>) was also calculated for each gene pair and plotted against correlation of expression. No correlation was observed between <it>dN </it>and correlation of expression (data not shown). This is consistent with reports in Arabidopsis in which expression divergence is not strictly coupled with sequence divergence, as shown by no appreciable change in expression pattern in developing roots for the majority of gene duplicates with highly diverged amino acid sequences <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>.</p>
            <suppl id="S6">
               <title>
                  <p>Additional File 6</p>
               </title>
               <text>
                  <p>Pearson's correlation coefficient (<it>r</it>) versus <it>ds </it>values. <b>A) </b>0 &lt;<it>d</it><sub>S </sub>&#8804; 0.1; <b>B) </b>0.4 &lt;<it>d</it><sub>S </sub>&#8804; 0.5; <b>C) </b>1.0 &lt;<it>d</it><sub>S </sub>&#8804; 1.1; <b>D) </b>1.4 &lt;<it>d</it><sub>S </sub>&#8804; 1.5.</p>
               </text>
               <file name="1471-2229-8-18-S6.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <p>Positive correlation of expression patterns among paralogous protein family members would suggest that similar transcriptional regulation was retained in both members and, possibly, similar functions. However, we observed a large number of gene pairs with little expression correlation which could be an indication of subfunctionalization or neofunctionalization after gene duplication. The duplication-degeneration-complementarity (DDC) model proposed by Force <it>et al</it>. <abbrgrp><abbr bid="B3">3</abbr></abbrgrp> and Lynch and Force <abbrgrp><abbr bid="B4">4</abbr></abbrgrp> suggests that subfunctionalization is a major mechanism for retention of duplicated genes as a result of differential expression caused by accumulation of mutations in regulatory regions rather than protein coding regions. The 49 families with preferential expression in two different tissues or treatments, along with the 231 families having only one member of the paralogous pair preferentially expressed, are strong indicators of subfunctionalization. As our paralogous protein family classification required that each family member have the same domain profile, the differential expression may be attributable to mutations in regulatory regions rather than gene coding regions, consistent with the DDC model.</p>
         </sec>
         <sec>
            <st>
               <p>Case studies of rice paralogous protein families</p>
            </st>
            <sec>
               <st>
                  <p>Prolamin protein family</p>
               </st>
               <p>Prolamin is one of the major endosperm storage proteins in cereal grains such as wheat, barley, rye, maize, and sorghum <abbrgrp><abbr bid="B43">43</abbr><abbr bid="B44">44</abbr><abbr bid="B45">45</abbr><abbr bid="B46">46</abbr></abbrgrp>. It was named prolamin due to its high content of proline and glutamine. In rice, prolamin contributes 35% of the total seed protein <abbrgrp><abbr bid="B47">47</abbr></abbrgrp>. Three classes of prolamins have been identified in <it>Oryza </it>by their molecular weights: 10, 13, and 16 kDa <abbrgrp><abbr bid="B48">48</abbr></abbrgrp>. The major prolamin families in rice are Family 3722 (20 members) and Family 3193 (seven members). Members of both families have a BLASTP-based domain. Members of Family 3193 have a Pfam domain (PF00234; Protease inhibitor/seed storage/LTP family) in addition to the common BLASTP-based domain and thus were not clustered within Family 3722 as the exact same domain profile is required for each family member in our computational pipeline [see Additional file <supplr sid="S7">7</supplr>]. All of the prolamin genes were single-exon genes as reported previously <abbrgrp><abbr bid="B49">49</abbr></abbrgrp> with the exception of four genes that were annotated as containing a single intron; on further examination of the EST alignments, these were found to be single-exon genes that had not been properly annotated (data not shown). The length of the deduced amino acid sequences of the prolamin proteins (excluding the four inaccurately annotated genes) varied from 101 to 156 amino acids with two peaks at 101~110 and 145~160 amino acids, consistent with what had been reported in rice prolamin proteins <abbrgrp><abbr bid="B49">49</abbr><abbr bid="B50">50</abbr></abbrgrp>.</p>
               <suppl id="S7">
                  <title>
                     <p>Additional File 7</p>
                  </title>
                  <text>
                     <p>Schematic illustration of the domain composition of three related rice paralogous protein families: Family 3722, Family 3193, and Family 3856.</p>
                  </text>
                  <file name="1471-2229-8-18-S7.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <p>Only five prolamin family members (LOC_Os05g26720.1, LOC_Os05g26770.1, LOC_Os06g31070.1, LOC_Os12g16880.1, LOC_Os12g16890.1) were associated with unique, reliable, and significant MPSS tags, which, as expected, were exclusively expressed in 3-day germinating seeds with relatively high abundances (198, 562, 1042, 148, and 670 Transcripts Per Million (TPM), respectively) [see Additional file <supplr sid="S8">8</supplr>]. We also examined the expression of the two prolamin families with that of Family 3856 (123 members) which contained the same Pfam domain (PF00234) that was in prolamin family 3193 [see Additional file <supplr sid="S7">7</supplr>]. A total of 54 genes from Family 3856 were associated with unique, reliable, and significant MPSS tags. However, the expression pattern observed in Family 3856 substantially differed from that of the prolamin families (Family 3722 and Family 3193) in that most of the genes were expressed in multiple organs/tissues [see Additional file <supplr sid="S9">9</supplr>].</p>
               <suppl id="S8">
                  <title>
                     <p>Additional File 8</p>
                  </title>
                  <text>
                     <p>Expression abundance of the rice prolamin genes from Family 3722 and Family 3193 in 18 libraries which were associated with unique, reliable, and significant MPSS tags.</p>
                  </text>
                  <file name="1471-2229-8-18-S8.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <suppl id="S9">
                  <title>
                     <p>Additional File 9</p>
                  </title>
                  <text>
                     <p>Expression abundance of genes from rice paralogous protein family Family 3856 (contained PF00234) in 18 libraries which were associated with unique, reliable, and significant MPSS tags.</p>
                  </text>
                  <file name="1471-2229-8-18-S9.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <p>Interestingly, we observed that genes encoding the prolamin protein family seemed to localize closely on the chromosomes. A total of 16 prolamin protein family genes were located together on chromosome 5 with a large number of TE-related genes inserted between the family members [see Additional file <supplr sid="S10">10</supplr>]. Other prolamin protein family genes were located on chromosome 6 (two genes in tandem), chromosome 7 (in two gene clusters), and chromosome 12 (three genes with TE-related genes inserted between them), suggestive of tandem duplication(s) of the prolamin protein family genes followed by insertion of transposable elements throughout the course of evolution. This is consistent with a previous report on the compact expansion of the <it>&#945;</it>-zein gene family of maize <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>.</p>
               <suppl id="S10">
                  <title>
                     <p>Additional File 10</p>
                  </title>
                  <text>
                     <p>Genome Browser view of the genes encoding rice prolamin proteins with TE-related genes inserted between putative tandem duplications.</p>
                  </text>
                  <file name="1471-2229-8-18-S10.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
            </sec>
            <sec>
               <st>
                  <p>Bowman-Birk Inhibitor (BBI) type protein family</p>
               </st>
               <p>BBI is a cysteine-rich protein which has trypsin and chymotrypsin inhibitory activities <abbrgrp><abbr bid="B51">51</abbr></abbrgrp>. It was first characterized in soybean <abbrgrp><abbr bid="B52">52</abbr><abbr bid="B53">53</abbr></abbrgrp> and later found widely distributed in monocot and dicot species <abbrgrp><abbr bid="B54">54</abbr><abbr bid="B55">55</abbr><abbr bid="B56">56</abbr><abbr bid="B57">57</abbr><abbr bid="B58">58</abbr></abbrgrp>. It has been extensively studied due to its possible role in plant defense <abbrgrp><abbr bid="B51">51</abbr><abbr bid="B54">54</abbr><abbr bid="B58">58</abbr></abbrgrp> and its potential application in cancer chemoprevention <abbrgrp><abbr bid="B59">59</abbr><abbr bid="B60">60</abbr><abbr bid="B61">61</abbr></abbrgrp>. The major BBI type protein families in rice are Family 3328 (eight members) and Family 1493 (three members). While both families have the Pfam domain PF00228 (Bowman-Birk serine protease inhibitor family), Family 3328 also has a second domain identified via BLASTP [see Additional file <supplr sid="S11">11</supplr>]. Amino acid composition analysis showed that 31% and 47% of the conserved residues of Family 3328 and Family 1493, respectively, were cysteine, suggesting that this amino acid has an important role in the protease inhibitory activity of BBI. These composition data also revealed subtle differences between the two BBI type protein families. The phylogenetic tree generated by MEGA version 3.1 <abbrgrp><abbr bid="B62">62</abbr></abbrgrp> for family 3328 [see Additional file <supplr sid="S12">12</supplr>] suggests that after the original duplication event, only one of the paralogs underwent further rounds of duplication, consistent with the physical clustering of this set of BBI genes on chromosome 1 [see Additional file <supplr sid="S13">13</supplr>].</p>
               <suppl id="S11">
                  <title>
                     <p>Additional File 11</p>
                  </title>
                  <text>
                     <p>Schematic illustration of the domain composition of two rice BBI-related paralogous protein families which have Pfam domain PF00228: Family 3328 and Family 1493.</p>
                  </text>
                  <file name="1471-2229-8-18-S11.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <suppl id="S12">
                  <title>
                     <p>Additional File 12</p>
                  </title>
                  <text>
                     <p>Neighbor-Joining tree of the rice Bowman-Birk inhibitor protein family Family 3328.</p>
                  </text>
                  <file name="1471-2229-8-18-S12.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <suppl id="S13">
                  <title>
                     <p>Additional File 13</p>
                  </title>
                  <text>
                     <p>Genome Browser view of the rice genes encoding BBI proteins on chromosome 1.</p>
                  </text>
                  <file name="1471-2229-8-18-S13.pdf">
                     <p>Click here for file</p>
                  </file>
               </suppl>
               <p>MPSS analysis showed that the BBI genes were differentially expressed in a wide range of tissues and organs, consistent with previously reported expression patterns <abbrgrp><abbr bid="B58">58</abbr></abbrgrp>. Seven genes of Family 3328 were associated with unique, reliable, and significant MPSS tags with the pairwise Pearson's Correlation Coefficient values ranging from -0.35 to 0.71. Two genes within Family 1493 were associated with unique, reliable, and significant MPSS tags, which showed little correlation in expression (<it>r </it>= -0.12). It would be interesting to determine expression levels of the BBI genes following wounding, as seven proteins of the Family 3328 were annotated as Bowman-Birk type bran trypsin inhibitor precursors, a type which was reported to play an important role in plant defense <abbrgrp><abbr bid="B54">54</abbr><abbr bid="B58">58</abbr></abbrgrp>, and two members of the Family 1493 were annotated as wound-induced BBI type WIP1 precursors <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>.</p>
            </sec>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>We demonstrated that even relatively small plant genomes such as rice and Arabidopsis have a significant portion of their proteomes in paralogous families, resulting in a partially redundant proteome. The origin of most paralogous gene families in the rice genome seems to be very old, but duplicates have continued to arise at a fairly steady pace, with a peak in duplication being coincident with a major segmental duplication that took place at ~70 MYA. While conservation of protein domains was clearly observed within rice and Arabidopsis paralogous families, we did observe a major skew in types of proteins and protein domains within paralogous families versus singleton proteins, suggesting that selection acted during genome evolution and gene duplication. Another level of potential functionality in paralogous family proteins could also occur through alternative splicing which was statistically more frequent in paralogous family proteins compared to singletons in both rice and Arabidopsis. In rice, while some paralogous family members were transcriptionally co-regulated, divergence in expression patterns was clearly evident, thereby allowing an expanded range of functionality for the protein. These data suggested that multiple mechanisms are present in plant genomes to generate protein diversity and that these two model plant species share at least a subset of these mechanisms.</p>
      </sec>
      <sec>
         <st>
            <p>Methods</p>
         </st>
         <sec>
            <st>
               <p>Construction of paralogous protein families</p>
            </st>
            <p>In release 4 of the TIGR Rice Genome Annotation <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>, a total of 55,890 genes were annotated, of which 13,237 were related to TE. The TE-related genes were excluded from all further analyses. As alternative splicing occurs in the rice genome and some genes have multiple splice forms, the largest peptide sequence was used whenever alternative isoforms existed. Short protein sequences (&lt;50 amino acids) were excluded from this analysis. A total of 42,653 rice protein sequences were used to classify paralogous protein families using protein domain compositions as described in Haas <it>et al</it>. <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. The basic approach for generating the protein families involved identification of the domains followed by organization of the families based on domains. Two different types of domains were used for the generation of paralogous families: Pfam/HMM domains and BLASTP-based domains. For the Pfam/HMM domains, the predicted rice proteome was searched against the Pfam HMM domain database <abbrgrp><abbr bid="B63">63</abbr></abbrgrp> using HMMER2 <abbrgrp><abbr bid="B64">64</abbr></abbrgrp> and proteins with scores above the trusted cutoff value were retained. For the BLASTP-based domain, peptide regions that were not covered by the Pfam HMM profiles were then clustered based on homology derived from an all versus all BLASTP search <abbrgrp><abbr bid="B65">65</abbr></abbrgrp>. Links were made if two peptides had an >45% identity over >75 amino acids with an E-value &lt;0.001. To prevent multi-domain proteins that are not related from artificially clustering due to single linkages, the Jaccard coefficient of community <abbrgrp><abbr bid="B66">66</abbr></abbrgrp>, also known as link score, was used in the clustering process. As described in Haas <it>et al</it>. 
<abbrgrp><abbr bid="B30">30</abbr></abbrgrp>, a link score was calculated for the pairs of linked peptide sequences <it>a </it>and <it>b </it>as follows:</p>
            <p>
               <display-formula>
                  <m:math name="1471-2229-8-18-i1" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:msub>
                              <m:mi>J</m:mi>
                              <m:mrow>
                                 <m:mi>a</m:mi>
                                 <m:mo>,</m:mo>
                                 <m:mi>b</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>=</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:mo>#</m:mo>
                                 <m:mi>d</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>s</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>s</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>q</m:mi>
                                 <m:mi>u</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>s</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>m</m:mi>
                                 <m:mi>a</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>h</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>g</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>a</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>a</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>d</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>b</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>l</m:mi>
                                 <m:mi>u</m:mi>
                                 <m:mi>d</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>g</m:mi>
                                 <m:mo stretchy="false">(</m:mo>
                                 <m:mi>a</m:mi>
                                 <m:mo>,</m:mo>
                                 <m:mi>b</m:mi>
                                 <m:mo stretchy="false">)</m:mo>
                              </m:mrow>
                              <m:mrow>
                                 <m:mo>#</m:mo>
                                 <m:mi>d</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>s</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>s</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>q</m:mi>
                                 <m:mi>u</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>s</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>m</m:mi>
                                 <m:mi>a</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mi>c</m:mi>
                                 <m:mi>h</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>n</m:mi>
                                 <m:mi>g</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>e</m:mi>
                                 <m:mi>i</m:mi>
                                 <m:mi>t</m:mi>
                                 <m:mi>h</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>r</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>a</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>o</m:mi>
                                 <m:mi>r</m:mi>
                                 <m:mtext>&#160;</m:mtext>
                                 <m:mi>b</m:mi>
                              </m:mrow>
                           </m:mfrac>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfKttLearuWrP9MDH5MBPbIqV92AaeXatLxBI9gBaebbnrfifHhDYfgasaacPC6xNi=xI8qiVKYPFjYdHaVhbbf9v8qqaqFr0xc9vqFj0dXdbba91qpepeI8k8fiI+fsY=rqGqVepae9pg0db9vqaiVgFr0xfr=xfr=xc9adbaqaaeGacaGaaiaabeqaaeqabiWaaaGcbaGaemOsaO0aaSbaaSqaaiabdggaHjabcYcaSiabdkgaIbqabaGccqGH9aqpjuaGdaWcaaqaaiabcocaJiabdsgaKjabdMgaPjabdohaZjabdsha0jabdMgaPjabd6gaUjabdogaJjabdsha0jabbccaGiabdohaZjabdwgaLjabdghaXjabdwha1jabdwgaLjabd6gaUjabdogaJjabdwgaLjabdohaZjabbccaGiabd2gaTjabdggaHjabdsha0jabdogaJjabdIgaOjabdMgaPjabd6gaUjabdEgaNjabbccaGiabdggaHjabbccaGiabdggaHjabd6gaUjabdsgaKjabbccaGiabdkgaIjabbccaGiabdMgaPjabd6gaUjabdogaJjabdYgaSjabdwha1jabdsgaKjabdMgaPjabd6gaUjabdEgaNjabcIcaOiabdggaHjabcYcaSiabdkgaIjabcMcaPaqaaiabcocaJiabdsgaKjabdMgaPjabdohaZjabdsha0jabdMgaPjabd6gaUjabdogaJjabdsha0jabbccaGiabdohaZjabdwgaLjabdghaXjabdwha1jabdwgaLjabd6gaUjabdogaJjabdwgaLjabdohaZjabbccaGiabd2gaTjabdggaHjabdsha0jabdogaJjabdIgaOjabdMgaPjabd6gaUjabdEgaNjabbccaGiabdwgaLjabdMgaPjabdsha0jabdIgaOjabdwgaLjabdkhaYjabbccaGiabdggaHjabbccaGiabd+gaVjabdkhaYjabbccaGiabdkgaIbaaaaa@A7D8@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>Peptides with a link score above the cut-off value (0.66) were selected to generate single linkage clusters. Clustered peptides were then aligned using CLUSTALW <abbrgrp><abbr bid="B67">67</abbr><abbr bid="B68">68</abbr></abbrgrp> and used to develop BLASTP-based domains, which were used to build the families if the domain alignments contained four or more members. Protein families were then organized based on the domain composition that refers to the type and number of the domains, which included both Pfam HMM domains and BLASTP-based domains. Proteins with identical domain composition were then classified into putative protein families. Paralogous protein families in Arabidopsis were constructed similarly with a total of 26,819 protein coding genes from the TAIR7 release of the predicted proteome <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Identification of segmentally duplicated genes</p>
            </st>
            <p>Segmentally duplicated genes in the rice genome were defined in Release 4 as described previously <abbrgrp><abbr bid="B69">69</abbr></abbrgrp>. In brief, similar gene pairs were identified by all versus all BLASTP search (WU-BLASTP, parameters "V = 5 B = 5 E = 1e-10 -filter seg") <abbrgrp><abbr bid="B65">65</abbr></abbrgrp>, which were then used to define segmentally duplicated blocks by running DAGchainer <abbrgrp><abbr bid="B70">70</abbr></abbrgrp> with parameters "-s -I -D 100000".</p>
         </sec>
         <sec>
            <st>
               <p>Functional classification of Arabidopsis proteome</p>
            </st>
            <p>A total of 26,819 Arabidopsis protein coding genes were downloaded from the TAIR7 release of the predicted proteome <abbrgrp><abbr bid="B31">31</abbr></abbrgrp> and searched against an in-house non-redundant amino acid database that contains all publicly available protein sequences (e.g. GenBank, Swissprot, etc.) using BLASTP <abbrgrp><abbr bid="B65">65</abbr></abbrgrp> and the Pfam HMM domain database <abbrgrp><abbr bid="B63">63</abbr></abbrgrp> using HMMER2 <abbrgrp><abbr bid="B64">64</abbr></abbrgrp>. BLASTP matches to Arabidopsis sequences were excluded unless they were from Swissprot. BLASTP matches to conserved hypothetical or hypothetical proteins were excluded as well. Arabidopsis proteins with a BLASTP match (&lt; 1e-10 and > 30% identity over 50% coverage) or Pfam domains with scores above the trusted cutoff value were classified as known or putative proteins. The remaining Arabidopsis genes were classified as expressed genes or hypothetical genes according to the gene set downloaded from TAIR7 release <abbrgrp><abbr bid="B31">31</abbr></abbrgrp> which had at least one supporting cDNA and/or EST.</p>
         </sec>
         <sec>
            <st>
               <p>GOSlim assignment</p>
            </st>
            <p>To assign Gene Ontologies (GO) <abbrgrp><abbr bid="B71">71</abbr></abbrgrp>, the predicted rice proteome was searched against the predicted Arabidopsis proteome (TAIR6 Genome Release) <abbrgrp><abbr bid="B31">31</abbr></abbrgrp> using BLASTP. Using an E-value cutoff of 1e-10, plant GOSlim annotations <abbrgrp><abbr bid="B34">34</abbr></abbrgrp> were transitively annotated using the GO terms from Arabidopsis. Hypothetical/expressed proteins, TE-related proteins, and proteins assigned with GO terms with "unknown" definitions were excluded from this analysis. The GOSlim assignment of Arabidopsis proteins was obtained from the TAIR7 release <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Identification of alternatively spliced genes</p>
            </st>
            <p>Approximately 780,000 rice EST sequences were released subsequent to the generation of the Release 4 gene models <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>. Thus, we utilized the PASA program <abbrgrp><abbr bid="B72">72</abbr></abbrgrp> to re-annotate the gene models and comprehensively identify alternatively spliced genes with the latest set of rice transcript data. Alternative splicing information on Arabidopsis was obtained from TAIR7 release <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Estimation of the age of the paralogous protein families</p>
            </st>
            <p>A multiple protein sequence alignment was obtained for each family using CLUSTALW with default parameter settings <abbrgrp><abbr bid="B67">67</abbr><abbr bid="B68">68</abbr></abbrgrp>. From each protein family of size <it>n</it>, all (<it>n</it><sup>2</sup>-<it>n</it>)/2 pairwise alignments were extracted from the global family alignment, maintaining the position and length of all gaps. A maximum likelihood estimate of the number of synonymous substitutions per synonymous site (<it>d</it><sub>S</sub>) was obtained for all pairwise alignments. All calculations were performed using the codon-based substitution model of Goldman and Yang <abbrgrp><abbr bid="B73">73</abbr></abbrgrp> implemented in <it>codeml </it>from the PAML package, version 3.15 <abbrgrp><abbr bid="B74">74</abbr></abbrgrp>, running in pairwise mode (runmode = -2), with codon equilibrium frequencies estimated from average nucleotide frequencies at each codon position (CodonFreq = 2).</p>
            <p>The age of a paralogous protein family is defined by the duplication that gave rise to its second member, and can be approximated by the divergence between the most distantly-related pair of genes in the family. Given the rate of synonymous substitutions in grasses, estimated to be ~6.5 &#215; 10<sup>-9 </sup>per site per year <abbrgrp><abbr bid="B75">75</abbr></abbrgrp>, the number of synonymous substitutions per site (<it>d</it><sub>S</sub>) between the most divergent gene pair in a family can be converted into a divergence time, provided synonymous sites are not saturated (<it>d</it><sub>S </sub>&lt; ~1). In addition, peaks in the distribution of intra-family pairwise <it>d</it><sub>S </sub>values suggest periods of family diversification. For each family, the distribution of pairwise <it>d</it><sub>S </sub>values was determined, plotted within the range of 0 to 1.5, with bin size of 0.1. Both the modal bin of each distribution (usually resulting from the most ancient split in the family tree) and the largest modal value of <it>d</it><sub>S </sub>&lt; 1.5 (reflecting a burst in diversification within the last 100 MY) were recorded.</p>
         </sec>
         <sec>
            <st>
               <p>Massively parallel signature sequencing data and mapping</p>
            </st>
            <p>A total of 106,521 significant (>3 TPM) and reliable (observed in more than one sequencing run) MPSS <abbrgrp><abbr bid="B32">32</abbr></abbrgrp> tags were obtained from the Rice MPSS Project <abbrgrp><abbr bid="B32">32</abbr><abbr bid="B76">76</abbr></abbrgrp>. These MPSS tags are derived from nine treated or untreated organs/tissues including callus, leaf, seed, crown vegetative meristematic tissue, ovary, stigma, pollen, panicle and stem. To reduce background noise, the method of Haberer <it>et al</it>. <abbrgrp><abbr bid="B77">77</abbr></abbrgrp> was used to remove tags if the total minimal abundance across all libraries was &#8804; 10 TPM or if the tag was not detected at &#8805; 5 TPM in at least a single library, resulting in a total of 74,748 tags for subsequent analyses. The final set of MPSS tags was searched against TIGR rice pseudomolecules <abbrgrp><abbr bid="B33">33</abbr></abbrgrp> using the Vmatch program <abbrgrp><abbr bid="B78">78</abbr></abbrgrp>. As tags can span one or more introns, MPSS tags were also searched against all the cDNA sequences of the annotated genes. MPSS tags that mapped to the anti-sense sequence of the annotated genes or that mapped to multiple locations of the genome were excluded, which is important to minimize false correlations among closely related paralogs. If a gene was associated with multiple MPSS tags, only the most 3' tag was used for the expression analysis. Paralogous genes that were associated with unique, reliable, and significant MPSS tags were analyzed. Pearson's Correlation Coefficient (<it>r</it>) was calculated for each gene pair to determine the expression correlation using the following formula <abbrgrp><abbr bid="B79">79</abbr></abbrgrp>:</p>
            <p>
               <display-formula>
                  <m:math name="1471-2229-8-18-i2" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:mi>r</m:mi>
                           <m:mo>=</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:mi>n</m:mi>
                                 <m:mstyle displaystyle="true">
                                    <m:munderover>
                                       <m:mo>&#8721;</m:mo>
                                       <m:mrow>
                                          <m:mi>i</m:mi>
                                          <m:mo>=</m:mo>
                                          <m:mn>1</m:mn>
                                       </m:mrow>
                                       <m:mi>n</m:mi>
                                    </m:munderover>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>x</m:mi>
                                          <m:mi>i</m:mi>
                                       </m:msub>
                                       <m:msub>
                                          <m:mi>y</m:mi>
                                          <m:mi>i</m:mi>
                                       </m:msub>
                                       <m:mo>&#8722;</m:mo>
                                       <m:mstyle displaystyle="true">
                                          <m:munderover>
                                             <m:mo>&#8721;</m:mo>
                                             <m:mrow>
                                                <m:mi>i</m:mi>
                                                <m:mo>=</m:mo>
                                                <m:mn>1</m:mn>
                                             </m:mrow>
                                             <m:mi>n</m:mi>
                                          </m:munderover>
                                          <m:mrow>
                                             <m:msub>
                                                <m:mi>x</m:mi>
                                                <m:mi>i</m:mi>
                                             </m:msub>
                                             <m:mstyle displaystyle="true">
                                                <m:munderover>
                                                   <m:mo>&#8721;</m:mo>
                                                   <m:mrow>
                                                      <m:mi>i</m:mi>
                                                      <m:mo>=</m:mo>
                                                      <m:mn>1</m:mn>
                                                   </m:mrow>
                                                   <m:mi>n</m:mi>
                                                </m:munderover>
                                                <m:mrow>
                                                   <m:msub>
                                                      <m:mi>y</m:mi>
                                                      <m:mi>i</m:mi>
                                                   </m:msub>
                                                </m:mrow>
                                             </m:mstyle>
                                          </m:mrow>
                                       </m:mstyle>
                                    </m:mrow>
                                 </m:mstyle>
                              </m:mrow>
                              <m:mrow>
                                 <m:msqrt>
                                    <m:mrow>
                                       <m:mo stretchy="false">[</m:mo>
                                       <m:mi>n</m:mi>
                                       <m:mstyle displaystyle="true">
                                          <m:munderover>
                                             <m:mo>&#8721;</m:mo>
                                             <m:mrow>
                                                <m:mi>i</m:mi>
                                                <m:mo>=</m:mo>
                                                <m:mn>1</m:mn>
                                             </m:mrow>
                                             <m:mi>n</m:mi>
                                          </m:munderover>
                                          <m:mrow>
                                             <m:msubsup>
                                                <m:mi>x</m:mi>
                                                <m:mi>i</m:mi>
                                                <m:mn>2</m:mn>
                                             </m:msubsup>
                                             <m:mo>&#8722;</m:mo>
                                             <m:msup>
                                                <m:mrow>
                                                   <m:mo stretchy="false">(</m:mo>
                                                   <m:mstyle displaystyle="true">
                                                      <m:munderover>
                                                         <m:mo>&#8721;</m:mo>
                                                         <m:mrow>
                                                            <m:mi>i</m:mi>
                                                            <m:mo>=</m:mo>
                                                            <m:mn>1</m:mn>
                                                         </m:mrow>
                                                         <m:mi>n</m:mi>
                                                      </m:munderover>
                                                      <m:mrow>
                                                         <m:msub>
                                                            <m:mi>x</m:mi>
                                                            <m:mi>i</m:mi>
                                                         </m:msub>
                                                      </m:mrow>
                                                   </m:mstyle>
                                                   <m:mo stretchy="false">)</m:mo>
                                                </m:mrow>
                                                <m:mn>2</m:mn>
                                             </m:msup>
                                             <m:mo stretchy="false">]</m:mo>
                                             <m:mo stretchy="false">[</m:mo>
                                             <m:mi>n</m:mi>
                                             <m:mstyle displaystyle="true">
                                                <m:munderover>
                                                   <m:mo>&#8721;</m:mo>
                                                   <m:mrow>
                                                      <m:mi>i</m:mi>
                                                      <m:mo>=</m:mo>
                                                      <m:mn>1</m:mn>
                                                   </m:mrow>
                                                   <m:mi>n</m:mi>
                                                </m:munderover>
                                                <m:mrow>
                                                   <m:msubsup>
                                                      <m:mi>y</m:mi>
                                                      <m:mi>i</m:mi>
                                                      <m:mn>2</m:mn>
                                                   </m:msubsup>
                                                   <m:mo>&#8722;</m:mo>
                                                   <m:msup>
                                                      <m:mrow>
                                                         <m:mo stretchy="false">(</m:mo>
                                                         <m:mstyle displaystyle="true">
                                                            <m:munderover>
                                                               <m:mo>&#8721;</m:mo>
                                                               <m:mrow>
                                                                  <m:mi>i</m:mi>
                                                                  <m:mo>=</m:mo>
                                                                  <m:mn>1</m:mn>
                                                               </m:mrow>
                                                               <m:mi>n</m:mi>
                                                            </m:munderover>
                                                            <m:mrow>
                                                               <m:msub>
                                                                  <m:mi>y</m:mi>
                                                                  <m:mi>i</m:mi>
                                                               </m:msub>
                                                            </m:mrow>
                                                         </m:mstyle>
                                                         <m:mo stretchy="false">)</m:mo>
                                                      </m:mrow>
                                                      <m:mn>2</m:mn>
                                                   </m:msup>
                                                   <m:mo stretchy="false">]</m:mo>
                                                </m:mrow>
                                             </m:mstyle>
                                          </m:mrow>
                                       </m:mstyle>
                                    </m:mrow>
                                 </m:msqrt>
                              </m:mrow>
                           </m:mfrac>
                           <m:mo>,</m:mo>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfKttLearuWrP9MDH5MBPbIqV92AaeXatLxBI9gBaebbnrfifHhDYfgasaacPC6xNi=xI8qiVKYPFjYdHaVhbbf9v8qqaqFr0xc9vqFj0dXdbba91qpepeI8k8fiI+fsY=rqGqVepae9pg0db9vqaiVgFr0xfr=xfr=xc9adbaqaaeGacaGaaiaabeqaaeqabiWaaaGcbaGaemOCaiNaeyypa0tcfa4aaSaaaeaacqWGUbGBdaaeWbqaaiabdIha4naaBaaabaGaemyAaKgabeaacqWG5bqEdaWgaaqaaiabdMgaPbqabaGaeyOeI0YaaabCaeaacqWG4baEdaWgaaqaaiabdMgaPbqabaWaaabCaeaacqWG5bqEdaWgaaqaaiabdMgaPbqabaaabaGaemyAaKMaeyypa0JaeGymaedabaGaemOBa4gacqGHris5aaqaaiabdMgaPjabg2da9iabigdaXaqaaiabd6gaUbGaeyyeIuoaaeaacqWGPbqAcqGH9aqpcqaIXaqmaeaacqWGUbGBaiabggHiLdaabaWaaOaaaeaacqGGBbWwcqWGUbGBdaaeWbqaaiabdIha4naaDaaabaGaemyAaKgabaGaeGOmaidaaiabgkHiTiabcIcaOmaaqahabaGaemiEaG3aaSbaaeaacqWGPbqAaeqaaaqaaiabdMgaPjabg2da9iabigdaXaqaaiabd6gaUbGaeyyeIuoacqGGPaqkdaahaaqabeaacqaIYaGmaaGaeiyxa0Laei4waSLaemOBa42aaabCaeaacqWG5bqEdaqhaaqaaiabdMgaPbqaaiabikdaYaaacqGHsislcqGGOaakdaaeWbqaaiabdMha5naaBaaabaGaemyAaKgabeaaaeaacqWGPbqAcqGH9aqpcqaIXaqmaeaacqWGUbGBaiabggHiLdGaeiykaKYaaWbaaeqabaGaeGOmaidaaiabc2faDbqaaiabdMgaPjabg2da9iabigdaXaqaaiabd6gaUbGaeyyeIuoaaeaacqWGPbqAcqGH9aqpcqaIXaqmaeaacqWGUbGBaiabggHiLdaabeaaaaGccqGGSaalaaa@8B64@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>where <it>n </it>is the number of DNA libraries, and <it>x</it><sub><it>i </it></sub>and <it>y</it><sub><it>i </it></sub>represent the expression levels of the two genes of the pair in the <it>i</it>-th library.</p>
         </sec>
         <sec>
            <st>
               <p>Tissue specific expression analysis</p>
            </st>
            <p>To determine if a gene was preferentially expressed in a specific tissue, we employed the PEM devised by Huminiecki <it>et al</it>. <abbrgrp><abbr bid="B80">80</abbr></abbrgrp>. PEM is defined as log<sub>10</sub>(<it>O</it>/<it>E</it>). Basically, it compares the observed (<it>O</it>) expression level in a given tissue with the expected (<it>E</it>) level, assuming uniform expression across all tissues. The PEM value of the <it>i</it>-th gene in the <it>j</it>-th tissue was calculated as follows:</p>
            <p>
               <display-formula>
                  <m:math name="1471-2229-8-18-i3" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:mi>P</m:mi>
                           <m:mi>E</m:mi>
                           <m:msub>
                              <m:mi>M</m:mi>
                              <m:mrow>
                                 <m:mi>i</m:mi>
                                 <m:mo>,</m:mo>
                                 <m:mi>j</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>=</m:mo>
                           <m:msub>
                              <m:mrow>
                                 <m:mi>log</m:mi>
                                 <m:mo>&#8289;</m:mo>
                              </m:mrow>
                              <m:mrow>
                                 <m:mn>10</m:mn>
                              </m:mrow>
                           </m:msub>
                           <m:mo stretchy="false">(</m:mo>
                           <m:msub>
                              <m:mi>x</m:mi>
                              <m:mrow>
                                 <m:mi>i</m:mi>
                                 <m:mo>,</m:mo>
                                 <m:mi>j</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>/</m:mo>
                           <m:mo stretchy="false">(</m:mo>
                           <m:mstyle displaystyle="true">
                              <m:munderover>
                                 <m:mo>&#8721;</m:mo>
                                 <m:mrow>
                                    <m:mi>k</m:mi>
                                    <m:mo>=</m:mo>
                                    <m:mn>1</m:mn>
                                 </m:mrow>
                                 <m:mi>m</m:mi>
                              </m:munderover>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>x</m:mi>
                                    <m:mrow>
                                       <m:mi>k</m:mi>
                                       <m:mo>,</m:mo>
                                       <m:mi>j</m:mi>
                                    </m:mrow>
                                 </m:msub>
                              </m:mrow>
                           </m:mstyle>
                           <m:mstyle displaystyle="true">
                              <m:munderover>
                                 <m:mo>&#8721;</m:mo>
                                 <m:mrow>
                                    <m:mi>l</m:mi>
                                    <m:mo>=</m:mo>
                                    <m:mn>1</m:mn>
                                 </m:mrow>
                                 <m:mi>n</m:mi>
                              </m:munderover>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>x</m:mi>
                                    <m:mrow>
                                       <m:mi>i</m:mi>
                                       <m:mo>,</m:mo>
                                       <m:mi>l</m:mi>
                                    </m:mrow>
                                 </m:msub>
                              </m:mrow>
                           </m:mstyle>
                           <m:mo>/</m:mo>
                           <m:mstyle displaystyle="true">
                              <m:munderover>
                                 <m:mo>&#8721;</m:mo>
                                 <m:mrow>
                                    <m:mi>k</m:mi>
                                    <m:mo>=</m:mo>
                                    <m:mn>1</m:mn>
                                 </m:mrow>
                                 <m:mi>m</m:mi>
                              </m:munderover>
                              <m:mrow>
                                 <m:mstyle displaystyle="true">
                                    <m:munderover>
                                       <m:mo>&#8721;</m:mo>
                                       <m:mrow>
                                          <m:mi>l</m:mi>
                                          <m:mo>=</m:mo>
                                          <m:mn>1</m:mn>
                                       </m:mrow>
                                       <m:mi>n</m:mi>
                                    </m:munderover>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>x</m:mi>
                                          <m:mrow>
                                             <m:mi>k</m:mi>
                                             <m:mo>,</m:mo>
                                             <m:mi>l</m:mi>
                                          </m:mrow>
                                       </m:msub>
                                    </m:mrow>
                                 </m:mstyle>
                              </m:mrow>
                           </m:mstyle>
                           <m:mo stretchy="false">)</m:mo>
                           <m:mo stretchy="false">)</m:mo>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfKttLearuWrP9MDH5MBPbIqV92AaeXatLxBI9gBaebbnrfifHhDYfgasaacPC6xNi=xI8qiVKYPFjYdHaVhbbf9v8qqaqFr0xc9vqFj0dXdbba91qpepeI8k8fiI+fsY=rqGqVepae9pg0db9vqaiVgFr0xfr=xfr=xc9adbaqaaeGacaGaaiaabeqaaeqabiWaaaGcbaGaemiuaaLaemyrauKaemyta00aaSbaaSqaaiabdMgaPjabcYcaSiabdQgaQbqabaGccqGH9aqpcyGGSbaBcqGGVbWBcqGGNbWzdaWgaaWcbaGaeGymaeJaeGimaadabeaakiabcIcaOiabdIha4naaBaaaleaacqWGPbqAcqGGSaalcqWGQbGAaeqaaOGaei4la8IaeiikaGYaaabCaeaacqWG4baEdaWgaaWcbaGaem4AaSMaeiilaWIaemOAaOgabeaaaeaacqWGRbWAcqGH9aqpcqaIXaqmaeaacqWGTbqBa0GaeyyeIuoakmaaqahabaGaemiEaG3aaSbaaSqaaiabdMgaPjabcYcaSiabdYgaSbqabaaabaGaemiBaWMaeyypa0JaeGymaedabaGaemOBa4ganiabggHiLdGccqGGVaWldaaeWbqaamaaqahabaGaemiEaG3aaSbaaSqaaiabdUgaRjabcYcaSiabdYgaSbqabaaabaGaemiBaWMaeyypa0JaeGymaedabaGaemOBa4ganiabggHiLdaaleaacqWGRbWAcqGH9aqpcqaIXaqmaeaacqWGTbqBa0GaeyyeIuoakiabcMcaPiabcMcaPaaa@70B1@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>Where <it>m </it>and <it>n </it>represent the total number of MPSS-qualifying genes and tissues, respectively. <it>x</it><sub><it>i</it>, <it>j </it></sub>is the expression level of the <it>i</it>-th gene in the <it>j</it>-th tissue.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Abbreviations</p>
         </st>
         <p>BBI: Bowman-Birk Inhibitor; EST: Expressed Sequence Tag; FLcDNA: Full Length cDNA; MPSS: Massively Parallel Signature Sequencing; MY: Million Years; MYA: Million Years Ago; PEM: Preferential Expression Measure; TE: Transposable Element; TPM: Transcripts Per Million.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>HL designed the study, performed the analyses, and drafted the manuscript. SO participated in the analysis of GOSlim and made Additional file <supplr sid="S3">3</supplr>. KN and BM provided rice MPSS data. AE and JS carried out the age analysis of paralogous families. BH identified alternative splicing isoforms in rice. WZ identified the high confidence gene set in rice. XG participated in the analysis of alternative splicing. RB designed the study and drafted the manuscript. All authors read and approved the final manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We thank Zhe Zhang for comments on statistical analyses. We thank Francoise Thibaud-Nissen for critical review of the article. This work was supported by a National Science Foundation Plant Genome Research Program grant to C. R. B. (DBI-0321538). The MPSS data were supported by an NSF grant to B.C.M. (DBI-0321437).</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Evolution by Gene Duplication</p>
            </title>
            <aug>
               <au>
                  <snm>Ohno</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <publisher>Springer-Verlag, New York</publisher>
            <pubdate>1970</pubdate>
         </bibl>
         <bibl id="B2">
            <title>
               <p>The evolution of functionally novel proteins after gene duplication</p>
            </title>
            <aug>
               <au>
                  <snm>Hughes</snm>
                  <fnm>AL</fnm>
               </au>
            </aug>
            <source>Proc Biol Sci</source>
            <pubdate>1994</pubdate>
            <volume>256</volume>
            <issue>1346</issue>
            <fpage>119</fpage>
            <lpage>124</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1098/rspb.1994.0058</pubid>
                  <pubid idtype="pmpid" link="fulltext">8029240</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Preservation of duplicate genes by complementary, degenerative mutations</p>
            </title>
            <aug>
               <au>
                  <snm>Force</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Lynch</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Pickett</snm>
                  <fnm>FB</fnm>
               </au>
               <au>
                  <snm>Amores</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Yan</snm>
                  <fnm>YL</fnm>
               </au>
               <au>
                  <snm>Postlethwait</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>1999</pubdate>
            <volume>151</volume>
            <issue>4</issue>
            <fpage>1531</fpage>
            <lpage>1545</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1460548</pubid>
                  <pubid idtype="pmpid" link="fulltext">10101175</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>The probability of duplicate gene preservation by subfunctionalization</p>
            </title>
            <aug>
               <au>
                  <snm>Lynch</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Force</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>2000</pubdate>
            <volume>154</volume>
            <issue>1</issue>
            <fpage>459</fpage>
            <lpage>473</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1460895</pubid>
                  <pubid idtype="pmpid" link="fulltext">10629003</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Role of duplicate genes in genetic robustness against null mutations</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Steinmetz</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Gu</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Scharfe</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>RW</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>WH</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2003</pubdate>
            <volume>421</volume>
            <issue>6918</issue>
            <fpage>63</fpage>
            <lpage>66</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature01198</pubid>
                  <pubid idtype="pmpid" link="fulltext">12511954</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Genome organization in dicots: genome duplication in Arabidopsis and synteny between soybean and Arabidopsis</p>
            </title>
            <aug>
               <au>
                  <snm>Grant</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Cregan</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Shoemaker</snm>
                  <fnm>RC</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2000</pubdate>
            <volume>97</volume>
            <issue>8</issue>
            <fpage>4168</fpage>
            <lpage>4173</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">18185</pubid>
                  <pubid idtype="pmpid" link="fulltext">10759555</pubid>
                  <pubid idtype="doi">10.1073/pnas.070430597</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Duplication and suppression of chloroplast protein translocation genes in maize</p>
            </title>
            <aug>
               <au>
                  <snm>Settles</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Baron</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Barkan</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Martienssen</snm>
                  <fnm>RA</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>2001</pubdate>
            <volume>157</volume>
            <issue>1</issue>
            <fpage>349</fpage>
            <lpage>360</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1461481</pubid>
                  <pubid idtype="pmpid" link="fulltext">11139515</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>A recent polyploidy superimposed on older large-scale duplications in the Arabidopsis genome</p>
            </title>
            <aug>
               <au>
                  <snm>Blanc</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Hokamp</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Wolfe</snm>
                  <fnm>KH</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2003</pubdate>
            <volume>13</volume>
            <issue>2</issue>
            <fpage>137</fpage>
            <lpage>144</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">420368</pubid>
                  <pubid idtype="pmpid" link="fulltext">12566392</pubid>
                  <pubid idtype="doi">10.1101/gr.751803</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Recently duplicated maize R2R3 Myb genes provide evidence for distinct mechanisms of evolutionary divergence after duplication</p>
            </title>
            <aug>
               <au>
                  <snm>Dias</snm>
                  <fnm>AP</fnm>
               </au>
               <au>
                  <snm>Braun</snm>
                  <fnm>EL</fnm>
               </au>
               <au>
                  <snm>McMullen</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Grotewold</snm>
                  <fnm>E</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>2003</pubdate>
            <volume>131</volume>
            <issue>2</issue>
            <fpage>610</fpage>
            <lpage>620</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">166837</pubid>
                  <pubid idtype="pmpid" link="fulltext">12586885</pubid>
                  <pubid idtype="doi">10.1104/pp.012047</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>The roles of segmental and tandem gene duplication in the evolution of large gene families in Arabidopsis thaliana</p>
            </title>
            <aug>
               <au>
                  <snm>Cannon</snm>
                  <fnm>SB</fnm>
               </au>
               <au>
                  <snm>Mitra</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Baumgarten</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Young</snm>
                  <fnm>ND</fnm>
               </au>
               <au>
                  <snm>May</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>BMC Plant Biol</source>
            <pubdate>2004</pubdate>
            <volume>4</volume>
            <fpage>10</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">446195</pubid>
                  <pubid idtype="pmpid" link="fulltext">15171794</pubid>
                  <pubid idtype="doi">10.1186/1471-2229-4-10</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>Tandem and segmental gene duplication and recombination in the evolution of plant disease resistance genes</p>
            </title>
            <aug>
               <au>
                  <snm>Leister</snm>
                  <fnm>D</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <issue>3</issue>
            <fpage>116</fpage>
            <lpage>122</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.tig.2004.01.007</pubid>
                  <pubid idtype="pmpid">15049302</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Genome-wide analysis of the MADS-box gene family in Populus trichocarpa</p>
            </title>
            <aug>
               <au>
                  <snm>Leseberg</snm>
                  <fnm>CH</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kang</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Duvall</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Mao</snm>
                  <fnm>L</fnm>
               </au>
            </aug>
            <source>Gene</source>
            <pubdate>2006</pubdate>
            <volume>378</volume>
            <fpage>84</fpage>
            <lpage>94</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.gene.2006.05.022</pubid>
                  <pubid idtype="pmpid" link="fulltext">16831523</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Sequence, regulation, and evolution of the maize 22-kD alpha zein gene family</p>
            </title>
            <aug>
               <au>
                  <snm>Song</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Llaca</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Linton</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Messing</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2001</pubdate>
            <volume>11</volume>
            <issue>11</issue>
            <fpage>1817</fpage>
            <lpage>1825</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">311139</pubid>
                  <pubid idtype="pmpid" link="fulltext">11691845</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Organization and variability of the maize genome</p>
            </title>
            <aug>
               <au>
                  <snm>Messing</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Dooner</snm>
                  <fnm>HK</fnm>
               </au>
            </aug>
            <source>Curr Opin Plant Biol</source>
            <pubdate>2006</pubdate>
            <volume>9</volume>
            <issue>2</issue>
            <fpage>157</fpage>
            <lpage>163</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.pbi.2006.01.009</pubid>
                  <pubid idtype="pmpid" link="fulltext">16459130</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Duplication and DNA segmental loss in the rice genome: implications for diploidization</p>
            </title>
            <aug>
               <au>
                  <snm>Wang</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Shi</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Hao</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Ge</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Luo</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>New Phytol</source>
            <pubdate>2005</pubdate>
            <volume>165</volume>
            <issue>3</issue>
            <fpage>937</fpage>
            <lpage>946</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1111/j.1469-8137.2004.01293.x</pubid>
                  <pubid idtype="pmpid" link="fulltext">15720704</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>Evidence that rice and other cereals are ancient aneuploids</p>
            </title>
            <aug>
               <au>
                  <snm>Vandepoele</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Simillion</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Van de Peer</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Plant Cell</source>
            <pubdate>2003</pubdate>
            <volume>15</volume>
            <issue>9</issue>
            <fpage>2192</fpage>
            <lpage>2202</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">181340</pubid>
                  <pubid idtype="pmpid" link="fulltext">12953120</pubid>
                  <pubid idtype="doi">10.1105/tpc.014019</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Building genomic profiles for uncovering segmental homology in the twilight zone</p>
            </title>
            <aug>
               <au>
                  <snm>Simillion</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Vandepoele</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Saeys</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Van de Peer</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2004</pubdate>
            <volume>14</volume>
            <issue>6</issue>
            <fpage>1095</fpage>
            <lpage>1106</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">419788</pubid>
                  <pubid idtype="pmpid" link="fulltext">15173115</pubid>
                  <pubid idtype="doi">10.1101/gr.2179004</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Ancient polyploidization predating divergence of the cereals, and its consequences for comparative genomics</p>
            </title>
            <aug>
               <au>
                  <snm>Paterson</snm>
                  <fnm>AH</fnm>
               </au>
               <au>
                  <snm>Bowers</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Chapman</snm>
                  <fnm>BA</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2004</pubdate>
            <volume>101</volume>
            <issue>26</issue>
            <fpage>9903</fpage>
            <lpage>9908</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">470771</pubid>
                  <pubid idtype="pmpid" link="fulltext">15161969</pubid>
                  <pubid idtype="doi">10.1073/pnas.0307901101</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Ancestral genome duplication in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Guyot</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Keller</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Genome</source>
            <pubdate>2004</pubdate>
            <volume>47</volume>
            <issue>3</issue>
            <fpage>610</fpage>
            <lpage>614</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1139/g04-016</pubid>
                  <pubid idtype="pmpid" link="fulltext">15190378</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>The sequence of rice chromosomes 11 and 12, rich in disease resistance genes and recent gene duplications</p>
            </title>
            <aug>
               <au>
                  <cnm>The Rice Chromosomes 11 and 12 Sequencing Consortia</cnm>
               </au>
            </aug>
            <source>BMC Biol</source>
            <pubdate>2005</pubdate>
            <volume>3</volume>
            <fpage>20</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1261165</pubid>
                  <pubid idtype="pmpid" link="fulltext">16188032</pubid>
                  <pubid idtype="doi">10.1186/1741-7007-3-20</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>The map-based sequence of the rice genome</p>
            </title>
            <aug>
               <au>
                  <cnm>International Rice Genome Sequencing Project</cnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2005</pubdate>
            <volume>436</volume>
            <issue>7052</issue>
            <fpage>793</fpage>
            <lpage>800</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature03895</pubid>
                  <pubid idtype="pmpid" link="fulltext">16100779</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Genome-wide analysis of the stress associated protein (SAP) gene family containing A20/AN1 zinc-finger(s) in rice and their phylogenetic relationship with Arabidopsis</p>
            </title>
            <aug>
               <au>
                  <snm>Vij</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tyagi</snm>
                  <fnm>AK</fnm>
               </au>
            </aug>
            <source>Mol Genet Genomics</source>
            <pubdate>2006</pubdate>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">17033811</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Cross genome comparisons of serine proteases in Arabidopsis and rice</p>
            </title>
            <aug>
               <au>
                  <snm>Tripathi</snm>
                  <fnm>LP</fnm>
               </au>
               <au>
                  <snm>Sowdhamini</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>BMC Genomics</source>
            <pubdate>2006</pubdate>
            <volume>7</volume>
            <fpage>200</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1560137</pubid>
                  <pubid idtype="pmpid" link="fulltext">16895613</pubid>
                  <pubid idtype="doi">10.1186/1471-2164-7-200</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Comparative phylogenetic analysis of cystatin gene families from arabidopsis, rice and barley</p>
            </title>
            <aug>
               <au>
                  <snm>Martinez</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Abraham</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Carbonero</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Diaz</snm>
                  <fnm>I</fnm>
               </au>
            </aug>
            <source>Mol Genet Genomics</source>
            <pubdate>2005</pubdate>
            <volume>273</volume>
            <issue>5</issue>
            <fpage>423</fpage>
            <lpage>432</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/s00438-005-1147-4</pubid>
                  <pubid idtype="pmpid" link="fulltext">15887031</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Expression of SERK family receptor-like protein kinase genes in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Ito</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Takaya</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Kurata</snm>
                  <fnm>N</fnm>
               </au>
            </aug>
            <source>Biochim Biophys Acta</source>
            <pubdate>2005</pubdate>
            <volume>1730</volume>
            <issue>3</issue>
            <fpage>253</fpage>
            <lpage>258</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">16081169</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Genome-wide comparative analysis of the IQD gene families in Arabidopsis thaliana and Oryza sativa</p>
            </title>
            <aug>
               <au>
                  <snm>Abel</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Savchenko</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Levy</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>BMC Evol Biol</source>
            <pubdate>2005</pubdate>
            <volume>5</volume>
            <fpage>72</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1368998</pubid>
                  <pubid idtype="pmpid" link="fulltext">16368012</pubid>
                  <pubid idtype="doi">10.1186/1471-2148-5-72</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>The Endo-beta-Mannanase gene families in Arabidopsis, rice, and poplar</p>
            </title>
            <aug>
               <au>
                  <snm>Yuan</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Lai</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Cheng</snm>
                  <fnm>ZM</fnm>
               </au>
               <au>
                  <snm>Nonogaki</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>F</fnm>
               </au>
            </aug>
            <source>Funct Integr Genomics</source>
            <pubdate>2006</pubdate>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">16897088</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Genome cluster database. A sequence family analysis platform for Arabidopsis and rice</p>
            </title>
            <aug>
               <au>
                  <snm>Horan</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Lauricha</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bailey-Serres</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Raikhel</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Girke</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>2005</pubdate>
            <volume>138</volume>
            <issue>1</issue>
            <fpage>47</fpage>
            <lpage>54</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1104159</pubid>
                  <pubid idtype="pmpid" link="fulltext">15888677</pubid>
                  <pubid idtype="doi">10.1104/pp.104.059048</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p>Curated genome annotation of Oryza sativa ssp. japonica and comparative genome analysis with Arabidopsis thaliana</p>
            </title>
            <aug>
               <au>
                  <snm>Itoh</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Barrero</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Yamasaki</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Fujii</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Hilton</snm>
                  <fnm>PB</fnm>
               </au>
               <au>
                  <snm>Antonio</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>Aono</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Apweiler</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Bruskiewich</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Bureau</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Burr</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Costa de Oliveira</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Fuks</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Habara</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Haberer</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Han</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Harada</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Hiraki</snm>
                  <fnm>AT</fnm>
               </au>
               <au>
                  <snm>Hirochika</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hoen</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hokari</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hosokawa</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hsing</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Ikawa</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Ikeo</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Imanishi</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ito</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Jaiswal</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Kanno</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Kawahara</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kawamura</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kawashima</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Khurana</snm>
                  <fnm>JP</fnm>
               </au>
               <au>
                  <snm>Kikuchi</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Komatsu</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Koyanagi</snm>
                  <fnm>KO</fnm>
               </au>
               <au>
                  <snm>Kubooka</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Lieberherr</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>YC</fnm>
               </au>
               <au>
                  <snm>Lonsdale</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Matsumoto</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Matsuya</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>McCombie</snm>
                  <fnm>WR</fnm>
               </au>
               <au>
                  <snm>Messing</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Miyao</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Mulder</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Nagamura</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Nam</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Namiki</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Numa</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Nurimoto</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>O'Donovan</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Ohyanagi</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Okido</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Oota</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Osato</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Palmer</snm>
                  <fnm>LE</fnm>
               </au>
               <au>
                  <snm>Quetier</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Raghuvanshi</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Saichi</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Sakai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Sakai</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Sakata</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Sakurai</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Sato</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Sato</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Schoof</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Seki</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Shibata</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Shimizu</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Shinozaki</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Shinso</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Singh</snm>
                  <fnm>NK</fnm>
               </au>
               <au>
                  <snm>Smith-White</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Takeda</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Tanino</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Tatusova</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Thongjuea</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Todokoro</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Tsugane</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Tyagi</snm>
                  <fnm>AK</fnm>
               </au>
               <au>
                  <snm>Vanavichit</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Wing</snm>
                  <fnm>RA</fnm>
               </au>
               <au>
                  <snm>Yamaguchi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Yamamoto</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Yamamoto</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Yu</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Zhao</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Higo</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Burr</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Gojobori</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Sasaki</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2007</pubdate>
            <volume>17</volume>
            <issue>2</issue>
            <fpage>175</fpage>
            <lpage>183</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1781349</pubid>
                  <pubid idtype="pmpid" link="fulltext">17210932</pubid>
                  <pubid idtype="doi">10.1101/gr.5509507</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>Complete reannotation of the Arabidopsis genome: methods, tools, protocols and the final release</p>
            </title>
            <aug>
               <au>
                  <snm>Haas</snm>
                  <fnm>BJ</fnm>
               </au>
               <au>
                  <snm>Wortman</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Ronning</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Hannick</snm>
                  <fnm>LI</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>RK</fnm>
                  <suf>Jr.</suf>
               </au>
               <au>
                  <snm>Maiti</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Chan</snm>
                  <fnm>AP</fnm>
               </au>
               <au>
                  <snm>Yu</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Farzad</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Town</snm>
                  <fnm>CD</fnm>
               </au>
            </aug>
            <source>BMC Biol</source>
            <pubdate>2005</pubdate>
            <volume>3</volume>
            <fpage>7</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1082884</pubid>
                  <pubid idtype="pmpid" link="fulltext">15784138</pubid>
                  <pubid idtype="doi">10.1186/1741-7007-3-7</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <aug>
               <au>
                  <cnm>TAIR</cnm>
               </au>
            </aug>
            <url>http://www.arabidopsis.org</url>
         </bibl>
         <bibl id="B32">
            <aug>
               <au>
                  <cnm>The Rice MPSS Database</cnm>
               </au>
            </aug>
            <url>http://mpss.udel.edu/rice/</url>
         </bibl>
         <bibl id="B33">
            <title>
               <p>The TIGR Rice Genome Annotation Resource: improvements and new features</p>
            </title>
            <aug>
               <au>
                  <snm>Ouyang</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Hamilton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Campbell</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Childs</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Thibaud-Nissen</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Malek</snm>
                  <fnm>RL</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Zheng</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Orvis</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Haas</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Wortman</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Buell</snm>
                  <fnm>CR</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2007</pubdate>
            <volume>35</volume>
            <issue>Database issue</issue>
            <fpage>D883</fpage>
            <lpage>D887</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1751532</pubid>
                  <pubid idtype="pmpid" link="fulltext">17145706</pubid>
                  <pubid idtype="doi">10.1093/nar/gkl976</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B34">
            <aug>
               <au>
                  <cnm>The Gene Ontology</cnm>
               </au>
            </aug>
            <url>http://www.geneontology.org/GO.slims.shtml</url>
         </bibl>
         <bibl id="B35">
            <title>
               <p>Controlling the false discovery rate: a practical and powerful approach to multiple testing</p>
            </title>
            <aug>
               <au>
                  <snm>Benjamini</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Hochberg</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Journal of the Royal Statistical Society</source>
            <pubdate>1995</pubdate>
            <volume>Series B, 57</volume>
            <fpage>289</fpage>
            <lpage>300</lpage>
         </bibl>
         <bibl id="B36">
            <title>
               <p>Functional divergence of duplicated genes formed by polyploidy during Arabidopsis evolution</p>
            </title>
            <aug>
               <au>
                  <snm>Blanc</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Wolfe</snm>
                  <fnm>KH</fnm>
               </au>
            </aug>
            <source>Plant Cell</source>
            <pubdate>2004</pubdate>
            <volume>16</volume>
            <issue>7</issue>
            <fpage>1679</fpage>
            <lpage>1691</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">514153</pubid>
                  <pubid idtype="pmpid" link="fulltext">15208398</pubid>
                  <pubid idtype="doi">10.1105/tpc.021410</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B37">
            <title>
               <p>Alternative splicing and gene duplication are inversely correlated evolutionary mechanisms</p>
            </title>
            <aug>
               <au>
                  <snm>Kopelman</snm>
                  <fnm>NM</fnm>
               </au>
               <au>
                  <snm>Lancet</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Yanai</snm>
                  <fnm>I</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2005</pubdate>
            <volume>37</volume>
            <issue>6</issue>
            <fpage>588</fpage>
            <lpage>589</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1575</pubid>
                  <pubid idtype="pmpid" link="fulltext">15895079</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B38">
            <title>
               <p>Evolution of alternative splicing after gene duplication</p>
            </title>
            <aug>
               <au>
                  <snm>Su</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Yu</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Gu</snm>
                  <fnm>X</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2006</pubdate>
            <volume>16</volume>
            <issue>2</issue>
            <fpage>182</fpage>
            <lpage>189</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1361713</pubid>
                  <pubid idtype="pmpid" link="fulltext">16365379</pubid>
                  <pubid idtype="doi">10.1101/gr.4197006</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B39">
            <title>
               <p>Statistical framework for phylogenomic analysis of gene family expression profiles</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>X</fnm>
               </au>
            </aug>
            <source>Genetics</source>
            <pubdate>2004</pubdate>
            <volume>167</volume>
            <issue>1</issue>
            <fpage>531</fpage>
            <lpage>542</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1470858</pubid>
                  <pubid idtype="pmpid" link="fulltext">15166175</pubid>
                  <pubid idtype="doi">10.1534/genetics.167.1.531</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B40">
            <title>
               <p>Rapid evolution of expression and regulatory divergences after yeast gene duplication</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>W</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2005</pubdate>
            <volume>102</volume>
            <issue>3</issue>
            <fpage>707</fpage>
            <lpage>712</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">545572</pubid>
                  <pubid idtype="pmpid" link="fulltext">15647348</pubid>
                  <pubid idtype="doi">10.1073/pnas.0409186102</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B41">
            <title>
               <p>Rapid divergence in expression between duplicate genes inferred from microarray data</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Nicolae</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Lu</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>WH</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2002</pubdate>
            <volume>18</volume>
            <issue>12</issue>
            <fpage>609</fpage>
            <lpage>613</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0168-9525(02)02837-8</pubid>
                  <pubid idtype="pmpid" link="fulltext">12446139</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B42">
            <title>
               <p>Expression patterns of duplicate genes in the developing root in Arabidopsis thaliana</p>
            </title>
            <aug>
               <au>
                  <snm>Hughes</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Friedman</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>J Mol Evol</source>
            <pubdate>2005</pubdate>
            <volume>60</volume>
            <issue>2</issue>
            <fpage>247</fpage>
            <lpage>256</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/s00239-004-0171-z</pubid>
                  <pubid idtype="pmpid" link="fulltext">15785853</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B43">
            <title>
               <p>Molecular evolution of the seed storage proteins of barley, rye and wheat</p>
            </title>
            <aug>
               <au>
                  <snm>Kreis</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Forde</snm>
                  <fnm>BG</fnm>
               </au>
               <au>
                  <snm>Rahman</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Miflin</snm>
                  <fnm>BJ</fnm>
               </au>
               <au>
                  <snm>Shewry</snm>
                  <fnm>PR</fnm>
               </au>
            </aug>
            <source>J Mol Biol</source>
            <pubdate>1985</pubdate>
            <volume>183</volume>
            <issue>3</issue>
            <fpage>499</fpage>
            <lpage>502</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/0022-2836(85)90017-8</pubid>
                  <pubid idtype="pmpid" link="fulltext">4020867</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B44">
            <title>
               <p>The prolamin storage proteins of cereal seeds: structure and evolution</p>
            </title>
            <aug>
               <au>
                  <snm>Shewry</snm>
                  <fnm>PR</fnm>
               </au>
               <au>
                  <snm>Tatham</snm>
                  <fnm>AS</fnm>
               </au>
            </aug>
            <source>Biochem J</source>
            <pubdate>1990</pubdate>
            <volume>267</volume>
            <issue>1</issue>
            <fpage>1</fpage>
            <lpage>12</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1131235</pubid>
                  <pubid idtype="pmpid">2183790</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B45">
            <title>
               <p>The prolamins of the Triticeae</p>
            </title>
            <aug>
               <au>
                  <snm>Shewry</snm>
                  <fnm>PR</fnm>
               </au>
               <au>
                  <snm>Tatham</snm>
                  <fnm>AS</fnm>
               </au>
               <au>
                  <snm>Halford</snm>
                  <fnm>NG</fnm>
               </au>
            </aug>
            <source>Seed proteins</source>
            <editor>Shewry PR, Casey R</editor>
            <pubdate>1999</pubdate>
            <fpage>35</fpage>
            <lpage>78</lpage>
         </bibl>
         <bibl id="B46">
            <title>
               <p>The prolamins of sorghum, Coix and millets</p>
            </title>
            <aug>
               <au>
                  <snm>Leite</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Neto</snm>
                  <fnm>GC</fnm>
               </au>
               <au>
                  <snm>Vettore</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Yunes</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Arruda</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Seed proteins</source>
            <editor>Shewry PR, Casey R</editor>
            <pubdate>1999</pubdate>
            <fpage>141</fpage>
            <lpage>157</lpage>
         </bibl>
         <bibl id="B47">
            <title>
               <p>Morphometric Analysis of Rice Seed Protein Bodies (Implication for a Significant Contribution of Prolamine to the Total Protein Content of Rice Endosperm)</p>
            </title>
            <aug>
               <au>
                  <snm>Krishnan</snm>
                  <fnm>HB</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>JA</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>1995</pubdate>
            <volume>109</volume>
            <issue>4</issue>
            <fpage>1491</fpage>
            <lpage>1495</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">157685</pubid>
                  <pubid idtype="pmpid" link="fulltext">12228685</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B48">
            <title>
               <p>Variation in the nucleotide sequence of a prolamin gene family in wild rice</p>
            </title>
            <aug>
               <au>
                  <snm>Barbier</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Ishihama</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Plant Mol Biol</source>
            <pubdate>1990</pubdate>
            <volume>15</volume>
            <issue>1</issue>
            <fpage>191</fpage>
            <lpage>195</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/BF00017744</pubid>
                  <pubid idtype="pmpid">2103437</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B49">
            <title>
               <p>Nucleotide sequence of a rice (Oryza sativa) prolamin storage protein gene, RP6</p>
            </title>
            <aug>
               <au>
                  <snm>Wen</snm>
                  <fnm>TN</fnm>
               </au>
               <au>
                  <snm>Shyur</snm>
                  <fnm>LF</fnm>
               </au>
               <au>
                  <snm>Su</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>CS</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>1993</pubdate>
            <volume>101</volume>
            <issue>3</issue>
            <fpage>1115</fpage>
            <lpage>1116</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">158734</pubid>
                  <pubid idtype="pmpid" link="fulltext">8310050</pubid>
                  <pubid idtype="doi">10.1104/pp.101.3.1115</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B50">
            <title>
               <p>Amino acid variation in the 10 kDa Oryza prolamin seed storage protein</p>
            </title>
            <aug>
               <au>
                  <snm>Mullins</snm>
                  <fnm>IM</fnm>
               </au>
               <au>
                  <snm>Hilu</snm>
                  <fnm>KW</fnm>
               </au>
            </aug>
            <source>J Agric Food Chem</source>
            <pubdate>2004</pubdate>
            <volume>52</volume>
            <issue>8</issue>
            <fpage>2242</fpage>
            <lpage>2246</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1021/jf0305480</pubid>
                  <pubid idtype="pmpid" link="fulltext">15080628</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B51">
            <title>
               <p>Proteinase inhibitors in plants: genes for improving defenses against insects and pathogens</p>
            </title>
            <aug>
               <au>
                  <snm>Ryan</snm>
                  <fnm>CA</fnm>
               </au>
            </aug>
            <source>Annu Rev Phytopathol</source>
            <pubdate>1990</pubdate>
            <volume>28</volume>
            <fpage>425</fpage>
            <lpage>449</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1146/annurev.py.28.090190.002233</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B52">
            <title>
               <p>A pure trypsin inhibitor from soya beans</p>
            </title>
            <aug>
               <au>
                  <snm>Birk</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Gertler</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Khalef</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>Biochem J</source>
            <pubdate>1963</pubdate>
            <volume>87</volume>
            <fpage>281</fpage>
            <lpage>284</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1201889</pubid>
                  <pubid idtype="pmpid">13968438</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B53">
            <title>
               <p>Differentiation of soy bean anti-tryptic factors</p>
            </title>
            <aug>
               <au>
                  <snm>Bowman</snm>
                  <fnm>DE</fnm>
               </au>
            </aug>
            <source>Proc Soc Exp Biol Med</source>
            <pubdate>1946</pubdate>
            <volume>63</volume>
            <fpage>547</fpage>
            <lpage>550</lpage>
         </bibl>
         <bibl id="B54">
            <title>
               <p>Cloning, expression and localization pattern of a trypsin inhibitor gene from rice</p>
            </title>
            <aug>
               <au>
                  <snm>Masumura</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Fujioka</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Matsui</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kumazawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Tashiro</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Morita</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Plant &amp; Animal Genomes XI Conference</source>
            <pubdate>2003</pubdate>
         </bibl>
         <bibl id="B55">
            <title>
               <p>Wheat germ trypsin inhibitors. Isolation and structural characterization of single-headed and double-headed inhibitors of the Bowman-Birk type</p>
            </title>
            <aug>
               <au>
                  <snm>Odani</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Koide</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ono</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>J Biochem (Tokyo)</source>
            <pubdate>1986</pubdate>
            <volume>100</volume>
            <issue>4</issue>
            <fpage>975</fpage>
            <lpage>983</lpage>
         </bibl>
         <bibl id="B56">
            <title>
               <p>Purification and primary structure determination of a Bowman-Birk trypsin inhibitor from Torresea cearensis seeds</p>
            </title>
            <aug>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>AS</fnm>
               </au>
               <au>
                  <snm>Sampaio</snm>
                  <fnm>MU</fnm>
               </au>
               <au>
                  <snm>Marangoni</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>de Oliveira</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Novello</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Oliva</snm>
                  <fnm>ML</fnm>
               </au>
               <au>
                  <snm>Fink</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Sampaio</snm>
                  <fnm>CA</fnm>
               </au>
            </aug>
            <source>Biol Chem</source>
            <pubdate>1997</pubdate>
            <volume>378</volume>
            <issue>3-4</issue>
            <fpage>273</fpage>
            <lpage>281</lpage>
            <xrefbib>
               <pubid idtype="pmpid">9165081</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B57">
            <title>
               <p>Amino acid sequence of trypsin chymotrypsin inhibitors (AI, AII, BI and BII) from peanut (Arachis hypogaea): a discussion on the molecular evolution of legume Bowman-Birk type inhibitors</p>
            </title>
            <aug>
               <au>
                  <snm>Norioka</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ikenaka</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>J Biochem</source>
            <pubdate>1983</pubdate>
            <volume>94</volume>
            <fpage>589</fpage>
            <lpage>599</lpage>
            <xrefbib>
               <pubid idtype="pmpid">6630176</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B58">
            <title>
               <p>Molecular cloning and functional analysis of a novel type of Bowman-Birk inhibitor gene family in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Qu</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Pan</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Okamoto</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Zhao</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Gu</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>Z</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>2003</pubdate>
            <volume>133</volume>
            <issue>2</issue>
            <fpage>560</fpage>
            <lpage>570</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">219032</pubid>
                  <pubid idtype="pmpid" link="fulltext">12972663</pubid>
                  <pubid idtype="doi">10.1104/pp.103.024810</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B59">
            <title>
               <p>Protection against adverse biological effects induced by space radiation by the Bowman-Birk inhibitor and antioxidants</p>
            </title>
            <aug>
               <au>
                  <snm>Kennedy</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Zhou</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Donahue</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Ware</snm>
                  <fnm>JH</fnm>
               </au>
            </aug>
            <source>Radiat Res</source>
            <pubdate>2006</pubdate>
            <volume>166</volume>
            <issue>2</issue>
            <fpage>327</fpage>
            <lpage>332</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1667/RR3599.1</pubid>
                  <pubid idtype="pmpid" link="fulltext">16881733</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B60">
            <title>
               <p>Bowman-Birk inhibitor abates proteasome function and suppresses the proliferation of MCF7 breast cancer cells through accumulation of MAP kinase phosphatase-1</p>
            </title>
            <aug>
               <au>
                  <snm>Chen</snm>
                  <fnm>YW</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>SC</fnm>
               </au>
               <au>
                  <snm>Lin-Shiau</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>JK</fnm>
               </au>
            </aug>
            <source>Carcinogenesis</source>
            <pubdate>2005</pubdate>
            <volume>26</volume>
            <issue>7</issue>
            <fpage>1296</fpage>
            <lpage>1306</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/carcin/bgi062</pubid>
                  <pubid idtype="pmpid" link="fulltext">15746161</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B61">
            <title>
               <p>Radioprotection of normal tissue to improve radiotherapy: the effect of the Bowman Birk protease inhibitor</p>
            </title>
            <aug>
               <au>
                  <snm>Dittmann</snm>
                  <fnm>KH</fnm>
               </au>
               <au>
                  <snm>Mayer</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Rodemann</snm>
                  <fnm>HP</fnm>
               </au>
            </aug>
            <source>Curr Med Chem Anticancer Agents</source>
            <pubdate>2003</pubdate>
            <volume>3</volume>
            <issue>5</issue>
            <fpage>360</fpage>
            <lpage>363</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.2174/1568011033482288</pubid>
                  <pubid idtype="pmpid" link="fulltext">12871082</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B62">
            <title>
               <p>MEGA3: Integrated software for Molecular Evolutionary Genetics Analysis and sequence alignment</p>
            </title>
            <aug>
               <au>
                  <snm>Kumar</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tamura</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Nei</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Brief Bioinform</source>
            <pubdate>2004</pubdate>
            <volume>5</volume>
            <issue>2</issue>
            <fpage>150</fpage>
            <lpage>163</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bib/5.2.150</pubid>
                  <pubid idtype="pmpid" link="fulltext">15260895</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B63">
            <title>
               <p>The Pfam protein families database</p>
            </title>
            <aug>
               <au>
                  <snm>Bateman</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Coin</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Durbin</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Finn</snm>
                  <fnm>RD</fnm>
               </au>
               <au>
                  <snm>Hollich</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Griffiths-Jones</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Khanna</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Marshall</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Moxon</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Sonnhammer</snm>
                  <fnm>EL</fnm>
               </au>
               <au>
                  <snm>Studholme</snm>
                  <fnm>DJ</fnm>
               </au>
               <au>
                  <snm>Yeats</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Eddy</snm>
                  <fnm>SR</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2004</pubdate>
            <volume>32</volume>
            <issue>Database issue</issue>
            <fpage>D138</fpage>
            <lpage>D141</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">308855</pubid>
                  <pubid idtype="pmpid" link="fulltext">14681378</pubid>
                  <pubid idtype="doi">10.1093/nar/gkh121</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B64">
            <title>
               <p>Profile hidden Markov models</p>
            </title>
            <aug>
               <au>
                  <snm>Eddy</snm>
                  <fnm>SR</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>1998</pubdate>
            <volume>14</volume>
            <issue>9</issue>
            <fpage>755</fpage>
            <lpage>763</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/14.9.755</pubid>
                  <pubid idtype="pmpid" link="fulltext">9918945</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B65">
            <aug>
               <au>
                  <snm>Gish</snm>
                  <fnm>W</fnm>
               </au>
            </aug>
            <url>http://blast.wustl.edu</url>
            <note>1996 - 2006</note>
         </bibl>
         <bibl id="B66">
            <title>
               <p>The Distribution of the Flora in the Alpine Zone</p>
            </title>
            <aug>
               <au>
                  <snm>Jaccard</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>The New Phytologist</source>
            <pubdate>1912</pubdate>
            <volume>11</volume>
            <issue>2</issue>
            <fpage>37</fpage>
            <lpage>50</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1111/j.1469-8137.1912.tb05611.x</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B67">
            <title>
               <p>CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice</p>
            </title>
            <aug>
               <au>
                  <snm>Thompson</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Higgins</snm>
                  <fnm>DG</fnm>
               </au>
               <au>
                  <snm>Gibson</snm>
                  <fnm>TJ</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>1994</pubdate>
            <volume>22</volume>
            <issue>22</issue>
            <fpage>4673</fpage>
            <lpage>4680</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">308517</pubid>
                  <pubid idtype="pmpid" link="fulltext">7984417</pubid>
                  <pubid idtype="doi">10.1093/nar/22.22.4673</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B68">
            <title>
               <p>Multiple sequence alignment with the Clustal series of programs</p>
            </title>
            <aug>
               <au>
                  <snm>Chenna</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Sugawara</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Koike</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Lopez</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gibson</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Higgins</snm>
                  <fnm>DG</fnm>
               </au>
               <au>
                  <snm>Thompson</snm>
                  <fnm>JD</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2003</pubdate>
            <volume>31</volume>
            <issue>13</issue>
            <fpage>3497</fpage>
            <lpage>3500</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">168907</pubid>
                  <pubid idtype="pmpid" link="fulltext">12824352</pubid>
                  <pubid idtype="doi">10.1093/nar/gkg500</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B69">
            <title>
               <p>Intron gain and loss in segmentally duplicated genes in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Lin</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Silva</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Gu</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Buell</snm>
                  <fnm>CR</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2006</pubdate>
            <volume>7</volume>
            <issue>5</issue>
            <fpage>R41</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1779517</pubid>
                  <pubid idtype="pmpid" link="fulltext">16719932</pubid>
                  <pubid idtype="doi">10.1186/gb-2006-7-5-r41</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B70">
            <title>
               <p>DAGchainer: a tool for mining segmental genome duplications and synteny</p>
            </title>
            <aug>
               <au>
                  <snm>Haas</snm>
                  <fnm>BJ</fnm>
               </au>
               <au>
                  <snm>Delcher</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Wortman</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Salzberg</snm>
                  <fnm>SL</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <issue>18</issue>
            <fpage>3643</fpage>
            <lpage>3646</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bth397</pubid>
                  <pubid idtype="pmpid" link="fulltext">15247098</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B71">
            <title>
               <p>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium</p>
            </title>
            <aug>
               <au>
                  <snm>Ashburner</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Ball</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Blake</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Butler</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Cherry</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>AP</fnm>
               </au>
               <au>
                  <snm>Dolinski</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Dwight</snm>
                  <fnm>SS</fnm>
               </au>
               <au>
                  <snm>Eppig</snm>
                  <fnm>JT</fnm>
               </au>
               <au>
                  <snm>Harris</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Hill</snm>
                  <fnm>DP</fnm>
               </au>
               <au>
                  <snm>Issel-Tarver</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Kasarskis</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Lewis</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Matese</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Richardson</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Ringwald</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Rubin</snm>
                  <fnm>GM</fnm>
               </au>
               <au>
                  <snm>Sherlock</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2000</pubdate>
            <volume>25</volume>
            <issue>1</issue>
            <fpage>25</fpage>
            <lpage>29</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/75556</pubid>
                  <pubid idtype="pmpid" link="fulltext">10802651</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B72">
            <title>
               <p>Improving the Arabidopsis genome annotation using maximal transcript alignment assemblies</p>
            </title>
            <aug>
               <au>
                  <snm>Haas</snm>
                  <fnm>BJ</fnm>
               </au>
               <au>
                  <snm>Delcher</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Mount</snm>
                  <fnm>SM</fnm>
               </au>
               <au>
                  <snm>Wortman</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>RK</fnm>
                  <suf>Jr.</suf>
               </au>
               <au>
                  <snm>Hannick</snm>
                  <fnm>LI</fnm>
               </au>
               <au>
                  <snm>Maiti</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Ronning</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Rusch</snm>
                  <fnm>DB</fnm>
               </au>
               <au>
                  <snm>Town</snm>
                  <fnm>CD</fnm>
               </au>
               <au>
                  <snm>Salzberg</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>O</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2003</pubdate>
            <volume>31</volume>
            <issue>19</issue>
            <fpage>5654</fpage>
            <lpage>5666</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">206470</pubid>
                  <pubid idtype="pmpid" link="fulltext">14500829</pubid>
                  <pubid idtype="doi">10.1093/nar/gkg770</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B73">
            <title>
               <p>A codon-based model of nucleotide substitution for protein-coding DNA sequences</p>
            </title>
            <aug>
               <au>
                  <snm>Goldman</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>Z</fnm>
               </au>
            </aug>
            <source>Mol Biol Evol</source>
            <pubdate>1994</pubdate>
            <volume>11</volume>
            <issue>5</issue>
            <fpage>725</fpage>
            <lpage>736</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">7968486</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B74">
            <title>
               <p>PAML: a program package for phylogenetic analysis by maximum likelihood</p>
            </title>
            <aug>
               <au>
                  <snm>Yang</snm>
                  <fnm>Z</fnm>
               </au>
            </aug>
            <source>Comput Appl Biosci</source>
            <pubdate>1997</pubdate>
            <volume>13</volume>
            <issue>5</issue>
            <fpage>555</fpage>
            <lpage>556</lpage>
            <xrefbib>
               <pubid idtype="pmpid">9367129</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B75">
            <title>
               <p>Substitution rate comparisons between grasses and palms: synonymous rate differences at the nuclear gene Adh parallel rate differences at the plastid gene rbcL</p>
            </title>
            <aug>
               <au>
                  <snm>Gaut</snm>
                  <fnm>BS</fnm>
               </au>
               <au>
                  <snm>Morton</snm>
                  <fnm>BR</fnm>
               </au>
               <au>
                  <snm>McCaig</snm>
                  <fnm>BC</fnm>
               </au>
               <au>
                  <snm>Clegg</snm>
                  <fnm>MT</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1996</pubdate>
            <volume>93</volume>
            <issue>19</issue>
            <fpage>10274</fpage>
            <lpage>10279</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">38374</pubid>
                  <pubid idtype="pmpid" link="fulltext">8816790</pubid>
                  <pubid idtype="doi">10.1073/pnas.93.19.10274</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B76">
            <title>
               <p>Plant MPSS databases: signature-based transcriptional resources for analyses of mRNA and small RNA</p>
            </title>
            <aug>
               <au>
                  <snm>Nakano</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Nobuta</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Vemaraju</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Tej</snm>
                  <fnm>SS</fnm>
               </au>
               <au>
                  <snm>Skogen</snm>
                  <fnm>JW</fnm>
               </au>
               <au>
                  <snm>Meyers</snm>
                  <fnm>BC</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2006</pubdate>
            <volume>34</volume>
            <issue>Database issue</issue>
            <fpage>D731</fpage>
            <lpage>D735</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1347440</pubid>
                  <pubid idtype="pmpid" link="fulltext">16381968</pubid>
                  <pubid idtype="doi">10.1093/nar/gkj077</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B77">
            <title>
               <p>Transcriptional similarities, dissimilarities, and conservation of cis-elements in duplicated genes of Arabidopsis</p>
            </title>
            <aug>
               <au>
                  <snm>Haberer</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Hindemitt</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Meyers</snm>
                  <fnm>BC</fnm>
               </au>
               <au>
                  <snm>Mayer</snm>
                  <fnm>KF</fnm>
               </au>
            </aug>
            <source>Plant Physiol</source>
            <pubdate>2004</pubdate>
            <volume>136</volume>
            <issue>2</issue>
            <fpage>3009</fpage>
            <lpage>3022</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">523363</pubid>
                  <pubid idtype="pmpid" link="fulltext">15489284</pubid>
                  <pubid idtype="doi">10.1104/pp.104.046466</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B78">
            <title>
               <p>The Vmatch large scale sequence analysis software</p>
            </title>
            <aug>
               <au>
                  <snm>Kurtz</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <url>http://www.vmatch.de/</url>
         </bibl>
         <bibl id="B79">
            <title>
               <p>Fundamentals of Biostatistics</p>
            </title>
            <aug>
               <au>
                  <snm>Rosner</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <publisher>Duxbury Press</publisher>
            <edition>4th</edition>
            <pubdate>1995</pubdate>
         </bibl>
         <bibl id="B80">
            <title>
               <p>Congruence of tissue expression profiles from Gene Expression Atlas, SAGEmap and TissueInfo databases</p>
            </title>
            <aug>
               <au>
                  <snm>Huminiecki</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Lloyd</snm>
                  <fnm>AT</fnm>
               </au>
               <au>
                  <snm>Wolfe</snm>
                  <fnm>KH</fnm>
               </au>
            </aug>
            <source>BMC Genomics</source>
            <pubdate>2003</pubdate>
            <volume>4</volume>
            <issue>1</issue>
            <fpage>31</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">183867</pubid>
                  <pubid idtype="pmpid" link="fulltext">12885301</pubid>
                  <pubid idtype="doi">10.1186/1471-2164-4-31</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>
