<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2164-9-204</ui>
   <ji>1471-2164</ji>
   <fm>
      <dochead>Research article</dochead>
      <bibl>
         <title>
            <p>Genome sequence and rapid evolution of the rice pathogen <it>Xanthomonas oryzae </it>pv. oryzae PXO99<sup>A</sup></p>
         </title>
         <aug>
            <au id="A1" ca="yes">
               <snm>Salzberg</snm>
               <mi>L</mi>
               <fnm>Steven</fnm>
               <insr iid="I1"/>
               <email>salzberg@umd.edu</email>
            </au>
            <au id="A2">
               <snm>Sommer</snm>
               <mi>D</mi>
               <fnm>Daniel</fnm>
               <insr iid="I1"/>
               <email>dsommer@umiacs.umd.edu</email>
            </au>
            <au id="A3">
               <snm>Schatz</snm>
               <mi>C</mi>
               <fnm>Michael</fnm>
               <insr iid="I1"/>
               <email>mschatz@umiacs.umd.edu</email>
            </au>
            <au id="A4">
               <snm>Phillippy</snm>
               <mi>M</mi>
               <fnm>Adam</fnm>
               <insr iid="I1"/>
               <email>amp@umiacs.umd.edu</email>
            </au>
            <au id="A5">
               <snm>Rabinowicz</snm>
               <mi>D</mi>
               <fnm>Pablo</fnm>
               <insr iid="I2"/>
               <insr iid="I3"/>
               <email>prabinowicz@som.umaryland.edu</email>
            </au>
            <au id="A6">
               <snm>Tsuge</snm>
               <fnm>Seiji</fnm>
               <insr iid="I4"/>
               <email>s_tsuge@kpu.ac.jp</email>
            </au>
            <au id="A7">
               <snm>Furutani</snm>
               <fnm>Ayako</fnm>
               <insr iid="I4"/>
               <insr iid="I5"/>
               <email>a9920614@kpu.ac.jp</email>
            </au>
            <au id="A8">
               <snm>Ochiai</snm>
               <fnm>Hirokazu</fnm>
               <insr iid="I5"/>
               <email>ochiaih@nias.affrc.go.jp</email>
            </au>
            <au id="A9">
               <snm>Delcher</snm>
               <mi>L</mi>
               <fnm>Arthur</fnm>
               <insr iid="I1"/>
               <email>adelcher@umiacs.umd.edu</email>
            </au>
            <au id="A10">
               <snm>Kelley</snm>
               <fnm>David</fnm>
               <insr iid="I1"/>
               <email>dakelley@umiacs.umd.edu</email>
            </au>
            <au id="A11">
               <snm>Madupu</snm>
               <fnm>Ramana</fnm>
               <insr iid="I2"/>
               <insr iid="I6"/>
               <email>rmadupu@jcvi.org</email>
            </au>
            <au id="A12">
               <snm>Puiu</snm>
               <fnm>Daniela</fnm>
               <insr iid="I1"/>
               <email>dpuiu@umiacs.umd.edu</email>
            </au>
            <au id="A13">
               <snm>Radune</snm>
               <fnm>Diana</fnm>
               <insr iid="I2"/>
               <insr iid="I6"/>
               <email>dbushman@jcvi.org</email>
            </au>
            <au id="A14">
               <snm>Shumway</snm>
               <fnm>Martin</fnm>
               <insr iid="I2"/>
               <insr iid="I7"/>
               <email>shumwaym@ncbi.nlm.nih.gov</email>
            </au>
            <au id="A15">
               <snm>Trapnell</snm>
               <fnm>Cole</fnm>
               <insr iid="I1"/>
               <email>cole@cs.umd.edu</email>
            </au>
            <au id="A16">
               <snm>Aparna</snm>
               <fnm>Gudlur</fnm>
               <insr iid="I8"/>
               <email>aparna@ccmb.res.in</email>
            </au>
            <au id="A17">
               <snm>Jha</snm>
               <fnm>Gopaljee</fnm>
               <insr iid="I9"/>
               <email>jmsgopal@yahoo.co.in</email>
            </au>
            <au id="A18">
               <snm>Pandey</snm>
               <fnm>Alok</fnm>
               <insr iid="I8"/>
               <email>alok@ccmb.res.in</email>
            </au>
            <au id="A19">
               <snm>Patil</snm>
               <mi>B</mi>
               <fnm>Prabhu</fnm>
               <insr iid="I8"/>
               <email>prabhubpatil@gmail.com</email>
            </au>
            <au id="A20">
               <snm>Ishihara</snm>
               <fnm>Hiromichi</fnm>
               <insr iid="I10"/>
               <email>hiromichi.ishihara@colostate.edu</email>
            </au>
            <au id="A21">
               <snm>Meyer</snm>
               <mi>F</mi>
               <fnm>Damien</fnm>
               <insr iid="I11"/>
               <email>dfmeyer@iastate.edu</email>
            </au>
            <au id="A22">
               <snm>Szurek</snm>
               <fnm>Boris</fnm>
               <insr iid="I12"/>
               <email>boris.szurek@mpl.ird.fr</email>
            </au>
            <au id="A23">
               <snm>Verdier</snm>
               <fnm>Valerie</fnm>
               <insr iid="I12"/>
               <email>valerie.verdier@mpl.ird.fr</email>
            </au>
            <au id="A24">
               <snm>Koebnik</snm>
               <fnm>Ralf</fnm>
               <insr iid="I12"/>
               <email>koebnik@mpl.ird.fr</email>
            </au>
            <au id="A25">
               <snm>Dow</snm>
               <fnm>J Maxwell</fnm>
               <insr iid="I13"/>
               <email>m.dow@ucc.ie</email>
            </au>
            <au id="A26">
               <snm>Ryan</snm>
               <mi>P</mi>
               <fnm>Robert</fnm>
               <insr iid="I13"/>
               <email>r.ryan@ucc.ie</email>
            </au>
            <au id="A27">
               <snm>Hirata</snm>
               <fnm>Hisae</fnm>
               <insr iid="I14"/>
               <email>hisaeh@agr.shizuoka.ac.jp</email>
            </au>
            <au id="A28">
               <snm>Tsuyumu</snm>
               <fnm>Shinji</fnm>
               <insr iid="I13"/>
               <email>tsuyumu@agr.shizuoka.ac.jp</email>
            </au>
            <au id="A29">
               <snm>Won Lee</snm>
               <fnm>Sang</fnm>
               <insr iid="I15"/>
               <email>drlee@ucdavis.edu</email>
            </au>
            <au id="A30">
               <snm>Ronald</snm>
               <mi>C</mi>
               <fnm>Pamela</fnm>
               <insr iid="I15"/>
               <email>pcronald@ucdavis.edu</email>
            </au>
            <au id="A31">
               <snm>Sonti</snm>
               <mi>V</mi>
               <fnm>Ramesh</fnm>
               <insr iid="I8"/>
               <email>sonti@ccmb.res.in</email>
            </au>
            <au id="A32">
               <snm>Van Sluys</snm>
               <fnm>Marie-Anne</fnm>
               <insr iid="I9"/>
               <insr iid="I16"/>
               <email>mavsluys2004@yahoo.com</email>
            </au>
            <au id="A33">
               <snm>Leach</snm>
               <mi>E</mi>
               <fnm>Jan</fnm>
               <insr iid="I9"/>
               <email>jan.leach@colostate.edu</email>
            </au>
            <au id="A34">
               <snm>White</snm>
               <mi>F</mi>
               <fnm>Frank</fnm>
               <insr iid="I17"/>
               <email>fwhite@ksu.edu</email>
            </au>
            <au id="A35">
               <snm>Bogdanove</snm>
               <mi>J</mi>
               <fnm>Adam</fnm>
               <insr iid="I11"/>
               <email>ajbog@iastate.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Center for Bioinformatics and Computational Biology, University of Maryland, College Park, MD 20742, USA</p>
            </ins>
            <ins id="I2">
               <p>The Institute for Genomic Research, Rockville, MD 20850, USA</p>
            </ins>
            <ins id="I3">
               <p>Institute for Genome Sciences, University of Maryland, Baltimore, MD 21201, USA</p>
            </ins>
            <ins id="I4">
               <p>Laboratory of Plant Pathology, Kyoto Prefectural University, Sakyo, Kyoto 606-8522, Japan</p>
            </ins>
            <ins id="I5">
               <p>Department of Genetic Resources, National Institute of Agrobiological Sciences, Kannondai, Tsukuba 305-8602, Japan</p>
            </ins>
            <ins id="I6">
               <p>Current address: J. Craig Venter Institute, Rockville, MD 20850, USA</p>
            </ins>
            <ins id="I7">
               <p>Current address: National Center for Biotechnology Information, National Institutes of Health, Bethesda, MD 20894, USA</p>
            </ins>
            <ins id="I8">
               <p>Centre for Cellular and Molecular Biology, Council of Scientific and Industrial Research, Hyderabad, India</p>
            </ins>
            <ins id="I9">
               <p>Institute of Himalayan Bioresource Technology, Council of Scientific and Industrial Research, Palampur, India</p>
            </ins>
            <ins id="I10">
               <p>Department of Bioagricultural Sciences and Pest Management, Colorado State University, Fort Collins, CO, USA</p>
            </ins>
            <ins id="I11">
               <p>Department of Plant Pathology, Iowa State University, Ames, IA, USA</p>
            </ins>
            <ins id="I12">
               <p>Institut de la Recherche pour le Developpement, 911 Av. Agropolis, Montpellier, 34090, France</p>
            </ins>
            <ins id="I13">
               <p>BIOMERIT Research Centre, BioSciences Institute, University College Cork, Cork, Ireland</p>
            </ins>
            <ins id="I14">
               <p>Graduate School of Natural Science &amp; Technology, Shizuoka University, 836 Ohya, Suruga-ku, Shizuoka, 422-8017, Japan</p>
            </ins>
            <ins id="I15">
               <p>Department of Plant Pathology, UC Davis, Davis, CA 95616, USA</p>
            </ins>
            <ins id="I16">
               <p>Departamento de Bot&#226;nica, IB-USP, Sao Paulo, SP, Brazil</p>
            </ins>
            <ins id="I17">
               <p>Department of Plant Pathology, Kansas State University, Manhattan, KS, USA</p>
            </ins>
         </insg>
         <source>BMC Genomics</source>
         <issn>1471-2164</issn>
         <pubdate>2008</pubdate>
         <volume>9</volume>
         <issue>1</issue>
         <fpage>204</fpage>
         <url>http://www.biomedcentral.com/1471-2164/9/204</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">18452608</pubid>
               <pubid idtype="doi">10.1186/1471-2164-9-204</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>27</day>
               <month>2</month>
               <year>2008</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>01</day>
               <month>5</month>
               <year>2008</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>01</day>
               <month>5</month>
               <year>2008</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2008</year>
         <collab>Salzberg et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p><it>Xanthomonas oryzae </it>pv. oryzae causes bacterial blight of rice (<it>Oryza sativa </it>L.), a major disease that constrains production of this staple crop in many parts of the world. We report here on the complete genome sequence of strain PXO99<sup>A </sup>and its comparison to two previously sequenced strains, KACC10331 and MAFF311018, which are highly similar to one another.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>The PXO99<sup>A </sup>genome is a single circular chromosome of 5,240,075 bp, considerably longer than the genomes of the other strains (4,941,439 bp and 4,940,217 bp, respectively), and it contains 5083 protein-coding genes, including 87 not found in KACC10331 or MAFF311018. PXO99<sup>A </sup>contains a greater number of virulence-associated transcription activator-like effector genes and has at least ten major chromosomal rearrangements relative to KACC10331 and MAFF311018. PXO99<sup>A </sup>contains numerous copies of diverse insertion sequence elements, members of which are associated with 7 out of 10 of the major rearrangements. A rapidly-evolving CRISPR (clustered regularly interspersed short palindromic repeats) region contains evidence of dozens of phage infections unique to the PXO99<sup>A </sup>lineage. PXO99<sup>A </sup>also contains a unique, near-perfect tandem repeat of 212 kilobases close to the replication terminus.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>Our results provide striking evidence of genome plasticity and rapid evolution within <it>Xanthomonas oryzae </it>pv. oryzae. The comparisons point to sources of genomic variation and candidates for strain-specific adaptations of this pathogen that help to explain the extraordinary diversity of <it>Xanthomonas oryzae </it>pv. oryzae genotypes and races that have been isolated from around the world.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification type="bmc" subtype="user_supplied_xml" id="endnote"/>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p><it>Xanthomonas oryzae </it>pathovar oryzae (Xoo), a member of the gamma subdivision of the proteobacteria, is a major pathogen of rice (<it>Oryza sativa </it>L.). It enters rice leaves through water pores or wounds and moves systemically by invading the xylem, causing a disease known as bacterial blight <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. Bacterial blight is the most serious bacterial disease of rice, and in some areas, the most important of any disease of rice, carrying the potential to reduce yields by as much as 50% <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. When Xoo infects at the seedling stage, it causes a syndrome known as kresek, which can lead to nearly complete crop loss <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. Several factors that contribute to fitness and virulence in Xoo have been identified (reviewed in <abbrgrp><abbr bid="B3">3</abbr></abbrgrp>). However, as rice is a staple crop for much of the world population, as well as a model for cereal biology <abbrgrp><abbr bid="B4">4</abbr></abbrgrp>, a better understanding of pathogenesis by Xoo remains a pressing goal both for control of bacterial blight and for fundamental understanding of bacterial-plant interactions.</p>
         <p>Bacterial blight occurs in most rice-growing areas of the world, and Xoo isolates from within and across Africa, India, Asia, and Australia show a great diversity of genotypes, based on polymorphism of transposable elements, predominantly insertion sequences (IS), avirulence genes, rep/box elements, and other markers <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>. Based on the ability of strains to elicit resistance in particular host genotypes, several distinct races have been defined <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. Rice is one of our most ancient domesticated crops, and comprises more than 100,000 distinct varieties <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. Twenty-nine bacterial blight resistance (<it>R</it>) genes (<it>Xa1-Xa29</it>) have been identified to date <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>. The great diversity of strains within Xoo undoubtedly reflects adaptation of the pathogen to the diversity of host genotypes as well as the diverse environmental conditions in which rice is grown. From a broader perspective, Xoo belongs to a diverse and highly adapted genus that includes more than 20 plant-associated or plant pathogenic species. Each species may comprise one or more pathogenic varieties (pathovar; pv.), which demonstrate distinct host plant specificity or modes of infection. Collectively, different <it>Xanthomonas </it>species and pathovars cause diseases in over 390 host plant species <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>.</p>
         <p>Complete genome sequences have been published for two strains of Xoo, MAFF311018 (MAFF), a Japanese race 1 strain also referred to as T7174 <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>, and KACC10331 (KACC), a Korean race 1 isolate also known as KXO85 <abbrgrp><abbr bid="B10">10</abbr></abbrgrp>. Comparative analysis of multiple Xoo genomes promises insight into specific adaptations that allow different strains to maintain virulence in different types of rice in different regions of the world. Of particular potential interest are adaptations involving extracellular components, and type III effectors, which have been established as critical virulence factors in bacterial blight or other plant bacterial diseases <abbrgrp><abbr bid="B3">3</abbr><abbr bid="B11">11</abbr></abbrgrp>.</p>
         <p>The genomes of MAFF and KACC overall are highly similar to one another in gene content and organization. We report here the complete genome sequence of a third strain of Xoo, PXO99<sup>A</sup>, which, as described below, is considerably more distant from either of these strains than they are from each other. PXO99<sup>A </sup>is a 5-azacytidine-resistant derivative of PXO99, which was isolated in Los Ba&#241;os and classified as Philippine race 6 <abbrgrp><abbr bid="B12">12</abbr></abbrgrp>. Genotypically, however, PXO99 is more similar to isolates from South Asia (Nepal and India) than to other Philippine isolates <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. In contrast to MAFF and KACC, PXO99<sup>A </sup>is virulent toward a large number of rice varieties representing diverse genetic sources of resistance, including the broad-spectrum, recessive resistance gene <it>xa5 </it><abbrgrp><abbr bid="B14">14</abbr></abbrgrp>. The relatively few resistance genes effective against PXO99<sup>A </sup>include the recessive resistance gene <it>xa13</it>, which is ineffective against MAFF and KACC, the recently characterized broad-spectrum resistance gene <it>Xa27</it>, and the pattern recognition receptor-like resistance gene <it>Xa21</it>, which is effective against MAFF but not against KACC <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr></abbrgrp>. Because of its amenability to genetic analysis, and its relatively broad cultivar specificity, PXO99<sup>A </sup>has been the focus of numerous studies of the molecular basis of bacterial blight and blight resistance.</p>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <sec>
            <st>
               <p>The PXO99<sup>A </sup>genome</p>
            </st>
            <p>The PXO99<sup>A </sup>genome is a single circular chromosome of 5,240,075 bp with an overall GC content of 63.6%. It contains 5083 protein-coding genes, 2 ribosomal RNA operons, and 55 tRNAs (Table <tblr tid="T1">1</tblr>). The origin of replication was identified by similarity to other <it>Xanthomonas </it>genomes, by proximity to genes (<it>dnaA</it>, <it>dnaN</it>, and <it>gyrB</it>) often found near the origin on bacterial genomes, and by GC-skew analysis, which examines the excess of G versus C on the leading strand <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>. A schematic representation of the genome is provided in Figure <figr fid="F1">1</figr>.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Comparison of 3 <it>Xanthomonas oryzae </it>pv. oryzae genomes</p>
               </caption>
               <tblbdy cols="4">
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>PXO99<sup>A</sup></p>
                     </c>
                     <c ca="left">
                        <p>KACC</p>
                     </c>
                     <c ca="left">
                        <p>MAFF</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Length (bp)</p>
                     </c>
                     <c ca="left">
                        <p>5,240,075</p>
                     </c>
                     <c ca="left">
                        <p>4,941,439</p>
                     </c>
                     <c ca="left">
                        <p>4,940,217</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>GC content (%)</p>
                     </c>
                     <c ca="left">
                        <p>63.6</p>
                     </c>
                     <c ca="left">
                        <p>63.7</p>
                     </c>
                     <c ca="left">
                        <p>63.7</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Annotated genes</p>
                     </c>
                     <c ca="left">
                        <p>5,083</p>
                     </c>
                     <c ca="left">
                        <p>4,637</p>
                     </c>
                     <c ca="left">
                        <p>4,372</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>IS elements (complete/fragment)</p>
                     </c>
                     <c ca="left">
                        <p>267 (683)</p>
                     </c>
                     <c ca="left">
                        <p>252 (714)</p>
                     </c>
                     <c ca="left">
                        <p>251 (712)</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>TAL effector genes</p>
                     </c>
                     <c ca="left">
                        <p>19</p>
                     </c>
                     <c ca="left">
                        <p>15</p>
                     </c>
                     <c ca="left">
                        <p>17</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Circular representation of the <it>Xanthomonas oryzae </it>pv. oryzae genome</p>
               </caption>
               <text>
                  <p><b>Circular representation of the <it>Xanthomonas oryzae </it>pv. oryzae genome</b>. Rings illustrate, from outside to inside: protein coding genes (forward strand), protein coding genes (reverse strand), TAL effectors (green) and IS elements (red), and GC-skew plot showing (G-C)/(G+C) in 10 kilobase windows. Positive values of GC-skew indicate the leading strand of replication, negative values the lagging strand.</p>
               </text>
               <graphic file="1471-2164-9-204-1"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Relationship to other sequenced <it>Xanthomonas oryzae </it>genomes</p>
            </st>
            <p>To assess the phylogenetic relationships among PXO99<sup>A </sup>and related strains, we aligned the complete genome to the genomes of MAFF, KACC, and strain BLS256 of <it>X. oryzae </it>pv. oryzicola (GenBank Accession <ext-link ext-link-type="gen" ext-link-id="AAQN01000001">AAQN01000001</ext-link>), and generated a cladogram using Mauve 2.1.1 <abbrgrp><abbr bid="B19">19</abbr></abbrgrp>. MAFF and KACC group together, but PXO99<sup>A </sup>is clearly distinct and considerably more distant from MAFF and KACC than they are from one another (Additional file <supplr sid="S1">1</supplr>). The tree was confirmed by another tree built with all the sequenced <it>Xanthomonas </it>genomes and rooted with <it>Xylella fastidiosa </it>(Temecula strain) (data not shown).</p>
            <suppl id="S1">
               <title>
                  <p>Additional file 1</p>
               </title>
               <text>
                  <p><b>Supplementary Figure 1</b>. Phylogenetic relationships among <it>X. oryzae </it>pv. oryzae (Xoo) strains PXO99<sup>A</sup>, KACC10331, and MAFF311018, and <it>X. oryzae </it>pv. oryzicola (Xoc) strain BLS256 based on whole genome alignment.</p>
               </text>
               <file name="1471-2164-9-204-S1.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
         </sec>
         <sec>
            <st>
               <p>Genes unique to PXO99<sup>A </sup>relative to MAFF</p>
            </st>
            <p>Of the 5083 annotated protein coding genes in PXO99<sup>A</sup>, 4910 have clear homologs in the MAFF strain. These genes map to just 4234 genes in MAFF (out of 4372 total), indicating a considerable expansion of some gene families. 194 of the shared genes are present in a 212 kb direct repeat near the replication terminus (see below). Of the remaining 173 PXO99-specific genes, 29 (including 18 transposases) are missing from MAFF because they span breakpoints; i.e., a rearrangement, insertion, or deletion in MAFF has broken these genes into fragments. Fifty-eight other PXO99<sup>A </sup>genes only partially align to MAFF, including 29 transposases. Finally, 86 genes in PXO99<sup>A </sup>are completely absent (based on sequence alignment) from MAFF.</p>
            <p>Among the 138 annotated genes in the MAFF strain that are not present in PXO99<sup>A</sup>, 20 are missing in PXO99<sup>A </sup>because they span breakpoints, and 38 (including 12 transposases) are missing because they are truncated in PXO99<sup>A</sup>. The remaining 80 genes in MAFF are entirely missing from PXO99<sup>A</sup>.</p>
            <p>Additional file <supplr sid="S2">2</supplr> contains the lists of genes unique to PXO99<sup>A </sup>and unique to MAFF. It is noteworthy that a majority of the genes unique to MAFF (64/80) are hypothetical proteins, which may represent annotation artifacts. These hypothetical genes have an average length of 182 bp, compared to 850 bp for an average gene. Of the 87 genes unique to PXO99<sup>A</sup>, twenty are hypothetical while the remainder comprises genes similar to predicted genes in other strains and species.</p>
            <suppl id="S2">
               <title>
                  <p>Additional file 2</p>
               </title>
               <text>
                  <p><b>Supplementary Tables</b>. Supplementary Table 1, Genes unique to PXO99<sup>A </sup>and unique to MAFF311018; Supplementary Table 2, Primers used to amplify genes at the non-fimbrial adhesin encoding locus; Supplementary Table 3, Primers used to confirm the 212 kb direct repeat.</p>
               </text>
               <file name="1471-2164-9-204-S2.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
         </sec>
         <sec>
            <st>
               <p>IS elements</p>
            </st>
            <p>All sequenced <it>Xanthomonas </it>genomes contain numerous IS elements, but the Xoo genomes contain the most diverse pool <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. Of the 19 known families of IS elements <abbrgrp><abbr bid="B21">21</abbr></abbrgrp>, eight families composed of 28 distinct elements appear in Xoo. MAFF and KACC have nearly identical numbers of IS elements (Table <tblr tid="T1">1</tblr>), while PXO99<sup>A </sup>contains fewer elements overall, but more copies of ISXo8, IS1114/ISXoo4, and ISXo2.</p>
         </sec>
         <sec>
            <st>
               <p>A genomic region encoding several non-fimbrial adhesin genes</p>
            </st>
            <p>Sequences unique to PXO99<sup>A </sup>relative to MAFF include a 38,766 bp region (coordinates 4788763 &#8211; 4827529) that contains several predicted non-fimbrial adhesin genes (Figure <figr fid="F2">2</figr>). Of 20 genes at this locus, three (<it>fhaB</it>, <it>fhaX </it>and <it>fhaB1</it>) encode non-fimbrial adhesin related proteins and a fourth (<it>fhaC</it>) is predicted to help in transport of non-fimbrial adhesins. The <it>fhaB </it>gene, which encodes the longest protein (3527 aa) in PXO99<sup>A</sup>, contains a hemagglutination activity domain and filamentous hemagglutinin repeats that are likely to serve in adhesion and autoaggregation. Two more genes (ORFs 2986 and 2987) are predicted to be involved in bacteriocin secretion while another (ORF 2973) encodes an ice-nucleation protein homolog. The locus also includes several IS elements and is flanked by direct repeats of ISXo5. These are in turn flanked by genes for a dual specificity phosphatase (DSP in Figure <figr fid="F2">2</figr>) and a DNA binding protein (DBP). In contrast, only one copy of the ISXo5 element is present between DSP and DBP in MAFF and KACC, indicating that the ISXo5 element was involved in the genomic rearrangement that led either to loss of the locus from MAFF and KACC or gain of the locus in PXO99<sup>A</sup>. The former is likely the case because the arrangement in PXO99<sup>A </sup>is present also in <it>X. oryzae </it>pv. oryzicola BLS256 (data not shown).</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>A 38.8 kb region including nonfimbrial adhesin genes that is unique to PXO99<sup>A</sup></p>
               </caption>
               <text>
                  <p><b>A 38.8 kb region including nonfimbrial adhesin genes that is unique to PXO99<sup>A</sup></b>. A: organization of the region in the PXO99<sup>A </sup>genome. Block arrows represent genes; inverted triangles represent insertion sequence elements. The region is flanked by DSP (dual specificity phosphatase) and DBP (DNA binding protein) encoding genes, which are also present in MAFF and KACC. B: the corresponding locus in MAFF and KACC, missing the entire block of genes. The point of insertion/deletion maps to an ISXo5 insertion sequence element between DSP and DBP.</p>
               </text>
               <graphic file="1471-2164-9-204-2"/>
            </fig>
            <p>Specific primers were developed for the DSP and DBP genes that flank this locus as well as for the <it>fhaB</it>, <it>fhaC</it>, and <it>fhaX </it>genes. Using PXO99<sup>A </sup>genomic DNA as a template, we amplified the expected PCR products for all five genes (data not shown). Using either MAFF 311018 or KACC 10331 genomic DNA as template, products of the expected size were obtained with primers specific to the DBP and DSP encoding genes, but no products were obtained with primers specific for <it>fhaC</it>, <it>fhaB </it>or <it>fhaX</it>. Also, a fragment of the expected size (~2.5 kb) was obtained via PCR with DBP- and DSP-specific primers using MAFF and KACC genomic DNA, but not with PXO99<sup>A </sup>genomic DNA (data not shown). These results provide additional evidence that the non-fimbrial adhesin genes are indeed missing from the MAFF and KACC genomes. Based on PCR analysis using the above primers, the <it>fhaC</it>, <it>fhaB </it>and <it>fhaX </it>genes are also missing from the Indian Xoo strain BXO43, and in another Indian strain, BXO8, only <it>fhaB </it>appears to be present. However, all three genes were detected in the strain Nepal624 (data not shown), a result consistent with the close relationship, as established by DNA fingerprinting studies, between PXO99<sup>A </sup>and Xoo strains from Nepal <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Recent large duplication</p>
            </st>
            <p>The PXO99<sup>A </sup>strain contains a near-perfect tandem duplication of 212,087 bp. This unusually large repeat spans the intervals 2,502,622&#8211;2,714,708 and 2,714,709&#8211;2,926,795. The repeat is flanked by an insertion (1073 bp) of ISXo5 (Figure <figr fid="F5">5</figr>) at each end and between the two copies. Except for a single base difference in one IS copy, the two regions are 100% identical. Because the flanking ISXo5 is longer than a read, and because the repeat is much too long to be spanned by any pair of sequencing reads, the original assembly had collapsed these two repeats into a single region. Also, the positioning of the flanking short repeats meant that every sequence fit accurately into the collapsed assembly, with only the paired-end information indicating a problem. This collapse was discovered through the use of the Hawkeye assembly diagnostics tool, which identified a large set of mis-oriented paired-end sequences on either end of the collapsed version of the assembly <abbrgrp><abbr bid="B23">23</abbr></abbrgrp>. In order to provide additional validation of this duplication, we designed primers on either side of the unique junction where the two copies of the tandem repeat meet (see Additional file <supplr sid="S2">2</supplr>). We verified the presence of the junction by PCR amplification and re-sequencing of this region.</p>
            <fig id="F5">
               <title>
                  <p>Figure 5</p>
               </title>
               <caption>
                  <p>Inversions and rearrangements in PXO99<sup>A </sup>compared to MAFF</p>
               </caption>
               <text>
                  <p><b>Inversions and rearrangements in PXO99<sup>A </sup>compared to MAFF</b>. The alignment shows regions of PXO99<sup>A </sup>that align to the same (red) or opposite (blue) strand of MAFF. Transposase genes and their orientation (+ or -) are shown at the sites of each rearrangement. Letters A-J indicate specific rearrangement events. A: the IS element ISXoo3 is composed of two distinct and independently conserved ORFs and is responsible for an inversion spanning coordinates 267869&#8211;5114959 (all coordinates refer to the PXO99<sup>A </sup>genome). B: ISXo8 occurs in opposite orientation at each end of a 2.6 Mbp inversion spanning positions 1356757&#8211;3898472. C: ISXo1 occurs in inverted copies at the endpoints of a 1.8 Mbp inversion spanning 1558996&#8211;3391786. D: a 33270 bp inverted region spanning 4394742&#8211;4428012 is flanked by oppositely-oriented copies of ISXo8. E: Each copy of the 212-kb duplication is flanked by ISXo5, which also occurs adjacent to two other translocations in this region. The duplication appears as two parallel diagonal lines in this box. F: ISXo8 also occurs in inverted copies at the boundaries of a 47540 bp segment that is translocated from approximately 4800000 to 685272. G: ISXoo3 flanks both ends of a 47540 bp translocation from approximately 1117000 to 4339239. H: A 9,862 bp region occurs in inverted copies at 217,455 and 4,305,307. MAFF311018 contains only one copy of this region. I,J: Segments spanning 96,753 bp (I) and 17,021 bp (J) are inverted with respect to MAFF311018 but not associated with transposases.</p>
               </text>
               <graphic file="1471-2164-9-204-5"/>
            </fig>
            <p>The 212 kb segment occurs once in the MAFF and KACC sequences. One question is whether the difficulty of assembling this region might mean that it is present in these strains, but undetected. Evidence that the duplication is indeed unique to PXO99<sup>A </sup>is the sequence divergence (~0.3%) of PXO99<sup>A </sup>from MAFF/KACC. This divergence implies that if the duplication had happened in a common ancestor, then the two distinct 212 kb regions, which would have existed since the divergence between strains, would be expected to have over 600 single-base differences. The fact that the copies have only one difference confirms that the large duplication in PXO99<sup>A </sup>occurred much more recently than its divergence from MAFF and KACC.</p>
         </sec>
         <sec>
            <st>
               <p>TAL effector genes</p>
            </st>
            <p>A hallmark of the Xoo genome is the large number of transcription activator-like (TAL) type III effector genes, which are defined by their relatedness to the type members <it>avrBs3 </it>and <it>pthA </it><abbrgrp><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr><abbr bid="B26">26</abbr></abbrgrp>. TAL effector genes are characterized in part by a region of 102 bp repeats, or more rarely 105 bp repeats, within the central coding portion <abbrgrp><abbr bid="B27">27</abbr><abbr bid="B28">28</abbr></abbrgrp>. Nineteen TAL effector genes were identified in the PXO99<sup>A </sup>genome (Table <tblr tid="T2">2</tblr> and Figure <figr fid="F3">3</figr>), including four previously associated with virulence and avirulence phenotypes and effector-specific gene expression in rice <abbrgrp><abbr bid="B16">16</abbr><abbr bid="B29">29</abbr><abbr bid="B30">30</abbr></abbrgrp>. One of these, <it>pthXo1</it>, encodes the major virulence determinant for PXO99<sup>A </sup>whose function is disrupted in rice by the recessive blight resistance gene <it>xa13 </it><abbrgrp><abbr bid="B29">29</abbr><abbr bid="B30">30</abbr></abbrgrp>. The TAL effector genes are located in nine loci distributed in the genome. Two loci consist of single genes, six consist of two genes oriented in the same direction, and one is a previously identified cluster of five genes all oriented in the same direction <abbrgrp><abbr bid="B16">16</abbr></abbrgrp>. Each of the genes within a cluster is preceded by a region of 990 bp that contains two or more short, predicted ORFs but is more likely non-coding DNA, suggesting that each gene has its own promoter, and that the clusters do not represent polycistronic operons. We have designated the genes numerically according to the locus in which they reside, sequentially from the origin of replication, and alphabetically, according to their position in that locus starting at the 5' end of the locus. 
Thus, the first TAL effector gene in the genome sequence, proximal to the origin, is <it>tal1</it>, the second (which is the second gene in a locus oriented toward the origin) <it>tal2b</it>, <it>etc</it>. The genes with known phenotypes are distributed in separate loci: <it>tal1 </it>is <it>pthXo7</it>, <it>tal2b </it>is <it>pthXo1</it>, <it>tal5b </it>is <it>pthXo6</it>, and <it>tal9c </it>is <it>avrXa27</it>. Among the genes, the number of repeat units varies from 12.5 (<it>tal9d</it>) to 26.5 (<it>tal9b</it>). None of the genes contains the rare 105 bp repeat. Gene pairs in loci 7 and 8 are identical copies in the 212 kb duplicated regions of the genome.</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Relationship of TAL effector genes in Xoo strains PXO99<sup>A </sup>and MAFF</p>
               </caption>
               <text>
                  <p><b>Relationship of TAL effector genes in Xoo strains PXO99<sup>A </sup>and MAFF</b>. The individual genes, distributed among nine loci in PXO99<sup>A </sup>and eight in MAFF, are represented by open arrows and labeled as described in the text. Pseudogenes (truncated genes or genes with early stop codons) are indicated by an apostrophe. Genes that have identical repeat regions based on number of repeats and identity at the twelfth and thirteenth codons are connected with a black dashed line. Blue dashed lines connect genes with nearly identical repeat regions (see text). Names of previously characterized genes are centered above or below the corresponding open arrow. Colored boxes indicate TAL gene clusters (not to scale), with the same color representing loci at the same relative positions in the two genomes. Locus 4 in PXO99<sup>A </sup>and locus 3 in MAFF are uniquely positioned in their respective genomes. The solid black rectangle and arrows beneath it represent the 212 kb direct repeat in the PXO99<sup>A </sup>genome.</p>
               </text>
               <graphic file="1471-2164-9-204-3"/>
            </fig>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>TAL effector genes in PXO99<sup>A</sup></p>
               </caption>
               <tblbdy cols="6">
                  <r>
                     <c ca="center">
                        <p>
                           <b>Gene</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>ID</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Coordinates</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Strand</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Repeats</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Comments<sup>1</sup></b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>pthXo7 (tal1)</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>03922</p>
                     </c>
                     <c ca="center">
                        <p>559109..562222</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>21.5</p>
                     </c>
                     <c ca="center">
                        <p>
                           <it>OsTFIIA&#947;1</it>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>pthXo1 (tal2b)</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00227</p>
                     </c>
                     <c ca="center">
                        <p>1645240..1649043</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>23.5</p>
                     </c>
                     <c ca="center">
                        <p>
                           <it>Os8N3</it>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal2a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00223</p>
                     </c>
                     <c ca="center">
                        <p>1650351..1653557</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>14.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal3a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00511</p>
                     </c>
                     <c ca="center">
                        <p>1860212..1862083</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>17.5</p>
                     </c>
                     <c ca="center">
                        <p>N-term deletion, truncated</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal3b</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00505</p>
                     </c>
                     <c ca="center">
                        <p>1864934..1866895</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>17.5</p>
                     </c>
                     <c ca="center">
                        <p>N-term deletion, truncated</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal4</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00318</p>
                     </c>
                     <c ca="center">
                        <p>2083533..2085968</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>15.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>pthXo6 (tal5b)</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00572</p>
                     </c>
                     <c ca="center">
                        <p>2354996..2358139</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>22.5</p>
                     </c>
                     <c ca="center">
                        <p>
                           <it>OsTFX1</it>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal5a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00567</p>
                     </c>
                     <c ca="center">
                        <p>2360008..2362440</p>
                     </c>
                     <c ca="center">
                        <p>-</p>
                     </c>
                     <c ca="center">
                        <p>15.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal6a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>00546</p>
                     </c>
                     <c ca="center">
                        <p>2384284..2387193</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>19.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal6b</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>05609</p>
                     </c>
                     <c ca="center">
                        <p>2388988..2392041</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>20.5</p>
                     </c>
                     <c ca="center">
                        <p>N-term frameshift</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal7a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>05633</p>
                     </c>
                     <c ca="center">
                        <p>2683629..2686343</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>17.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal7b</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>01085</p>
                     </c>
                     <c ca="center">
                        <p>2688137..2691088</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>19.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal8a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>06229</p>
                     </c>
                     <c ca="center">
                        <p>2895716..2898430</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>17.5</p>
                     </c>
                     <c ca="center">
                        <p>Duplicate of <it>tal7a</it></p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal8b</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>06234</p>
                     </c>
                     <c ca="center">
                        <p>2900224..2903175</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>19.5</p>
                     </c>
                     <c ca="center">
                        <p>Duplicate of <it>tal7b</it></p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal9a</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>02172</p>
                     </c>
                     <c ca="center">
                        <p>4101543..4104803</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>19.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal9b</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>05714</p>
                     </c>
                     <c ca="center">
                        <p>4106597..4110244</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>26.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>avrXa27 (tal9c)</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>05718</p>
                     </c>
                     <c ca="center">
                        <p>4112038..4114644</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>16.5</p>
                     </c>
                     <c ca="center">
                        <p>
                           <it>Xa27</it>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal9d</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>02269</p>
                     </c>
                     <c ca="center">
                        <p>4116438..4118642</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>12.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>
                           <it>tal9e</it>
                        </p>
                     </c>
                     <c ca="center">
                        <p>02272</p>
                     </c>
                     <c ca="center">
                        <p>4120436..4123759</p>
                     </c>
                     <c ca="center">
                        <p>+</p>
                     </c>
                     <c ca="center">
                        <p>23.5</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p><sup>1</sup>The rice gene induced by the effector is in italics.</p>
               </tblfn>
            </tbl>
            <p>With the exception of the gene pairs within the 212 kb duplication, none of the genes share the same repeat region structure based on a comparison of the twelfth and thirteenth codons, which vary from repeat to repeat (Figure <figr fid="F4">4</figr>). Genes <it>tal3a </it>and <it>tal3b </it>each have two deletions of 43 and 15 codons in their 5' ends and are truncated in the 3' ends of their coding regions, so they are unlikely to produce functional effectors. The similarities in <it>tal3a </it>and <it>tal3b </it>indicate that one is a duplicate of the other. Gene <it>tal6b </it>has a frameshift mutation within the 5'-end of the coding region and is therefore also unlikely to be functional. Genes <it>tal6b</it>, <it>tal7b</it>, and <it>tal8b </it>share a novel eleven codon duplication (PERTSHRVADL-PERTSNRVADL) at their 3' ends.</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Alignment of PXO99<sup>A </sup>TAL effector repetitive regions as represented by the twelfth and thirteenth residues of each repeat</p>
               </caption>
               <text>
                  <p><b>Alignment of PXO99<sup>A </sup>TAL effector repetitive regions as represented by the twelfth and thirteenth residues of each repeat</b>. Notes: 1, * indicates a proposed deletion of the thirteenth codon in the repeat; 2, novel variable codons; 3, truncation; 4, six-codon deletion; 5, N-terminal frameshift; 6, five-codon deletion in repeat.</p>
               </text>
               <graphic file="1471-2164-9-204-4"/>
            </fig>
            <p>Comparison of the TAL effector gene content and arrangement in PXO99<sup>A </sup>with those in MAFF, using the scheme described above to name the MAFF genes (genes in the two strains are hereafter distinguished by subscript), indicates that the number of loci and relative positions are similar with the exception of the duplicated loci (7 and 8) in PXO99<sup>A</sup>, as well as PXO99<sup>A </sup>locus 4 and MAFF locus 3, which occupy unique relative positions in their respective genomes (Figure <figr fid="F3">3</figr>). MAFF loci 2 and 3 were considered by Ochiai et al. <abbrgrp><abbr bid="B9">9</abbr></abbrgrp> as one locus, but we treat them as distinct based on the unusual distance (roughly 3 kb instead of the usual 990 bp) between the locus 3 gene and the closest locus 2 gene, and the presence of IS elements flanking locus 3. Despite the similarity in number and arrangement of the respective loci, only three PXO99<sup>A </sup>TAL effector genes, all in PXO99<sup>A </sup>locus 9, have counterparts in MAFF that are identical with respect to the number of repeats and the twelfth and thirteenth codons of the central repeat domain. The identical genes are <it>tal9c</it><sub>PXO99A </sub>and <it>tal1c</it><sub>MAFF</sub>, <it>tal9d</it><sub>PXO99A </sub>and <it>tal1d</it><sub>MAFF</sub>, and <it>tal9e</it><sub>PXO99A </sub>and <it>tal3</it><sub>MAFF</sub>. Genes <it>tal9a</it><sub>PXO99A </sub>and <it>tal9b</it><sub>PXO99A </sub>correspond in repeat number to <it>tal1a</it><sub>MAFF </sub>and <it>tal1b</it><sub>MAFF</sub>, respectively. The <it>tal3</it><sub>MAFF </sub>gene, which represents a break in the apparent overall synteny between PXO99<sup>A </sup>locus 9 and MAFF locus 1, is flanked by IS elements. 
The <it>tal9c</it><sub>PXO99A </sub>gene is the avirulence determinant <it>avrXa27</it>, and its identity with <it>tal1c</it><sub>MAFF </sub>is consistent with the effectiveness of the corresponding host resistance gene <it>Xa27 </it>against both PXO99<sup>A </sup>and MAFF, as well as a broad range of other strains <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>. Two other PXO99<sup>A </sup>TAL effector genes have counterparts in MAFF that are nearly identical with respect to the number of repeats and the predicted twelfth and thirteenth residues in each repeat: <it>tal4</it><sub>PXO99A </sub>has the same structure as <it>tal4a</it><sub>MAFF </sub>except for residue 12 in the fifteenth repeat, and <it>tal6a</it><sub>PXO99A </sub>has the same structure as <it>tal5a</it><sub>MAFF </sub>except for residues 12 and 13 in the fourteenth repeat. MAFF has two TAL effector genes, <it>pthXo2 </it>(<it>tal8</it><sub>MAFF</sub>) and <it>avrXa7 </it>(<it>tal7d</it><sub>MAFF</sub>), that are major virulence determinants <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>. The <it>pthXo2 </it>gene occupies the same locus in MAFF that <it>pthXo7 </it>does in PXO99<sup>A</sup>, while <it>avrXa7 </it>occupies the same locus as <it>pthXo1</it>, the major virulence determinant for PXO99<sup>A</sup>. Some corresponding loci differ in their gene content. For example, locus 2 in PXO99<sup>A </sup>consists of two genes but the corresponding locus in MAFF, locus 7, contains four. Absent from MAFF locus 5 is <it>pthXo6</it>, although previous evidence indicates that <it>OsTFX1</it>, a host gene expressed in a <it>pthXo6</it>-dependent manner, is induced upon infection with MAFF <abbrgrp><abbr bid="B32">32</abbr></abbrgrp>. Induction could be due to one of the other TAL effectors, or <it>pthXo6 </it>might have been misassembled in the MAFF sequence. 
Locus 6 in PXO99<sup>A </sup>corresponds to locus 4 in MAFF, but locus 4 in MAFF contains the gene nearly identical to <it>tal4</it><sub>PXO99A </sub>in the uniquely positioned locus 4 of PXO99<sup>A</sup>. The MAFF gene nearly identical to <it>tal6a</it><sub>PXO99A </sub>is located in a corresponding neighboring locus, MAFF locus 5. Locus 3 in PXO99<sup>A </sup>and 6 in MAFF contain two and one defective TAL effector genes, respectively. All three of these genes have identical repeat domains. Moreover, <it>tal6</it><sub>MAFF </sub>shares with the PXO99<sup>A </sup>genes the 5' deletions of 43 and 15 codons discussed above, as well as a six-codon deletion in the repeat region (repeat 4 of <it>tal6</it><sub>MAFF</sub>, repeat 3 of <it>tal3a</it><sub>PXO99A</sub>, and repeat 4 of <it>tal3b</it><sub>PXO99A</sub>), indicating that these genes may represent a generally defunct locus in Xoo. The observed substitution of genes at conserved loci across the genomes, expansion or contraction of individual loci in a given strain, and divergence or degeneration of gene sequences at shared loci are presumably accomplished by the exchange of coding sequences through homologous recombination. Transposition of genes involving IS element-mediated recombination may also occur, as exemplified possibly by <it>tal3</it><sub>MAFF</sub>.</p>
         </sec>
         <sec>
            <st>
               <p>Genome rearrangements in Xoo</p>
            </st>
            <p>The PXO99<sup>A </sup>strain of Xoo has experienced at least ten major rearrangements with respect to the MAFF strain, resulting in 29 distinct syntenic blocks, as shown in Figure <figr fid="F5">5</figr>. The majority of these rearrangements are symmetric about the origin of replication, as has been observed for many other bacterial rearrangements <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>. Most of these rearrangements appear to be mediated by a diverse set of transposable elements. Some elements, such as ISXo5, ISXo8, and IS1389/ISXoo3, are responsible for multiple rearrangements. For example, ISXo5 occurs near each endpoint of both copies of the 212,087 bp tandem repeat (region E, Figure <figr fid="F5">5</figr>). Within each copy of the repeat there is a 116,872 bp inversion flanked by inverted copies of ISXo5. Only three major rearrangement events (H, I, and J in Figure <figr fid="F5">5</figr>) do not seem to be associated with IS elements.</p>
         </sec>
         <sec>
            <st>
               <p>Evolution of the CRISPR region in Xoo lineages</p>
            </st>
            <p>The PXO99<sup>A</sup>, MAFF, and KACC genomes each contain a CRISPR (clustered regularly interspaced short palindromic repeats) element. CRISPRs are identified by a set of Cas genes, followed by a leader sequence and then a variable number of alternating spacers and repeats; the elements here represent the Dvulg subtype <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>. The repeats are identical, while the spacers represent foreign DNA that was laterally transferred from a bacteriophage or a plasmid <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>. A growing body of evidence demonstrates that the spacers, acquired during phage infection, provide immune protection for the bacterium against the phage <abbrgrp><abbr bid="B36">36</abbr></abbrgrp>. Thus CRISPRs represent an inheritable immune system for bacteria.</p>
            <p>Because the CRISPR region evolves very rapidly, it provides one of the most striking records of differentiation among PXO99<sup>A</sup>, MAFF, and KACC. As shown in Figure <figr fid="F6">6</figr>, PXO99<sup>A </sup>has the largest CRISPR region of the three strains, with 75 spacer elements. In contrast, MAFF and KACC contain just 48 and 59 spacers respectively, implying that PXO99<sup>A </sup>has acquired a substantially greater resistance to phage infections than its cousins. Also worth noting is that the majority of the spacers are unique to each strain, attesting to the rapid evolution of these regions.</p>
            <fig id="F6">
               <title>
                  <p>Figure 6</p>
               </title>
               <caption>
                  <p>Alignment of CRISPR elements from the PXO99<sup>A</sup>, KACC, and MAFF genomes</p>
               </caption>
               <text>
                  <p><b>Alignment of CRISPR elements from the PXO99<sup>A</sup>, KACC, and MAFF genomes</b>. Spacers are numbered from right (S0) to left, with the oldest elements on the right. Gaps (green boxes) indicate the positions of additional spacers in the genomes not shown here. Red lines indicate spacers shared in all three genomes, heavy black lines indicate spacers shared in just two strains, and thin black lines indicate spacers that are similar but not identical between two strains.</p>
               </text>
               <graphic file="1471-2164-9-204-6"/>
            </fig>
            <p>The alignment of the CRISPR spacers in the three Xoo strains (Figure <figr fid="F6">6</figr>) appears on first inspection to contradict the phylogenetic relationship of the strains, in that MAFF appears more distant from the other two strains. Spacers are inserted into a genome in chronological order, with new elements appearing next to the 188-bp leader sequence, which gives a clear picture of the shared history of these elements. Our alignment shows that all three strains share five of the oldest elements (S1&#8211;S5 in PXO99<sup>A</sup>), but that all of the more recent elements in MAFF are unique to that strain. PXO99<sup>A </sup>and KACC share the very oldest element, which has been lost in MAFF, as well as 10 additional older spacer elements in conserved order. These 10 spacers range from S6&#8211;S25 in PXO99<sup>A </sup>and S8&#8211;S30 in KACC (intervening elements are unique in each strain), indicating that these two strains diverged after the acquisition of spacer S25/S30. MAFF, in contrast, shares no spacers more recent than S5 with either of the other two strains. This appears to contradict whole-genome phylogenetic evidence and large-scale genome structure, both of which indicate that MAFF and KACC are much closer to one another than either is to PXO99<sup>A</sup>. A likely alternative explanation, given the hypervariable nature of CRISPRs, is that MAFF lost these older spacers.</p>
         </sec>
         <sec>
            <st>
               <p>Validation of the MAFF assembly</p>
            </st>
            <p>To validate the large-scale rearrangements between strains PXO99<sup>A </sup>and MAFF, we obtained a library of 9 kb shotgun clones for MAFF and identified those clones that correspond to breakpoints shown in Figure <figr fid="F5">5</figr>. Two clones for each breakpoint were selected, except in one case where only one clone could be identified. These clones were end-sequenced and the ends compared to the MAFF genome. In addition, restriction enzyme analysis was performed for each of the shotgun clones.</p>
            <p>In all cases, the analysis of the MAFF sequences confirmed that the MAFF genome is correctly assembled. Had there been any mis-assemblies, the clones would have shown significant length polymorphisms or would have mapped to inconsistent positions on the finished sequence. This evidence further strengthens the conclusion that breakpoints in the genome alignment between MAFF and PXO99<sup>A </sup>represent genuine differences between the genomes. Because the MAFF and KACC strains have almost the same overall genome architecture, with very few rearrangements, we did not attempt separate verification of the KACC assembly.</p>
            <p>Separately, we identified 18 significant insertions and deletions between MAFF and PXO99<sup>A</sup>. We generated PCR primers to test for the presence or absence of each insertion, and amplified fragments from genomic DNA using both strains. In all cases the PCR tests verified the presence of the insertion in one strain and its absence in the other (data not shown).</p>
         </sec>
         <sec>
            <st>
               <p>Regions of lateral gene transfer</p>
            </st>
            <p>GC-content is frequently used for identifying regions of a genome with unusual composition, as might result from lateral gene transfer. PXO99<sup>A </sup>has a GC-content of 63.6%, ranging from a high of 71.8% to a low of 41.6%. A more sensitive measure of unusual composition, used in many previous studies (e.g., <abbrgrp><abbr bid="B37">37</abbr></abbrgrp>) is based on trinucleotide composition. For this measure, we compute the X<sup>2 </sup>statistic to compare the trinucleotide distribution in fixed-size windows to the overall trinucleotide distribution for the genome. Regions highlighted by this statistic are either caused by lateral gene transfer or else under very strong evolutionary constraints to maintain their atypical DNA composition. A plot of the X<sup>2 </sup>statistic as well as GC-content across the genome is shown in Figure <figr fid="F7">7</figr>.</p>
            <fig id="F7">
               <title>
                  <p>Figure 7</p>
               </title>
               <caption>
                  <p>Compositional analysis of the PXO99<sup>A </sup>genome</p>
               </caption>
               <text>
                  <p><b>Compositional analysis of the PXO99<sup>A </sup>genome</b>. Analysis of genome composition in 1000 bp windows. The red plot shows a X<sup>2 </sup>analysis, in which the trinucleotide composition of each window is compared to the overall composition. The green plot shows GC content for the same windows.</p>
               </text>
               <graphic file="1471-2164-9-204-7"/>
            </fig>
            <p>The figure shows multiple regions of highly unusual composition, which we then investigated further. The largest peak in the X<sup>2 </sup>distribution, at position 918,000, is centered on a 424-aa protein (ORF04252) containing a lysin domain (often found in enzymes involved in bacterial cell wall degradation) but whose function is otherwise unknown. There is strong evidence that this gene has been laterally transferred via a bacteriophage: it is not found in any other Xanthomonads, and the closest matches are in Burkholderia, Campylobacter, and Shewanella, all very distantly related genera. Homologs in both <it>B. pseudomallei </it>K96243 <abbrgrp><abbr bid="B38">38</abbr></abbrgrp> and <it>Erythrobacter litoralis </it>are annotated as acquired from bacteriophage, and a direct phage homolog occurs in Burkholderia phage phiE202. A phylogenetic tree of all homologs (data not shown) supports the conclusion that this gene was laterally transferred via a phage.</p>
            <p>The second-highest peak in Figure <figr fid="F7">7</figr> is in the midst of a broader region of unusual composition, extending from 3,540,900 to 3,571,800. This region contains a large prophage element with 41 phage-related genes, extending from ORF01364 (a phage portal protein, pbsx family) to ORF01326 (a site-specific recombinase, phage integrase family). PXO99<sup>A </sup>contains a second, smaller prophage element spanning six genes from 2366221&#8211;2371236.</p>
            <p>All 19 of the TAL effector genes show an unusual composition and correspond to peaks in Figure <figr fid="F7">7</figr>. Because the TAL effectors are adjacent to transposases, they too might have originated in another species, possibly as a single-copy gene that later expanded in number in Xoo or a progenitor. Conservation of the unusual composition in all members of the family might also reflect strong functional constraints.</p>
         </sec>
         <sec>
            <st>
               <p>Hypothetical proteins</p>
            </st>
            <p>A significant fraction of predicted genes in most bacterial genomes are annotated as hypothetical proteins. These open reading frames (ORFs) are predicted computationally, but because they lack sequence homology to other species, they cannot be assigned a name. An unknown number of these predicted genes are likely to be false predictions, and for most genomes there has been little basis for distinguishing true genes at the time of sequencing. For PXO99<sup>A</sup>, we took advantage of the related MAFF and KACC genomes to improve upon the usual set of hypothetical predicted genes. Multiple sequence alignments among several closely-related species often reveal that the ORFs of hypothetical proteins are not maintained in sister species; i.e., they contain in-frame stop codons. Although it is possible that these interrupted ORFs are functional in only one of the species, a more parsimonious explanation is simply that the original gene prediction was wrong. This strategy has been used, for example, to identify several hundred incorrectly annotated genes in <it>S. cerevisiae </it><abbrgrp><abbr bid="B39">39</abbr></abbrgrp>, using three related yeast genomes.</p>
            <p>We aligned the DNA sequences for all 1273 hypothetical proteins in PXO99<sup>A </sup>to the corresponding sequences in MAFF, KACC, <it>X. axonopodis pv. citri</it>, <it>X. campestris pv. campestris</it>, and <it>X. campestris pv. vesicatoria</it>. From these alignments, we identified all predicted PXO99<sup>A </sup>genes with premature stop codons or crippling frameshift mutations in any other species. From these data, we identified 78 ORFs with multiple lines of evidence that they did not represent true genes; these predicted genes were deleted from the annotation.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>Nearly 30 distinct bacterial blight resistance genes from different rice varieties and wild relatives have been identified and many have been used in breeding programs for disease control <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>, but in several instances, resistance has broken down as new, virulent strains of Xoo have emerged <abbrgrp><abbr bid="B12">12</abbr><abbr bid="B40">40</abbr><abbr bid="B41">41</abbr><abbr bid="B42">42</abbr></abbrgrp>. Understanding mechanisms that account for the rapid emergence of new pathogen genotypes, and identifying Xoo genes involved in pathogenic adaptation are important goals toward developing durable disease control strategies. The complete genome sequence of strain PXO99<sup>A </sup>and its comparison to two previously sequenced strains, KACC10331 and MAFF311018, that we have presented here, provide new insights that advance these goals.</p>
         <p>Because MAFF and KACC are highly similar in genome content and organization, our comparative analysis focused largely on PXO99<sup>A </sup>and MAFF. This analysis revealed a remarkable plasticity of the Xoo genome. This plasticity is most strikingly evident in the large number of major rearrangements and indels between these strains. On a smaller scale, differences are prevalent in the inventories of TAL effector genes in PXO99<sup>A </sup>and MAFF. Also, a number of indels exist that represent genes shared by both strains but present in higher copy in PXO99<sup>A</sup>, including several IS elements. All of these differences suggest that the Xoo genome evolves rapidly. This conclusion is perhaps best supported however by the 212 kb sequence duplication in PXO99<sup>A </sup>that we discovered using a new and powerful application, the Hawkeye assembly diagnostics tool, and which we confirmed by PCR amplification of the repeat junction. The duplication represents a remarkably recent event, with only a single nucleotide difference differentiating between the two copies in PXO99<sup>A</sup>.</p>
         <p>Gene duplication contributes to gene diversification, allowing for unconstrained evolution of otherwise indispensable sequences. The abundance of duplications in PXO99<sup>A </sup>suggests that they are an important source of genomic variation for Xoo. As made clear by analysis of the 212 kb repeat, IS elements play an important role in generating duplications. And they clearly can generate other types of genome modifications as well, including rearrangements and inversions, and insertions or deletions that can lead to acquisition, modification, or loss of gene content <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. Indeed, 7 out of 10 of the major rearrangements in the PXO99<sup>A </sup>genome relative to MAFF are associated with IS elements. The presence of ISXo5 at both ends of the 38.8 kb locus containing the non-fimbrial adhesin-like genes in PXO99<sup>A</sup>, compared with its presence in single copy in place of this locus in MAFF and KACC provides a patent example of an IS mediated genome modification that resulted either in an excision (from the MAFF and KACC lineage), or an integration of DNA (in the PXO99<sup>A </sup>lineage). Our analysis highlights also an important role for phage as a source of genomic variation for Xoo. The PXO99<sup>A </sup>sequence revealed numerous differences from MAFF related to phage integration, including the presence of genes that clearly originated in distantly related organisms. Yet another template for genome modification, and a particularly interesting characteristic of the Xoo genomes, are the TAL effector genes. As virulence factors and triggers of host resistance, differences in TAL effector gene content have been associated for some time with phenotypic diversity. 
Comparison of MAFF and PXO99<sup>A </sup>provided clear evidence of the involvement of homologous recombination among these genes in generating differences in their structure and copy number at genomic locations that were otherwise conserved, indicating that the sequences themselves play a major role in generating that diversity.</p>
         <p>Included among the 19 TAL effector genes in PXO99<sup>A </sup>are <it>pthXo1</it>, a major virulence determinant not present in other strains <abbrgrp><abbr bid="B29">29</abbr></abbrgrp> and <it>avrXa27</it>, a cultivar specificity determinant <abbrgrp><abbr bid="B16">16</abbr></abbrgrp>. There is evidence also that the TAL effector gene <it>pthXo7 </it>is important in the virulence of PXO99<sup>A </sup>on plants containing the recessive resistance gene <it>xa5 </it><abbrgrp><abbr bid="B14">14</abbr><abbr bid="B32">32</abbr></abbrgrp>. Significantly, <it>xa5 </it>is prevalent among the Aus-Boro lines of rice, which originated in Nepal and Bangladesh, the geographical region that likely gave rise to PXO99 <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. These and other observations firmly establish a role for TAL effector genes in strain-specific adaptation. The differences in TAL effector gene content and structure between the geographically distinct strains PXO99<sup>A </sup>and MAFF further underscore this role, and the importance of understanding the diversity of TAL effector functions.</p>
         <p>The non-fimbrial adhesin-like genes <it>fhaB</it>, <it>fhaB1</it>, and <it>fhaX </it>and the transport gene <it>fhaC </it>we discovered at the 38.8 kb locus in PXO99<sup>A </sup>that is missing in MAFF and KACC are additional intriguing candidates for adaptations to certain host genotypes or environmental conditions. Homologs of <it>fhaB </it>and <it>fhaC </it>are present in a number of plant and animal pathogenic bacteria <abbrgrp><abbr bid="B43">43</abbr></abbrgrp>. MAFF and KACC encode other non-fimbrial adhesins, which are also present and highly conserved in PXO99<sup>A</sup>. Thus, it seems likely that the <it>fha </it>genes are not essential pathogenicity factors in PXO99<sup>A</sup>. However, mutational analysis might reveal a quantitative effect on virulence, or a differential effect in certain rice varieties or under different temperatures. Other proteins encoded at the locus that are of interest from the perspective of host-pathogen interactions include a putative ice nucleation protein and a putative colicin with an associated transporter protein.</p>
         <p>Complete genome sequences are available for a number of members of other <it>Xanthomonas </it>species, including <it>X. campestris </it>pv. campestris, the causal agent of black rot in crucifers <abbrgrp><abbr bid="B44">44</abbr><abbr bid="B45">45</abbr></abbrgrp>, <it>X. axonopodis </it>pv. citri, which causes citrus canker, and <it>X. campestris </it>pv. vesicatoria, which is responsible for bacterial spot in tomato and pepper plants <abbrgrp><abbr bid="B46">46</abbr></abbrgrp>. Whole genome alignments revealed several inversions, indels, and rearrangements in these genomes relative to one another <abbrgrp><abbr bid="B46">46</abbr></abbrgrp>. Thus the genus as a whole shows a high degree of genomic variation. Even in this context however, the differences uncovered here in structure and content of the PXO99<sup>A </sup>versus the MAFF and KACC genomes are striking. Notably, Xoo strains contain the greatest number and diversity of IS elements of all the sequenced xanthomonads, and the size of the CRISPRs in the strains discussed here suggests a long history of interaction with phage. <it>X. oryzae </it>strains are also unusual in their abundance of TAL effector genes. None of the other sequenced <it>Xanthomonas </it>strains have more than four TAL effector genes, and some have none. Though a comprehensive survey has not been done, large numbers of TAL effector genes are only known to exist elsewhere in strains of <it>X. campestris </it>pv. malvacearum, a pathogen of another ancient and genetically diverse domesticated crop plant, cotton <abbrgrp><abbr bid="B47">47</abbr></abbrgrp>, and, curiously, in <it>Xanthomonas </it>strains that infect mango <abbrgrp><abbr bid="B48">48</abbr></abbrgrp>. It is tempting to speculate for <it>X. 
oryzae </it>that the diversification of its host through millennia of cultivation around the world favored an amplification of elements in the pathogen that confer genome plasticity and adaptability, including IS elements, phage, and the repeat-dominated TAL effector genes.</p>
         <p>It is interesting that in contrast to the East Asian MAFF and KACC strains, the ancestry of PXO99<sup>A </sup>is likely centered in South Asia <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>, one of at least three probable sites of domestication of rice <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. As described here, PXO99<sup>A </sup>has a larger genome and a greater number of strain-specific genes than its close relatives MAFF and KACC. This greater size and complexity may be a consequence of this strain having derived from a lineage that evolved near a center of origin for its host, which would be expected to have a greater diversity of host genotypes than other locations.</p>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>The genome sequence of PXO99<sup>A </sup>and its comparison to those of strains MAFF and KACC provide direct evidence that the Xoo genome is highly plastic and rapidly evolving. Our analysis has revealed sources of genomic variation and identified candidates for strain-specific adaptations of this pathogen. These findings help to explain the extraordinary diversity of Xoo genotypes and races that have been isolated from around the world <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B49">49</abbr><abbr bid="B50">50</abbr><abbr bid="B51">51</abbr><abbr bid="B52">52</abbr><abbr bid="B53">53</abbr><abbr bid="B54">54</abbr><abbr bid="B55">55</abbr></abbrgrp> and even from within a particular country or region <abbrgrp><abbr bid="B51">51</abbr><abbr bid="B56">56</abbr><abbr bid="B57">57</abbr><abbr bid="B58">58</abbr><abbr bid="B59">59</abbr><abbr bid="B60">60</abbr></abbrgrp>. Our study also has highlighted particular classes of genes as important targets for functional analysis toward development of better, broader-spectrum and more durable control measures.</p>
      </sec>
      <sec>
         <st>
            <p>Methods</p>
         </st>
         <sec>
            <st>
               <p>Sequencing</p>
            </st>
            <p>Bacterial genomic DNA was randomly sheared by nebulization, end-repaired with consecutive BAL31 nuclease and T4 DNA polymerase treatments, and size-selected using gel electrophoresis on 1% low-melting-point agarose. After ligation to BstXI adapters, DNA was purified by three rounds of gel electrophoresis to remove excess adapters, and the fragments were ligated into the vector pHOS2 (a modified pBR322 vector) linearized with BstXI. The pHOS2 plasmid contains two BstXI cloning sites immediately flanked by sequencing primer binding sites. These features reduce the frequency of non-recombinant clones, and reduce the amount of vector sequences at the end of the reads. Two libraries with average insert size of 4.5 kb and 10 kb were constructed. The ligation reactions were electroporated into <it>E. coli</it>. Clones were plated onto large format (16 &#215; 16 cm) diffusion plates prepared by layering 150 ml of fresh antibiotic-free agar onto a previously set 50-ml layer of agar containing antibiotic. Colonies were picked for template preparation, inoculated into 384-well blocks containing liquid media, and incubated overnight with shaking. High-purity plasmid DNA was prepared using the DNA purification robotic workstation custom-built by Thermo CRS (Thermo Fisher Scientific, Inc.) and based on the alkaline lysis miniprep <abbrgrp><abbr bid="B61">61</abbr></abbrgrp> and isopropanol precipitation. DNA precipitate was washed with 70% ethanol, dried, and resuspended in 10 mM Tris HCl buffer containing a trace of blue dextran. The yield of plasmid DNA was approximately 600&#8211;800 ng per clone, providing sufficient DNA for at least four sequencing reactions per template. Sequencing was done using the di-deoxy sequencing method <abbrgrp><abbr bid="B62">62</abbr></abbrgrp>. Two 384-well cycle-sequencing reaction plates were prepared from each plate of plasmid template DNA for opposite-end, paired-sequence reads. 
Sequencing reactions were completed using the Big Dye Terminator chemistry and standard M13 forward and reverse primers. Reaction mixtures, thermal cycling profiles, and electrophoresis conditions were optimized to reduce the volume of the Big Dye Terminator mix and to extend read lengths on the AB3730xl sequencers (Applied Biosystems). Sequencing reactions were set up by the Biomek FX pipetting workstations. Robots were used to aliquot and combine templates with reaction mixes consisting of deoxy- and fluorescently labeled dideoxynucleotides, DNA polymerase, sequencing primers, and reaction buffer in a 5 &#956;l volume. After 30&#8211;40 consecutive cycles of amplification, reaction products were precipitated by isopropanol, dried at room temperature, resuspended in water, and transferred to an AB3730xl sequencer. 8,700 and 52,100 high-quality reads from the 4.5 kb and 10 kb insert libraries, respectively, were generated with an average trimmed sequence read length of 821 bp and a success rate of 93%. After initial assembly, gaps were closed by primer walking on plasmid templates, sequencing genomic PCR products that spanned the gaps, and by transposon insertion and sequencing of selected 10 kb shotgun clones.</p>
         </sec>
         <sec>
            <st>
               <p>Assembly and annotation</p>
            </st>
            <p>Multiple rounds of assembly were performed, beginning with the shotgun reads and later including additional finishing reads. In the final assembly, 65,620 reads were trimmed to remove vector and low-quality sequence, and then assembled using Celera Assembler <abbrgrp><abbr bid="B63">63</abbr></abbrgrp>. The large (212 kb) tandem repeat was initially collapsed into one copy, which had twice the depth of coverage of the rest of the genome. This anomaly was detected and corrected to two copies after analysis aided by the Hawkeye assembly diagnosis software <abbrgrp><abbr bid="B23">23</abbr></abbrgrp>. Protein-coding genes were identified using Glimmer 3.0, which includes an algorithm to identify ribosome binding sites for each gene. Transcription terminators were predicted using TransTermHP <abbrgrp><abbr bid="B66">66</abbr></abbrgrp> with parameter settings expected to yield over 90% accuracy. Transfer RNAs were identified with tRNAScanSE <abbrgrp><abbr bid="B67">67</abbr></abbrgrp>. Regions with neither Glimmer predictions nor RNA genes were searched in all six frames using blastx <abbrgrp><abbr bid="B68">68</abbr></abbrgrp> to identify any missed proteins, and all annotations were manually curated as described previously <abbrgrp><abbr bid="B69">69</abbr></abbrgrp>, using the Manatee online annotation system <abbrgrp><abbr bid="B70">70</abbr></abbrgrp>. The origin and terminus of replication were determined using GC-skew analysis <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>, which indicates an origin near position 50 kb and termini near 2,370 kb or 2,510 kb. The chromosome replication initiator gene dnaA, which is commonly found near the origin, is at position 45. Oligomer skew analysis <abbrgrp><abbr bid="B71">71</abbr></abbrgrp>, which identifies 8-mers preferentially located on the leading strand, indicates an origin at 4,895 kb (30 kb from the end of the genome) and a terminus at 2,381 kb, based on multiple 8-mers including CCCTGCCC and AGGACCAT. 
These 8-mers occur 328/376 and 218/248 times (over 87%) on the leading strand; for CCCTGCCC the likelihood that this occurred by chance is 3.6x10<sup>-45</sup>. To determine genome rearrangements, the MUMmer/Nucmer suite of genome alignment programs <abbrgrp><abbr bid="B72">72</abbr></abbrgrp> was used to align Xoo PXO99<sup>A </sup>to the MAFF and KACC strains as well as to all other <it>Xanthomonas </it>genomes.</p>
         </sec>
         <sec>
            <st>
               <p>PCR amplification of genes at the non-fimbrial adhesin encoding locus</p>
            </st>
            <p>Genomic DNA was isolated from PXO99<sup>A</sup>, BXO8, Nepal624, KACC10331 and MAFF311018 strains according to the procedure described by Leach et al. <abbrgrp><abbr bid="B50">50</abbr></abbrgrp>. PCR was performed using a set of gene specific primers listed in Additional file <supplr sid="S2">2</supplr>.</p>
         </sec>
         <sec>
            <st>
               <p>Genome data</p>
            </st>
            <p>The PXO99<sup>A </sup>complete, annotated genome has been deposited in Genbank under accession number <ext-link ext-link-type="gen" ext-link-id="CP000967">CP000967</ext-link>. The traces have been deposited in the NCBI Trace Archive <abbrgrp><abbr bid="B73">73</abbr></abbrgrp> and the complete assembly is in the NCBI Assembly Archive <abbrgrp><abbr bid="B74">74</abbr></abbrgrp>.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>SLS, JEL, FFW, and AJB conceived the project. SLS, PDR, and AJB coordinated and oversaw the project. SLS and PDR managed all genomic sequencing. DP and MCS did the initial assembly of the genome. DR directed the sequence finishing and gap closure activities. MCS, AMP, and ALD created the final assembly. RM was in charge of the initial, semi-automated genome annotation. MCS, CT, and SLS carried out the overall structural analysis of the genome. PBP and RVS performed the whole genome alignments for phylogenetic analysis. DK, CT, DDS, and SLS compared the gene content of PXO99<sup>A </sup>and MAFF. CT and MAVS analyzed IS elements. GA and RVS analyzed the adhesin locus. MCS, ALD, and SLS discovered and characterized the 212 kb duplication. FFW carried out the TAL effector analysis, assisted by RK and AJB. CT documented rearrangements in the PXO99<sup>A </sup>genome relative to MAFF. DDS, SLS and RK investigated the CRISPRs. SeT, AF, and HO validated the MAFF assembly. SLS identified regions of possible lateral gene transfer. DK optimized annotation of hypothetical protein genes. SeT, AF, GA, GJ, AP, PBP, RVS, HI, DFM, BS, VV, JMD, RPR, HH, ShT, SWL, PCR, RVS, MAVS, JEL, FFW, and AJB contributed to the manual annotation. SLS and AJB drafted the manuscript, assisted by PDR, SeT, GA, PBP, RVS, RK, MAVS, JEL, and FFW. All authors approved the final manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We thank Nadia Fedorova, Faiza Benahmed, Kyle McAllen, and Hoda Khouri for assistance in closing gaps in the genome, and Sam Angiuoli for help with syntenic alignments. Funding for this work was provided by the U.S. Department of Agriculture-National Science Foundation Microbial Genome Sequencing Program (20043560015022 to AJB, JEL, SLS, and FFW), the National Science Foundation (MCB-0412260 to SLS), and the National Institutes of Health (R01-GM083873 to SLS).</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Rice Diseases</p>
            </title>
            <aug>
               <au>
                  <snm>Ou</snm>
                  <fnm>SH</fnm>
               </au>
            </aug>
            <publisher>Kew, Surrey: Commonwealth Agricultural Bureau</publisher>
            <edition>2</edition>
            <pubdate>1985</pubdate>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Current status and future prospects of research on bacterial blight of rice</p>
            </title>
            <aug>
               <au>
                  <snm>Mew</snm>
                  <fnm>TW</fnm>
               </au>
            </aug>
            <source>Annu Rev Phytopathol</source>
            <pubdate>1987</pubdate>
            <volume>25</volume>
            <fpage>359</fpage>
            <lpage>382</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1146/annurev.py.25.090187.002043</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p><it>Xanthomonas oryzae </it>pathovars: model pathogens of a model crop</p>
            </title>
            <aug>
               <au>
                  <snm>Ni&#241;o-Liu</snm>
                  <fnm>DO</fnm>
               </au>
               <au>
                  <snm>Ronald</snm>
                  <fnm>PC</fnm>
               </au>
               <au>
                  <snm>Bogdanove</snm>
                  <fnm>AJ</fnm>
               </au>
            </aug>
            <source>Mol Plant Pathol</source>
            <pubdate>2006</pubdate>
            <volume>7</volume>
            <issue>5</issue>
            <fpage>303</fpage>
            <lpage>324</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1111/j.1364-3703.2006.00344.x</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>THE RICE GENOME: The most precious things are not jade and pearls...</p>
            </title>
            <aug>
               <au>
                  <snm>Ronald</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Leung</snm>
                  <fnm>H</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2002</pubdate>
            <volume>296</volume>
            <issue>5565</issue>
            <fpage>58</fpage>
            <lpage>59</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.1071337</pubid>
                  <pubid idtype="pmpid" link="fulltext">11935008</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Population biology of <it>Xanthomonas oryzae </it>pv. oryzae and approaches to its control</p>
            </title>
            <aug>
               <au>
                  <snm>Leach</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Leung</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Nelson</snm>
                  <fnm>RJ</fnm>
               </au>
               <au>
                  <snm>Mew</snm>
                  <fnm>TW</fnm>
               </au>
            </aug>
            <source>Curr Opin Biotechnol</source>
            <pubdate>1995</pubdate>
            <volume>6</volume>
            <issue>3</issue>
            <fpage>298</fpage>
            <lpage>304</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1016/0958-1669(95)80051-4</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Origin, dispersal, cultivation and variation of rice</p>
            </title>
            <aug>
               <au>
                  <snm>Khush</snm>
                  <fnm>GS</fnm>
               </au>
            </aug>
            <source>Plant Mol Biol</source>
            <pubdate>1997</pubdate>
            <volume>35</volume>
            <issue>1&#8211;2</issue>
            <fpage>25</fpage>
            <lpage>34</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1023/A:1005810616885</pubid>
                  <pubid idtype="pmpid" link="fulltext">9291957</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Genetics and improvement of resistance to bacterial blight in rice (in Chinese)</p>
            </title>
            <aug>
               <au>
                  <snm>Zhang</snm>
                  <fnm>Q</fnm>
               </au>
            </aug>
            <publisher>Beijing: Science Press</publisher>
            <pubdate>2007</pubdate>
         </bibl>
         <bibl id="B8">
            <title>
               <p>The hosts of <it>Xanthomonas</it></p>
            </title>
            <aug>
               <au>
                  <snm>Hayward</snm>
                  <fnm>AC</fnm>
               </au>
            </aug>
            <source>Xanthomonas</source>
            <publisher>London: Chapman and Hall</publisher>
            <editor>Swings JG, Civerolo EL</editor>
            <pubdate>1993</pubdate>
            <fpage>1</fpage>
            <lpage>119</lpage>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Genome sequence of <it>Xanthomonas oryzae </it>pv. <it>oryzae </it>suggests contribution of large numbers of effector genes and insertion sequences to its race diversity</p>
            </title>
            <aug>
               <au>
                  <snm>Ochiai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Inoue</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Takeya</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sasaki</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kaku</snm>
                  <fnm>H</fnm>
               </au>
            </aug>
            <source>Jpn Agric Res Q</source>
            <pubdate>2005</pubdate>
            <volume>39</volume>
            <issue>4</issue>
            <fpage>275</fpage>
            <lpage>287</lpage>
         </bibl>
         <bibl id="B10">
            <title>
               <p>The genome sequence of <it>Xanthomonas oryzae </it>pathovar oryzae KACC10331, the bacterial blight pathogen of rice</p>
            </title>
            <aug>
               <au>
                  <snm>Lee</snm>
                  <fnm>BM</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>YJ</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>DS</fnm>
               </au>
               <au>
                  <snm>Kang</snm>
                  <fnm>HW</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>JG</fnm>
               </au>
               <au>
                  <snm>Song</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>IC</fnm>
               </au>
               <au>
                  <snm>Yoon</snm>
                  <fnm>UH</fnm>
               </au>
               <au>
                  <snm>Hahn</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Koo</snm>
                  <fnm>BS</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2005</pubdate>
            <volume>33</volume>
            <issue>2</issue>
            <fpage>577</fpage>
            <lpage>586</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">548351</pubid>
                  <pubid idtype="pmpid" link="fulltext">15673718</pubid>
                  <pubid idtype="doi">10.1093/nar/gki206</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>HecA, a member of a class of adhesins produced by diverse pathogenic bacteria, contributes to the attachment, aggregation, epidermal cell killing, and virulence phenotypes of <it>Erwinia chrysanthemi </it>EC16 on <it>Nicotiana clevelandii </it>seedlings</p>
            </title>
            <aug>
               <au>
                  <snm>Rojas</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Ham</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Deng</snm>
                  <fnm>WL</fnm>
               </au>
               <au>
                  <snm>Doyle</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Collmer</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2002</pubdate>
            <volume>99</volume>
            <issue>20</issue>
            <fpage>13142</fpage>
            <lpage>13147</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">130600</pubid>
                  <pubid idtype="pmpid" link="fulltext">12271135</pubid>
                  <pubid idtype="doi">10.1073/pnas.202358699</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Changes in race frequency of <it>Xanthomonas oryzae </it>pv. <it>oryzae </it>in response to rice cultivars planted in the Philippines</p>
            </title>
            <aug>
               <au>
                  <snm>Mew</snm>
                  <fnm>TW</fnm>
               </au>
               <au>
                  <snm>Vera Cruz</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Medalla</snm>
                  <fnm>ES</fnm>
               </au>
            </aug>
            <source>Plant Dis</source>
            <pubdate>1992</pubdate>
            <volume>76</volume>
            <fpage>1029</fpage>
            <lpage>1032</lpage>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Genetic diversity of <it>Xanthomonas oryzae </it>pv. oryzae in Asia</p>
            </title>
            <aug>
               <au>
                  <snm>Adhikari</snm>
                  <fnm>TB</fnm>
               </au>
               <au>
                  <snm>Cruz</snm>
                  <fnm>CMV</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Nelson</snm>
                  <fnm>RJ</fnm>
               </au>
               <au>
                  <snm>Skinner</snm>
                  <fnm>DZ</fnm>
               </au>
               <au>
                  <snm>Mew</snm>
                  <fnm>TW</fnm>
               </au>
               <au>
                  <snm>Leach</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Vera Cruz</snm>
                  <fnm>CM</fnm>
               </au>
            </aug>
            <source>Appl Environ Microbiol</source>
            <pubdate>1995</pubdate>
            <volume>61</volume>
            <issue>3</issue>
            <fpage>966</fpage>
            <lpage>971</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1388379</pubid>
                  <pubid idtype="pmpid" link="fulltext">16534980</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>The rice bacterial blight resistance gene <it>xa5 </it>encodes a novel form of disease resistance</p>
            </title>
            <aug>
               <au>
                  <snm>Iyer</snm>
                  <fnm>AS</fnm>
               </au>
               <au>
                  <snm>McCouch</snm>
                  <fnm>SR</fnm>
               </au>
            </aug>
            <source>Mol Plant Microbe Interact</source>
            <pubdate>2004</pubdate>
            <volume>17</volume>
            <issue>12</issue>
            <fpage>1348</fpage>
            <lpage>1354</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1094/MPMI.2004.17.12.1348</pubid>
                  <pubid idtype="pmpid" link="fulltext">15597740</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Promoter mutations of an essential gene for pollen development result in disease resistance in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Chu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Yuan</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Yao</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Ge</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Yuan</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Xu</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Fu</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Bennetzen</snm>
                  <fnm>JL</fnm>
               </au>
               <etal/>
            </aug>
            <source>Genes Dev</source>
            <pubdate>2006</pubdate>
            <volume>20</volume>
            <issue>10</issue>
            <fpage>1250</fpage>
            <lpage>1255</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1472899</pubid>
                  <pubid idtype="pmpid" link="fulltext">16648463</pubid>
                  <pubid idtype="doi">10.1101/gad.1416306</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p><it>R </it>gene expression induced by a type-III effector triggers disease resistance in rice</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Tian</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Sreekala</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Chu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>GL</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>FF</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2005</pubdate>
            <volume>435</volume>
            <issue>7045</issue>
            <fpage>1122</fpage>
            <lpage>1125</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature03630</pubid>
                  <pubid idtype="pmpid" link="fulltext">15973413</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>A receptor kinase-like protein encoded by the rice disease resistance gene, Xa21</p>
            </title>
            <aug>
               <au>
                  <snm>Song</snm>
                  <fnm>WY</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>GL</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>LL</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>HS</fnm>
               </au>
               <au>
                  <snm>Pi</snm>
                  <fnm>LY</fnm>
               </au>
               <au>
                  <snm>Holsten</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Gardner</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Zhai</snm>
                  <fnm>WX</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>LH</fnm>
               </au>
               <etal/>
            </aug>
            <source>Science</source>
            <pubdate>1995</pubdate>
            <volume>270</volume>
            <issue>5243</issue>
            <fpage>1804</fpage>
            <lpage>1806</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.270.5243.1804</pubid>
                  <pubid idtype="pmpid" link="fulltext">8525370</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Asymmetric substitution patterns in the two DNA strands of bacteria</p>
            </title>
            <aug>
               <au>
                  <snm>Lobry</snm>
                  <fnm>JR</fnm>
               </au>
            </aug>
            <source>Mol Biol Evol</source>
            <pubdate>1996</pubdate>
            <volume>13</volume>
            <issue>5</issue>
            <fpage>660</fpage>
            <lpage>665</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">8676740</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Mauve: multiple alignment of conserved genomic sequence with rearrangements</p>
            </title>
            <aug>
               <au>
                  <snm>Darling</snm>
                  <fnm>AC</fnm>
               </au>
               <au>
                  <snm>Mau</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Blattner</snm>
                  <fnm>FR</fnm>
               </au>
               <au>
                  <snm>Perna</snm>
                  <fnm>NT</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2004</pubdate>
            <volume>14</volume>
            <issue>7</issue>
            <fpage>1394</fpage>
            <lpage>1403</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">442156</pubid>
                  <pubid idtype="pmpid" link="fulltext">15231754</pubid>
                  <pubid idtype="doi">10.1101/gr.2289704</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p><it>Xylella </it>and <it>Xanthomonas </it>Mobil'omics</p>
            </title>
            <aug>
               <au>
                  <snm>Monteiro-Vitorello</snm>
                  <fnm>CB</fnm>
               </au>
               <au>
                  <snm>de Oliveira</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Zerillo</snm>
                  <fnm>MM</fnm>
               </au>
               <au>
                  <snm>Varani</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Civerolo</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Van Sluys</snm>
                  <fnm>MA</fnm>
               </au>
            </aug>
            <source>Omics</source>
            <pubdate>2005</pubdate>
            <volume>9</volume>
            <issue>2</issue>
            <fpage>146</fpage>
            <lpage>159</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1089/omi.2005.9.146</pubid>
                  <pubid idtype="pmpid" link="fulltext">15969647</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>ISfinder: the reference centre for bacterial insertion sequences</p>
            </title>
            <aug>
               <au>
                  <snm>Siguier</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Perochon</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Lestrade</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Mahillon</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Chandler</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2006</pubdate>
            <volume>34</volume>
            <issue>Database issue</issue>
            <fpage>D32</fpage>
            <lpage>D36</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1347377</pubid>
                  <pubid idtype="pmpid" link="fulltext">16381877</pubid>
                  <pubid idtype="doi">10.1093/nar/gkj014</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Genotypic and pathotypic diversity in <it>Xanthomonas oryzae </it>pv. oryzae in Nepal</p>
            </title>
            <aug>
               <au>
                  <snm>Adhikari</snm>
                  <fnm>TB</fnm>
               </au>
               <au>
                  <snm>Mew</snm>
                  <fnm>TW</fnm>
               </au>
               <au>
                  <snm>Leach</snm>
                  <fnm>JE</fnm>
               </au>
            </aug>
            <source>Phytopathology</source>
            <pubdate>1999</pubdate>
            <volume>89</volume>
            <issue>8</issue>
            <fpage>687</fpage>
            <lpage>694</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1094/PHYTO.1999.89.8.687</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Hawkeye: an interactive visual analytics tool for genome assemblies</p>
            </title>
            <aug>
               <au>
                  <snm>Schatz</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Phillippy</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Shneiderman</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Salzberg</snm>
                  <fnm>SL</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2007</pubdate>
            <volume>8</volume>
            <issue>3</issue>
            <fpage>R34</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1868940</pubid>
                  <pubid idtype="pmpid" link="fulltext">17349036</pubid>
                  <pubid idtype="doi">10.1186/gb-2007-8-3-r34</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Genetic and structural characterization of the avirulence gene <it>avrBs3 </it>from <it>Xanthomonas campestris </it>pv. vesicatoria</p>
            </title>
            <aug>
               <au>
                  <snm>Bonas</snm>
                  <fnm>U</fnm>
               </au>
               <au>
                  <snm>Stall</snm>
                  <fnm>RE</fnm>
               </au>
               <au>
                  <snm>Staskawicz</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Mol Gen Genet</source>
            <pubdate>1989</pubdate>
            <volume>218</volume>
            <issue>1</issue>
            <fpage>127</fpage>
            <lpage>136</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/BF00330575</pubid>
                  <pubid idtype="pmpid">2550761</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>A pathogenicity locus from <it>Xanthomonas citri </it>enables strains from several pathovars of <it>X. campestris </it>to elicit canker-like lesions on citrus</p>
            </title>
            <aug>
               <au>
                  <snm>Swarup</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>De Feyter</snm>
                  <fnm>RD</fnm>
               </au>
               <au>
                  <snm>Brlansky</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gabriel</snm>
                  <fnm>DW</fnm>
               </au>
            </aug>
            <source>Phytopathology</source>
            <pubdate>1991</pubdate>
            <volume>81</volume>
            <fpage>802</fpage>
            <lpage>809</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1094/Phyto-81-802</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Identification of a family of avirulence genes from <it>Xanthomonas oryzae </it>pv. <it>oryzae</it></p>
            </title>
            <aug>
               <au>
                  <snm>Hopkins</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>FF</fnm>
               </au>
               <au>
                  <snm>Choi</snm>
                  <fnm>SH</fnm>
               </au>
               <au>
                  <snm>Guo</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Leach</snm>
                  <fnm>JE</fnm>
               </au>
            </aug>
            <source>Mol Plant Microbe Interact</source>
            <pubdate>1992</pubdate>
            <volume>5</volume>
            <issue>6</issue>
            <fpage>451</fpage>
            <lpage>459</lpage>
            <xrefbib>
               <pubid idtype="pmpid">1335800</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Gene-for-gene-mediated recognition of nuclear-targeted AvrBs3-like bacterial effector proteins</p>
            </title>
            <aug>
               <au>
                  <snm>Schornack</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Meyer</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>R&#246;mer</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Jordan</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Lahaye</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>J Plant Physiol</source>
            <pubdate>2006</pubdate>
            <volume>163</volume>
            <issue>3</issue>
            <fpage>256</fpage>
            <lpage>272</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.jplph.2005.12.001</pubid>
                  <pubid idtype="pmpid" link="fulltext">16403589</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Characterization of AvrBs3-like effectors from a Brassicaceae pathogen reveals virulence and avirulence activities and a protein with a novel repeat architecture</p>
            </title>
            <aug>
               <au>
                  <snm>Kay</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Boch</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bonas</snm>
                  <fnm>U</fnm>
               </au>
            </aug>
            <source>Mol Plant Microbe Interact</source>
            <pubdate>2005</pubdate>
            <volume>18</volume>
            <issue>8</issue>
            <fpage>838</fpage>
            <lpage>848</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1094/MPMI-18-0838</pubid>
                  <pubid idtype="pmpid" link="fulltext">16134896</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p><it>Os8N3 </it>is a host disease-susceptibility gene for bacterial blight of rice</p>
            </title>
            <aug>
               <au>
                  <snm>Yang</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Sugio</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>FF</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2006</pubdate>
            <volume>103</volume>
            <issue>27</issue>
            <fpage>10503</fpage>
            <lpage>10508</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1502487</pubid>
                  <pubid idtype="pmpid" link="fulltext">16798873</pubid>
                  <pubid idtype="doi">10.1073/pnas.0604088103</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>Diverse members of the AvrBs3/PthA family of type III effectors are major virulence determinants in bacterial blight disease of rice</p>
            </title>
            <aug>
               <au>
                  <snm>Yang</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>FF</fnm>
               </au>
            </aug>
            <source>Mol Plant Microbe Interact</source>
            <pubdate>2004</pubdate>
            <volume>17</volume>
            <issue>11</issue>
            <fpage>1192</fpage>
            <lpage>1200</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1094/MPMI.2004.17.11.1192</pubid>
                  <pubid idtype="pmpid" link="fulltext">15553245</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <title>
               <p>High-resolution genetic mapping of <it>Xa27(t)</it>, a new bacterial blight resistance gene in rice, <it>Oryza sativa </it>L.</p>
            </title>
            <aug>
               <au>
                  <snm>Gu</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Tian</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Sreekala</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>GL</fnm>
               </au>
               <au>
                  <snm>Yin</snm>
                  <fnm>Z</fnm>
               </au>
            </aug>
            <source>Theor Appl Genet</source>
            <pubdate>2004</pubdate>
            <volume>108</volume>
            <issue>5</issue>
            <fpage>800</fpage>
            <lpage>807</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtyp