<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2105-11-318</ui>
   <ji>1471-2105</ji>
   <fm>
      <dochead>Software</dochead>
      <bibl>
         <title>
            <p>CONAN: copy number variation analysis software for genome-wide association studies</p>
         </title>
         <aug>
            <au id="A1">
               <snm>Forer</snm>
               <fnm>Lukas</fnm>
               <insr iid="I1"/>
               <email>lukas.forer@i-med.ac.at</email>
            </au>
            <au id="A2">
               <snm>Sch&#246;nherr</snm>
               <fnm>Sebastian</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <email>Sebastian.schoenherr@uibk.ac.at</email>
            </au>
            <au id="A3">
               <snm>Weissensteiner</snm>
               <fnm>Hansi</fnm>
               <insr iid="I1"/>
               <email>Hansi.weissensteiner@i-med.ac.at</email>
            </au>
            <au id="A4">
               <snm>Haider</snm>
               <fnm>Florian</fnm>
               <insr iid="I1"/>
               <email>Florian.haider@i-med.ac.at</email>
            </au>
            <au id="A5">
               <snm>Kluckner</snm>
               <fnm>Thomas</fnm>
               <insr iid="I1"/>
               <email>Thomas.kluckner@i-med.ac.at</email>
            </au>
            <au id="A6">
               <snm>Gieger</snm>
               <fnm>Christian</fnm>
               <insr iid="I3"/>
               <email>Christian.gieger@helmholtz-muenchen.de</email>
            </au>
            <au id="A7">
               <snm>Wichmann</snm>
               <fnm>Heinz-Erich</fnm>
               <insr iid="I3"/>
               <insr iid="I4"/>
               <insr iid="I5"/>
               <email>wichmann@helmholtz-muenchen.de</email>
            </au>
            <au id="A8">
               <snm>Specht</snm>
               <fnm>G&#252;nther</fnm>
               <insr iid="I2"/>
               <email>Guenther.specht@uibk.ac.at</email>
            </au>
            <au id="A9">
               <snm>Kronenberg</snm>
               <fnm>Florian</fnm>
               <insr iid="I1"/>
               <email>Florian.kronenberg@i-med.ac.at</email>
            </au>
            <au ca="yes" id="A10">
               <snm>Kloss-Brandst&#228;tter</snm>
               <fnm>Anita</fnm>
               <insr iid="I1"/>
               <email>Anita.brandstaetter@i-med.ac.at</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Division of Genetic Epidemiology, Department of Medical Genetics, Molecular and Clinical Pharmacology, Innsbruck Medical University, 6020 Innsbruck, Austria</p>
            </ins>
            <ins id="I2">
               <p>Department of Database and Information Systems, Institute of Computer Science, University of Innsbruck, 6020 Innsbruck, Austria</p>
            </ins>
            <ins id="I3">
               <p>Institute of Epidemiology, Helmholtz Center Munich, German Research Center for Environmental Health, 85764 Neuherberg, Germany</p>
            </ins>
            <ins id="I4">
               <p>Institute of Medical Informatics, Biometry and Epidemiology, Chair of Epidemiology, Ludwig-Maximilians-Universit&#228;t, 80539 Munich, Germany</p>
            </ins>
            <ins id="I5">
               <p>Klinikum Gro&#223;hadern, 80337 Munich, Germany</p>
            </ins>
         </insg>
         <source>BMC Bioinformatics</source>
         <issn>1471-2105</issn>
         <pubdate>2010</pubdate>
         <volume>11</volume>
         <issue>1</issue>
         <fpage>318</fpage>
         <url>http://www.biomedcentral.com/1471-2105/11/318</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">20546565</pubid>
               <pubid idtype="doi">10.1186/1471-2105-11-318</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>19</day>
               <month>3</month>
               <year>2010</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>14</day>
               <month>6</month>
               <year>2010</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>14</day>
               <month>6</month>
               <year>2010</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2010</year>
         <collab>Forer et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>Genome-wide association studies (GWAS) based on single nucleotide polymorphisms (SNPs) revolutionized our perception of the genetic regulation of complex traits and diseases. Copy number variations (CNVs) promise to shed additional light on the genetic basis of monogenic as well as complex diseases and phenotypes. Indeed, the number of detected associations between CNVs and certain phenotypes is constantly increasing. However, while several software packages support the determination of CNVs from SNP chip data, the downstream statistical inference of CNV-phenotype associations is still subject to complicated and inefficient in-house solutions, thus strongly limiting the performance of GWAS based on CNVs.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>CONAN is a freely available client-server software solution which provides an intuitive graphical user interface for categorizing, analyzing and associating CNVs with phenotypes. Moreover, CONAN assists the evaluation process by visualizing detected associations via Manhattan plots in order to enable a rapid identification of genome-wide significant CNV regions. Various file formats including the information on CNVs in population samples are supported as input data.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusions</p>
               </st>
               <p>CONAN facilitates the performance of GWAS based on CNVs and the visual analysis of calculated results. CONAN provides a rapid, valid and straightforward software solution to identify genetic variation underlying the 'missing' heritability for complex traits that remains unexplained by recent GWAS. The freely available software can be downloaded at <url>http://genepi-conan.i-med.ac.at</url>.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification id="refman" subtype="user_supplied_xml" type="bmc"/>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Genome-wide association studies (GWAS) have identified associations between various phenotypes and common sequence polymorphisms, which might play a role for disease development (for a comprehensive overview see <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>). For most common diseases, these discoveries collectively explain only a modest fraction (1-15%) of heritable variation of the phenotype <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. Recently, genome re-sequencing studies demonstrated that most bases that vary among human genomes reside in copy number variations (CNVs) <abbrgrp><abbr bid="B3">3</abbr></abbrgrp>. CNVs are genomic segments which are duplicated or deleted among different individuals, ranging from kilobases to several megabases in length <abbrgrp><abbr bid="B4">4</abbr></abbrgrp>. Although at least 20% of the genome was found to be copy number variable, this class of variation is, nonetheless, poorly integrated into human genetic studies. However, part of the heritability void left by GWAS could be accounted for by common CNVs. Indeed, several CNVs were recently described to be associated with complex traits: a 20-kb deletion upstream of the IRGM gene with Crohn's disease <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>, a 45-kb deletion upstream of NEGR1 with body mass index <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>, a 32-kb deletion with psoriasis <abbrgrp><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr></abbrgrp>, and a 117-kb deletion of UGT2B17 with osteoporosis <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Consequently, the next logical step is to perform GWAS based on CNVs.</p>
         <p>Available computer programs like Birdsuite <abbrgrp><abbr bid="B10">10</abbr></abbrgrp>, QuantiSNP <abbrgrp><abbr bid="B11">11</abbr></abbrgrp>, PennCNV <abbrgrp><abbr bid="B12">12</abbr></abbrgrp>, COKGEN <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>, CNV Workshop <abbrgrp><abbr bid="B14">14</abbr></abbrgrp> or the Affymetrix Genotyping Console allow the determination of CNVs from SNP array data. Furthermore, software systems exist for the management of genotypes, phenotypes and other subject-related information <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp>. Unfortunately, those tools are limited to either the calculation of CNVs or the storage of phenotypes and provide no functionality to perform genome-wide association studies based on CNVs. So far, GWAS based on CNVs have used either commercial software solutions like Helixtree (Golden Helix, Inc.), Array Studio (Omicsoft), open source software like PLINK <abbrgrp><abbr bid="B17">17</abbr></abbrgrp> or self-created R scripts for the determination of genome-wide regions of interest and for the performance of statistical analysis, especially general linear regression models. The use of different software tools for each step requires additional efforts for appropriate data conversion and slows down the process as a whole. For biologists without expertise in computer science or statistics these analyses turn out to be very difficult.</p>
         <p>We present CONAN (<b>Co</b>py <b>N</b>umber Variation <b>An</b>alysis Tool), a freely available software package to support scientists in performing GWAS based on CNVs. It was developed with the goal of creating a user-friendly, intuitive and fast software tool which covers the whole analysis process of association studies based on CNVs. To use it in real-life scenarios, a variety of de facto standard data formats are supported (Affymetrix Genotyping Console and Microsoft Excel) and all implemented algorithms are scalable and fast enough for typical problem sizes. Moreover, visual analytical methods assist the user to get a fast overview of the results.</p>
      </sec>
      <sec>
         <st>
            <p>Implementation</p>
         </st>
         <p>The CONAN software package consists of a client application and several database packages. The client application was implemented in Java <url>http://www.java.com</url>. It was successfully tested on Windows and Linux operating systems with about 1650 subjects and millions of CNVs. A user-friendly graphical interface was designed using the open source widget toolkit SWT (Standard Widget Toolkit). For wizards and progress monitor dialogs we used JFace <url>http://wiki.eclipse.org/index.php/JFace</url>. The complete application was programmed in a strictly object-oriented way using JFace's Action Framework and is based on the Model-View-Controller Pattern. Libraries such as JExcelApi and opencsv were used to enable the import of phenotypes and CNVs from a variety of different data formats. All needed Java libraries are included in the software package and need no additional installation.</p>
         <p>The users can upload their CNVs, phenotypes and genotypes directly through the client application to the server. All imported and calculated data are stored in a relational database (Oracle Database 10gR2). In order to avoid unnecessary data transfer between the client workstation and the database server, all time and data intensive analysis methods used by CONAN are executed on the database server itself. This leads to a markedly faster generation of results compared to traditional approaches where the application requests data, processes it locally and uploads the results (Figure <figr fid="F1">1</figr>). All algorithms are implemented in PL/SQL (Procedural Language/Structured Query Language) as stored procedures and are organized in several packages. The Java client uses Oracle's JDBC (Java Database Connectivity) driver to establish the connection to the database server and to invoke the stored procedures.</p>
         <fig id="F1">
            <title>
               <p>Figure 1</p>
            </title>
            <caption>
               <p>Software Architecture</p>
            </caption>
            <text>
               <p><b>Software Architecture</b>. (A) In traditional approaches the application requests data, processes it locally and uploads the results. Thus, the additional data transfer reduces the performance. (B) The two-tier architecture of CONAN outsources all data intensive algorithms to the database server. The client invokes stored procedures to execute them on the database server itself; thus no upload of data is required and the client retrieves only the informative results.</p>
            </text>
            <graphic file="1471-2105-11-318-1"/>
         </fig>
         <sec>
            <st>
               <p>CNVR detection</p>
            </st>
            <p>CNV regions (CNVRs) are defined as the union of all overlapping CNVs across subjects. As these regions are very long and therefore inadequate for the analysis, we divided them (based on the rules defined in <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>) into several sub-CNVRs (Figure <figr fid="F2">2A</figr>). The frequency of a sub-CNVR is defined as the percentage of subjects which have a CNV inside the boundaries. Only those with a frequency higher than the user-defined threshold are selected and saved in the database (Figure <figr fid="F2">2B</figr>).</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>CNVR Determination</p>
               </caption>
               <text>
                  <p><b>CNVR Determination</b>. (A) The boundaries of a sub-CNVR are determined using the start and end SNP of each CNV. (B) The basic algorithm designates a sub-CNVR as a CNVR if its frequency is greater than the threshold. (C) The extended algorithm merges consecutive sub-CNVRs and builds a single one on their basis.</p>
               </text>
               <graphic file="1471-2105-11-318-2" hint_layout="single"/>
            </fig>
            <p>More precisely, our CNVR algorithm performs the following steps to detect sub-CNVRs with a frequency greater than the threshold:</p>
            <p indent="1">1. A list is created that contains only SNPs from all study individuals on a specific chromosome that define the borders of individual CNVs; upstream SNPs are designated as "starting" SNPs "S", downstream SNPs are designated as "ending" SNPs "E"</p>
            <p indent="1">2. The list is sorted by the physical position of those SNPs (note: if several individuals have a CNV with the same starting or ending SNP, this SNP is listed for each individual separately; thus, the same SNP could be listed several times, sometimes as starting SNP, sometimes as ending SNP)</p>
            <p indent="1">3. A counter is initiated which increments on each CNV-starting SNP and decrements on each CNV-ending SNP.</p>
            <p indent="1">4. When two consecutive SNPs within this sorted list have different (ascending) physical positions, a next sub-CNVR could begin or previous sub-CNVR would end. The frequency of this potential sub-CNVR is determined with the help of the counter, and only if the frequency is greater than the user-specified threshold, the specific sub-CNVR is actually designated as CNVR.</p>
            <p indent="1">5. When two consecutive SNPs within this sorted list have exactly the same physical position, the counter is updated to the frequency of the respective CNVR as defined under step 3.</p>
            <p>Note: the boundaries of each sub-CNVR are only approximated by the physical positions of its bracketing SNPs.</p>
            <p>If the number of subjects is very large and their CNVs are highly interlaced with each other, the algorithm will detect many regions with almost all of the calculated CNVRs having a length of only two SNPs. Therefore we implemented a second algorithm which extends the former one by merging consecutive sub-CNVRs with a frequency greater than the threshold and building a single one on their basis. This leads to regions with greater length, but has the consequence that the state of a subject (e.g. deletion or amplification) in a region is no longer unique. Thus we have introduced a second threshold which is used to define the state of a subject: if the CNV is the longest in the given region and its physical length is greater than this threshold, then the state of this CNV is used for the association analysis (see Figure <figr fid="F3">3</figr>).</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Extended CNVR-Determination</p>
               </caption>
               <text>
                  <p><b>Extended CNVR-Determination</b>. The longest CNV of a subject in the CNVR with a percentage greater than the threshold (e.g. 50%) is used to define the final state.</p>
               </text>
               <graphic file="1471-2105-11-318-3" hint_layout="single"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Association analysis</p>
            </st>
            <p>A multiple linear regression model is used to discover associations between extracted CNVRs and phenotypes. The regression analysis is performed for each CNVR separately; the dependent variable is the phenotype for which an association should be calculated. As independent variables we use the state of the subject in the region and a user defined list of covariates. Covariates are phenotypes that are used for adjustment. After the estimated coefficients and the standard variations are calculated using the Gaussian Algorithm, we determine the significance (p-value) of each region using Student's t-test. A sub-CNVR is genome-wide significant if the calculated p-value is below the Bonferroni-threshold.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <p>CONAN is a cross-platform analysis software tool developed to provide several methods for genome-wide association studies based on copy number variations. An intuitive graphical user interface (GUI) enables the determination of CNV regions and carrying out association analysis through multiple regressions. In addition, the explorative process of results is supported by several interactive visualizations.</p>
         <p>CONAN implements a simple but effective workflow to enable CNV analysis (Figure <figr fid="F4">4</figr>): in a first step CNVs generated by third party software are imported and stored in a relational database. In a second step, copy number variable regions (CNVR) are determined and GWA analyses are conducted. CNVRs are defined as the union of all overlapping CNVs across subjects. Finally, CONAN provides visualizations for all CNV regions and for all results of association analysis and thus enables a rapid interpretation. CONAN is very flexible and can easily be implemented in an existing workflow without error-prone data adaptation.</p>
         <fig id="F4">
            <title>
               <p>Figure 4</p>
            </title>
            <caption>
               <p>Overview of steps</p>
            </caption>
            <text>
               <p><b>Overview of steps</b>. The CONAN analysis process is divided into three main steps: data import, data analysis, and data visualization.</p>
            </text>
            <graphic file="1471-2105-11-318-4" hint_layout="single"/>
         </fig>
         <sec>
            <st>
               <p>User interface</p>
            </st>
            <p>CONAN has a very clear and simple interface (Figure <figr fid="F5">5</figr>): on the left side of the main window, all imported datasets, their calculated CNV regions and associated analyses are organized in a tree structure with different symbols. In the center, all CNV regions of the currently selected dataset or association analysis are shown as a table (with a short summary of the parameters) and as a graphical representation. By selecting a certain CNVR a new dialog box appears which provides information about its position, its SNPs and its associations (with p-values). There is also the possibility to view the respective genomic region in the UCSC Human Genome Browser <abbrgrp><abbr bid="B19">19</abbr></abbrgrp>, HapMap Genome Browser <abbrgrp><abbr bid="B20">20</abbr></abbrgrp> or Ensembl Genome Browser <abbrgrp><abbr bid="B21">21</abbr></abbrgrp> by just another mouse click. All algorithms and functions can be executed through well-structured menus and all required parameters can be set step by step. Moreover, the user always has the full control over the execution of each algorithm and can monitor its current progress and status.</p>
            <fig id="F5">
               <title>
                  <p>Figure 5</p>
               </title>
               <caption>
                  <p>Graphical User Interface</p>
               </caption>
               <text>
                  <p><b>Graphical User Interface</b>. All imported datasets, their calculated CNV regions and associated analyses are organized in a tree structure. All CNV regions of the currently selected dataset or association analysis are shown as a table: Genome-wide significant CNVRs are highlighted in red and regions with already known associations from SNP-GWAS <url>http://www.genome.gov/gwastudies/</url> are highlighted in yellow. Information about a specified CNVR is listed in a separate dialog.</p>
               </text>
               <graphic file="1471-2105-11-318-5" hint_layout="double"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Data input and output</p>
            </st>
            <p>CONAN supports copy number variations which are determined using Affymetrix 500K SNP Arrays. Additionally, our solution supports the import of the "Copy Number Segment Summary" and the "Copy Number Segment Data" file format which can be exported from the frequently used Affymetrix Genotyping Console software. There also exists a generic importer for CNVs that were detected from any other platform with any other software tool; however, then the CNVs need to be stored in a comma separated values file format (an example can be downloaded at <url>http://genepi-conan.i-med.ac.at</url>).</p>
            <p>After the data is uploaded, a dataset is created which covers all information about the loaded CNVs and subjects. For the association analysis, different phenotypes for the same subjects are required and can be easily and at any time imported into an existing dataset. At present, CONAN allows the import of phenotypes saved in a tabular data format (Microsoft Excel or CSV) in which each row represents a certain person and contains its related phenotypic information. In addition to spreadsheet and statistic software, the efficient phenotype management software eCOMPAGT <abbrgrp><abbr bid="B15">15</abbr></abbrgrp> can also export phenotypic data for import into CONAN. CONAN automatically checks the input files to ensure that they correspond to the subjects and contain only numerical values.</p>
            <p>For further analysis with statistical software such as R <abbrgrp><abbr bid="B22">22</abbr></abbrgrp> and SPSS, all results can be exported as CSV (comma separated values) or Microsoft Excel files. Visualizations can be saved as high quality PNG images.</p>
         </sec>
         <sec>
            <st>
               <p>Analysis methods</p>
            </st>
            <p>Once all data are stored in the database, the analysis process starts with the determination of CNV regions. For this purpose we have implemented the procedure described in <abbrgrp><abbr bid="B18">18</abbr><abbr bid="B23">23</abbr></abbrgrp> for the detection of regions where the number of subjects which have a CNV (with either a gain or a loss) therein is greater than a given threshold. In addition to this threshold, the user can also control the minimal number of consecutive SNPs which is used to define a CNV (CNVs that involve fewer SNPs than the threshold are discarded).</p>
            <p>If the number of subjects is huge (>1000) and their CNVs are highly interlaced with each other, the algorithm will detect many regions with almost all of the calculated CNVRs having a length of only two SNPs. Therefore, we developed a second algorithm which extends the former procedure by merging consecutive regions and building a single one on their basis.</p>
            <p>Table <tblr tid="T1">1</tblr> summarizes several algorithm runs with different parameters to demonstrate their impact on the resulting regions. The results suggest that the number of CNV regions and the execution time depend on the total number of subjects, total number of CNVs and the threshold parameters (see "Validation" for a description of the dataset).</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Execution times for the calculation of CNVRs</p>
               </caption>
               <tblbdy cols="3">
                  <r>
                     <c ca="center">
                        <p>
                           <b>Frequency Threshold [%]</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Number of CNVRs</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Execution Time [sec]</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>5</p>
                     </c>
                     <c ca="center">
                        <p>25,007</p>
                     </c>
                     <c ca="center">
                        <p>1,957</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>10</p>
                     </c>
                     <c ca="center">
                        <p>11,720</p>
                     </c>
                     <c ca="center">
                        <p>949</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>15</p>
                     </c>
                     <c ca="center">
                        <p>6,162</p>
                     </c>
                     <c ca="center">
                        <p>521</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>20</p>
                     </c>
                     <c ca="center">
                        <p>3,440</p>
                     </c>
                     <c ca="center">
                        <p>310</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>25</p>
                     </c>
                     <c ca="center">
                        <p>2,049</p>
                     </c>
                     <c ca="center">
                        <p>220</p>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p>The analyses were run on 1,644 subjects with on average 7,130 CNVs per sample. Apart from the total number of subjects and the total number of CNVs, the resulting number of CNV regions and the execution time depend on the threshold parameters.</p>
               </tblfn>
            </tbl>
            <p>After CNV regions were calculated, the user is provided with the ability to perform GWA analysis on their basis. For this task we provide a multiple linear regression model (assuming an additive genetic model) which enables the discovery of associations between the detected regions and the imported phenotypes. A second association analysis method combines the genotyping data from SNPs with the states of detected CNVRs in order to discover associations between the cumulative effect of SNPs and CNVs and phenotypes. In both cases the user can select the dependent phenotype (e.g. blood sugar level) and a list of phenotypes which should be used for adjustment (e.g. sex, age, BMI). The software automatically calculates the corresponding p-values for all selected regions and checks for genome-wide significance after Bonferroni-correction for multiple testing (p &lt; 0.05/total number of CNV regions). In some cases it is necessary to perform the analysis only on subjects with particular properties (for example only subjects whose blood was collected after an overnight fasting period or only male subjects). Therefore, it is possible to build user defined filters in order to perform the association analysis only on a subset of all available data.</p>
            <p>In addition, to save CPU time, already detected CNV regions can be reused for several studies. These can be compared quickly to see their difference and to identify the impact of each changed parameter.</p>
         </sec>
         <sec>
            <st>
               <p>Data visualization</p>
            </st>
            <p>The interpretation of tables with thousands of regions is a complex and time-consuming task. Therefore, to assist the user, we have implemented several interactive visualizations to discover regions of interest in a fast manner and to show their attributes on demand. CONAN depicts the distribution of all detected CNV regions on each chromosome (Figure <figr fid="F6">6A</figr>). The results of an association study can be visualized with a Manhattan-Plot in which all p-values are plotted using log<sub>10</sub>-transformation and each chromosome has a different color; genome-wide significant hits can be found above the Bonferroni-threshold line which is automatically drawn considering the number of tests performed (Figure <figr fid="F6">6B</figr>). Every plotted p-value can be addressed by a mouse click, and a short overview of its properties appears. As a special feature, CONAN compares the detected regions with already known and published associations from the GWAS database <abbrgrp><abbr bid="B24">24</abbr></abbrgrp>. Genomic regions that are known to be associated with the phenotype or disease in question are highlighted in yellow (Figure <figr fid="F5">5</figr>).</p>
            <fig id="F6">
               <title>
                  <p>Figure 6</p>
               </title>
               <caption>
                  <p>Data Visualization</p>
               </caption>
               <text>
                  <p><b>Data Visualization</b>. CONAN supports the analysis process by several visualizations: (A) Visualization of the distribution of all detected CNV regions on each chromosome. (B) Visualization of associations via Manhattan plot enables a rapid identification of genome-wide significant CNVRs.</p>
               </text>
               <graphic file="1471-2105-11-318-6" hint_layout="single"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Validation</p>
            </st>
            <p>In order to verify the implemented algorithms we have tested CONAN with sample data consisting of 1,644 KORA subjects <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>. The Affymetrix 500K SNP Chip data were analyzed by DARVIN, our in-house software solution for CNV detection using a Hidden Markov Model after identification of chromosomal gains and losses by comparing the intensity of the probe sets of all subjects with a reference set (manuscript under review). The software detected about 7,130 CNVs per sample on average. As phenotypes we used BMI, gender and age. CONAN has discovered the same associations between BMI and CNVs as previously suggested: (1) near the gene KCTD15 <abbrgrp><abbr bid="B6">6</abbr></abbrgrp> we have discovered a CNVR on 19q13.11 with a p-value of 0.003; (2) on 5p15.33 <abbrgrp><abbr bid="B18">18</abbr></abbrgrp> we have discovered a CNVR with a p-value of 0.009.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>We present CONAN, a new and useful tool for GWAS based on CNVs detected by third party computer programs. It combines the individual steps of the whole analysis process into one user-friendly software solution. Because all time-intensive algorithms are outsourced to the database server, the software works very fast and scales well.</p>
         <p>The analysis of millions of CNVs is a very time-consuming task; therefore we have optimized the applied algorithms with respect to two different aspects: First we optimized the algorithm in terms of run-time and time-complexity; then we optimized the SQL queries used and created indices on the underlying tables to enable fast data retrieval. As a consequence, the import of CNVs requires more time, but as the focus of our software lies on the analysis, fast query results are more important.</p>
         <sec>
            <st>
               <p>Comparison with similar software packages</p>
            </st>
            <p>The open source command line tool <b>Birdsuite </b><abbrgrp><abbr bid="B10">10</abbr></abbrgrp> enables the detection of CNVs and provides several scripts in order to perform GWAS on the results using PLINK <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>. Visualizations are possible with gPLINK. <b>SCIMMkit </b><abbrgrp><abbr bid="B26">26</abbr></abbrgrp> is also open source and provides a command line tool which enables the targeted interrogation of CNVs using Illumina Infinium II and GoldenGate SNP assays; association analysis with phenotypes is not yet provided. <b>Helixtree </b>and <b>Array Studio </b>are both commercial solutions and support a variety of input formats (CNVs detected by analyzing Affymetrix SNP arrays and Illumina arrays). GWAS are performed through a user-friendly GUI and different graphical representations enable a rapid interpretation. However, most of those approaches are client-oriented and perform their calculations locally; this leads to poor scalability and all results are stored on different workstations and not on a central machine. This is an important aspect because the amount of the genotyping data for GWAS is increasing continuously and in a non-linear manner; thus high-performance data retrieval is an important issue. CONAN solves this problem by outsourcing all tasks to a central database server and by using the client workstation only for the presentation of the results.</p>
         </sec>
         <sec>
            <st>
               <p>Strengths and limitations</p>
            </st>
            <p>Our software has several strengths: (1) Extensive tests with real data demonstrated that the analysis of a study with about 1,600 subjects and hundreds of thousands of CNVs can be performed with CONAN without any problems and in reasonable time frames. (2) Due to an intuitive user interface and a detailed user manual, no knowledge of computer science or statistics is required to perform the association analysis. (3) With the help of the Manhattan Plot it is possible to spot within seconds which regions are genome-wide significant. In addition, various export functions enable the further usage of the newly-detected information in other software packages such as R or SPSS (see Table <tblr tid="T2">2</tblr> for a complete list of all key features). CONAN has limitations as well, as it supports only phenotypes with numerical values; phenotypes at nominal level must be pre-processed and encoded numerically before they can be imported. However, the next version of CONAN is conceived to provide functions for labelling nominal variables automatically with numbers. Moreover, CONAN is presently limited to the analysis of Affymetrix SNP Arrays, but an extension to the import of Illumina data is planned for the next release. CNVs generated by QuantiSNP <abbrgrp><abbr bid="B11">11</abbr></abbrgrp> or PennCNV <abbrgrp><abbr bid="B12">12</abbr></abbrgrp> must be converted into a CSV file before they can be used in the software. However, direct support for those data formats is planned. Finally, an interface between CONAN and eCOMPAGT <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp> should eliminate the error-prone export and import tasks of phenotype data through files.</p>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Key Features</p>
               </caption>
               <tblbdy cols="2">
                  <r>
                     <c ca="left">
                        <p>
                           <b>Feature</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Details</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="2">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Supported CNV File Formats</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Affymetrix Genotyping Console</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Microsoft Excel</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Comma Separated Values</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Supported SNP Arrays</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Affymetrix SNP Array 500K</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Supported Phenotype File Formats</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Microsoft Excel</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Comma Separated Values</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Supported Genotype File Formats</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Affymetrix SNP 500K .call files</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="2">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Algorithms</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Genome-wide CNVR-determination</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Genome-wide CNV-phenotype association analysis</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="2">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Visualization</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Interactive Manhattan Plot with automatically drawn Bonferroni-threshold line</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Visualization of the distribution of detected CNV regions on each chromosome</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Exporting of all visualizations as PNG and JPEG images</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="2">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Analysis</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Filtering and searching of statistical results</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Highlighting of genome-wide significant results</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Highlighting of regions which fit with results from the GWAS database</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Exporting of all results as Microsoft Excel or CSV-Files</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Direct links to entries in public databases:</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>UCSC Genome Browser, NCBI dbSNP, Ensembl, HapMap</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Conclusions</p>
         </st>
         <p>CONAN facilitates the performance of GWAS based on CNVs and the visual analysis of calculated results. CONAN provides a rapid, valid and straightforward software solution to identify genetic variation underlying the 'missing' heritability for complex traits that remains unexplained by recent GWAS. The freely available software can be downloaded at <url>http://genepi-conan.i-med.ac.at</url>.</p>
      </sec>
      <sec>
         <st>
            <p>Availability and requirements</p>
         </st>
         <p>Project name: CONAN</p>
         <p>Project home page: <url>http://genepi-conan.i-med.ac.at</url></p>
         <p>Operating system(s): Windows and Linux</p>
         <p>Programming language: Java</p>
         <p>Other requirements: Java 1.5+, relational database (Oracle)</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>LF was responsible for programming and designing CONAN and drafted the manuscript. TK and FH provided CNV data from KORA. SS and HW helped with database issues. CG and HEW were responsible for the KORA study. FK and GS helped to draft the manuscript. AK-B initiated the project, supervised it and drafted the manuscript. All authors read and approved the final manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>The authors appreciate the kind assistance of the IT-service team at the Innsbruck Medical University, especially Josef Radinger, Michele Paoli and Ursula Schmida. In addition, we are grateful for the advice of Claudia Lamina and Stefan Coassin (Division of Genetic Epidemiology, Innsbruck Medical University). This work was supported by the Medizinische Forschungsf&#246;rderung Innsbruck (Grant 2007-402 to A.B.), the &#214;sterreichische Nationalbank (Grant 13059 to A.B.), the Austrian GEN-AU-Program "GOLD" (Grant 820979 to F.K.) and by ONCOTYROL (SFB 021).</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>An open access database of genome-wide association results</p>
            </title>
            <aug>
               <au>
                  <snm>Johnson</snm>
                  <fnm>AD</fnm>
               </au>
               <au>
                  <snm>O'Donnell</snm>
                  <fnm>CJ</fnm>
               </au>
            </aug>
            <source>BMC Med Genet</source>
            <pubdate>2009</pubdate>
            <volume>10</volume>
            <fpage>6</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1471-2350-10-6</pubid>
                  <pubid idtype="pmcid">2639349</pubid>
                  <pubid idtype="pmpid" link="fulltext">19161620</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Extending genome-wide association studies to copy-number variation</p>
            </title>
            <aug>
               <au>
                  <snm>McCarroll</snm>
                  <fnm>SA</fnm>
               </au>
            </aug>
            <source>Hum Mol Genet</source>
            <pubdate>2008</pubdate>
            <volume>17</volume>
            <fpage>R135</fpage>
            <lpage>R142</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/hmg/ddn282</pubid>
                  <pubid idtype="pmpid" link="fulltext">18852202</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>The complete genome of an individual by massively parallel DNA sequencing</p>
            </title>
            <aug>
               <au>
                  <snm>Wheeler</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Srinivasan</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Egholm</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Shen</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>McGuire</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>YJ</fnm>
               </au>
               <au>
                  <snm>Makhijani</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Roth</snm>
                  <fnm>GT</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2008</pubdate>
            <volume>452</volume>
            <fpage>872</fpage>
            <lpage>876</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature06884</pubid>
                  <pubid idtype="pmpid" link="fulltext">18421352</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Global variation in copy number in the human genome</p>
            </title>
            <aug>
               <au>
                  <snm>Redon</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Ishikawa</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Fitch</snm>
                  <fnm>KR</fnm>
               </au>
               <au>
                  <snm>Feuk</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Perry</snm>
                  <fnm>GH</fnm>
               </au>
               <au>
                  <snm>Andrews</snm>
                  <fnm>TD</fnm>
               </au>
               <au>
                  <snm>Fiegler</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Shapero</snm>
                  <fnm>MH</fnm>
               </au>
               <au>
                  <snm>Carson</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>W</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2006</pubdate>
            <volume>444</volume>
            <fpage>444</fpage>
            <lpage>454</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature05329</pubid>
                  <pubid idtype="pmcid">2669898</pubid>
                  <pubid idtype="pmpid" link="fulltext">17122850</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Deletion polymorphism upstream of IRGM associated with altered IRGM expression and Crohn's disease</p>
            </title>
            <aug>
               <au>
                  <snm>McCarroll</snm>
                  <fnm>SA</fnm>
               </au>
               <au>
                  <snm>Huett</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Kuballa</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Chilewski</snm>
                  <fnm>SD</fnm>
               </au>
               <au>
                  <snm>Landry</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Goyette</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Zody</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Hall</snm>
                  <fnm>JL</fnm>
               </au>
               <au>
                  <snm>Brant</snm>
                  <fnm>SR</fnm>
               </au>
               <au>
                  <snm>Cho</snm>
                  <fnm>JH</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2008</pubdate>
            <volume>40</volume>
            <fpage>1107</fpage>
            <lpage>1112</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng.215</pubid>
                  <pubid idtype="pmcid">2731799</pubid>
                  <pubid idtype="pmpid" link="fulltext">19165925</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Six new loci associated with body mass index highlight a neuronal influence on body weight regulation</p>
            </title>
            <aug>
               <au>
                  <snm>Willer</snm>
                  <fnm>CJ</fnm>
               </au>
               <au>
                  <snm>Speliotes</snm>
                  <fnm>EK</fnm>
               </au>
               <au>
                  <snm>Loos</snm>
                  <fnm>RJ</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Lindgren</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Heid</snm>
                  <fnm>IM</fnm>
               </au>
               <au>
                  <snm>Berndt</snm>
                  <fnm>SI</fnm>
               </au>
               <au>
                  <snm>Elliott</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Jackson</snm>
                  <fnm>AU</fnm>
               </au>
               <au>
                  <snm>Lamina</snm>
                  <fnm>C</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2009</pubdate>
            <volume>41</volume>
            <fpage>25</fpage>
            <lpage>34</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng.287</pubid>
                  <pubid idtype="pmcid">2695662</pubid>
                  <pubid idtype="pmpid" link="fulltext">19079261</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Deletion of the late cornified envelope LCE3B and LCE3C genes as a susceptibility factor for psoriasis</p>
            </title>
            <aug>
               <au>
                  <snm>de Cid</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Riveira-Munoz</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Zeeuwen</snm>
                  <fnm>PL</fnm>
               </au>
               <au>
                  <snm>Robarge</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Liao</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Dannhauser</snm>
                  <fnm>EN</fnm>
               </au>
               <au>
                  <snm>Giardina</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Stuart</snm>
                  <fnm>PE</fnm>
               </au>
               <au>
                  <snm>Nair</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Helms</snm>
                  <fnm>C</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2009</pubdate>
            <volume>41</volume>
            <fpage>211</fpage>
            <lpage>215</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng.313</pubid>
                  <pubid idtype="pmpid" link="fulltext">19169253</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>Psoriasis is associated with increased beta-defensin genomic copy number</p>
            </title>
            <aug>
               <au>
                  <snm>Hollox</snm>
                  <fnm>EJ</fnm>
               </au>
               <au>
                  <snm>Huffmeier</snm>
                  <fnm>U</fnm>
               </au>
               <au>
                  <snm>Zeeuwen</snm>
                  <fnm>PL</fnm>
               </au>
               <au>
                  <snm>Palla</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Lascorz</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Rodijk-Olthuis</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>van de Kerkhof</snm>
                  <fnm>PC</fnm>
               </au>
               <au>
                  <snm>Traupe</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>de Jongh</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>den Heijer</snm>
                  <fnm>M</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2008</pubdate>
            <volume>40</volume>
            <fpage>23</fpage>
            <lpage>25</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng.2007.48</pubid>
                  <pubid idtype="pmcid">2447885</pubid>
                  <pubid idtype="pmpid" link="fulltext">18059266</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Genome-wide copy-number-variation study identified a susceptibility gene, UGT2B17, for osteoporosis</p>
            </title>
            <aug>
               <au>
                  <snm>Yang</snm>
                  <fnm>TL</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>XD</fnm>
               </au>
               <au>
                  <snm>Guo</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Lei</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>JT</fnm>
               </au>
               <au>
                  <snm>Zhou</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Pan</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>ZX</fnm>
               </au>
               <au>
                  <snm>Dong</snm>
                  <fnm>SS</fnm>
               </au>
               <etal/>
            </aug>
            <source>Am J Hum Genet</source>
            <pubdate>2008</pubdate>
            <volume>83</volume>
            <fpage>663</fpage>
            <lpage>674</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.ajhg.2008.10.006</pubid>
                  <pubid idtype="pmcid">2667994</pubid>
                  <pubid idtype="pmpid" link="fulltext">18992858</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Integrated genotype calling and association analysis of SNPs, common copy number polymorphisms and rare CNVs</p>
            </title>
            <aug>
               <au>
                  <snm>Korn</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Kuruvilla</snm>
                  <fnm>FG</fnm>
               </au>
               <au>
                  <snm>McCarroll</snm>
                  <fnm>SA</fnm>
               </au>
               <au>
                  <snm>Wysoker</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nemesh</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Cawley</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hubbell</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Veitch</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Collins</snm>
                  <fnm>PJ</fnm>
               </au>
               <au>
                  <snm>Darvishi</snm>
                  <fnm>K</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2008</pubdate>
            <volume>40</volume>
            <fpage>1253</fpage>
            <lpage>1260</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng.237</pubid>
                  <pubid idtype="pmcid">2756534</pubid>
                  <pubid idtype="pmpid" link="fulltext">18776909</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>QuantiSNP: an Objective Bayes Hidden-Markov Model to detect and accurately map copy number variation using SNP genotyping data</p>
            </title>
            <aug>
               <au>
                  <snm>Colella</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Yau</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Taylor</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Mirza</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Butler</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Clouston</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Bassett</snm>
                  <fnm>AS</fnm>
               </au>
               <au>
                  <snm>Seller</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Holmes</snm>
                  <fnm>CC</fnm>
               </au>
               <au>
                  <snm>Ragoussis</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2007</pubdate>
            <volume>35</volume>
            <fpage>2013</fpage>
            <lpage>2025</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/nar/gkm076</pubid>
                  <pubid idtype="pmcid">1874617</pubid>
                  <pubid idtype="pmpid" link="fulltext">17341461</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>PennCNV: an integrated hidden Markov model designed for high-resolution copy number variation detection in whole-genome SNP genotyping data</p>
            </title>
            <aug>
               <au>
                  <snm>Wang</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Hadley</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Glessner</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Grant</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Hakonarson</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Bucan</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2007</pubdate>
            <volume>17</volume>
            <fpage>1665</fpage>
            <lpage>1674</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1101/gr.6861907</pubid>
                  <pubid idtype="pmcid">2045149</pubid>
                  <pubid idtype="pmpid" link="fulltext">17921354</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Cokgen: a software for the identification of rare copy number variation from SNP microarrays</p>
            </title>
            <aug>
               <au>
                  <snm>Yavas</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Koyuturk</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Ozsoyoglu</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Gould</snm>
                  <fnm>MP</fnm>
               </au>
               <au>
                  <snm>LaFramboise</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>Pac Symp Biocomput</source>
            <pubdate>2010</pubdate>
            <fpage>371</fpage>
            <lpage>382</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">19908389</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>CNV Workshop: an integrated platform for high-throughput copy number variation discovery and clinical diagnostics</p>
            </title>
            <aug>
               <au>
                  <snm>Gai</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Perin</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Murphy</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>O'Hara</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>D'arcy</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wenocur</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Xie</snm>
                  <fnm>HM</fnm>
               </au>
               <au>
                  <snm>Rappaport</snm>
                  <fnm>EF</fnm>
               </au>
               <au>
                  <snm>Shaikh</snm>
                  <fnm>TH</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>PS</fnm>
               </au>
            </aug>
            <source>BMC Bioinformatics</source>
            <pubdate>2010</pubdate>
            <volume>11</volume>
            <fpage>74</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1471-2105-11-74</pubid>
                  <pubid idtype="pmcid">2827374</pubid>
                  <pubid idtype="pmpid" link="fulltext">20132550</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>eCOMPAGT &#8211; efficient combination and management of phenotypes and genotypes for genetic epidemiology</p>
            </title>
            <aug>
               <au>
                  <snm>Sch&#246;nherr</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Weissensteiner</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Coassin</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Specht</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kronenberg</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Brandst&#228;tter</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>BMC Bioinformatics</source>
            <pubdate>2009</pubdate>
            <volume>10</volume>
            <fpage>139</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1471-2105-10-139</pubid>
                  <pubid idtype="pmcid">2685123</pubid>
                  <pubid idtype="pmpid" link="fulltext">19432954</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>eCOMPAGT integrates mtDNA: import, validation and export of mitochondrial DNA profiles for population genetics, tumour dynamics and genotype-phenotype association studies</p>
            </title>
            <aug>
               <au>
                  <snm>Weissensteiner</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Sch&#246;nherr</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Specht</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kronenberg</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Brandst&#228;tter</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>BMC Bioinformatics</source>
            <pubdate>2010</pubdate>
            <volume>11</volume>
            <fpage>122</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1471-2105-11-122</pubid>
                  <pubid idtype="pmcid">2841209</pubid>
                  <pubid idtype="pmpid" link="fulltext">20214782</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>PLINK: a tool set for whole-genome association and population-based linkage analyses</p>
            </title>
            <aug>
               <au>
                  <snm>Purcell</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Neale</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Todd-Brown</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Thomas</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Ferreira</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Bender</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Maller</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Sklar</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>de Bakker</snm>
                  <fnm>PI</fnm>
               </au>
               <au>
                  <snm>Daly</snm>
                  <fnm>MJ</fnm>
               </au>
               <etal/>
            </aug>
            <source>Am J Hum Genet</source>
            <pubdate>2007</pubdate>
            <volume>81</volume>
            <fpage>559</fpage>
            <lpage>575</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1086/519795</pubid>
                  <pubid idtype="pmcid">1950838</pubid>
                  <pubid idtype="pmpid" link="fulltext">17701901</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Genome-wide association study suggested copy number variation may be associated with body mass index in the Chinese population</p>
            </title>
            <aug>
               <au>
                  <snm>Sha</snm>
                  <fnm>BY</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>TL</fnm>
               </au>
               <au>
                  <snm>Zhao</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>XD</fnm>
               </au>
               <au>
                  <snm>Guo</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Pan</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>ZX</fnm>
               </au>
               <au>
                  <snm>Dong</snm>
                  <fnm>SS</fnm>
               </au>
               <au>
                  <snm>Xu</snm>
                  <fnm>XH</fnm>
               </au>
               <etal/>
            </aug>
            <source>J Hum Genet</source>
            <pubdate>2009</pubdate>
            <volume>54</volume>
            <fpage>199</fpage>
            <lpage>202</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/jhg.2009.10</pubid>
                  <pubid idtype="pmcid">2733232</pubid>
                  <pubid idtype="pmpid" link="fulltext">19229253</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>The human genome browser at UCSC</p>
            </title>
            <aug>
               <au>
                  <snm>Kent</snm>
                  <fnm>WJ</fnm>
               </au>
               <au>
                  <snm>Sugnet</snm>
                  <fnm>CW</fnm>
               </au>
               <au>
                  <snm>Furey</snm>
                  <fnm>TS</fnm>
               </au>
               <au>
                  <snm>Roskin</snm>
                  <fnm>KM</fnm>
               </au>
               <au>
                  <snm>Pringle</snm>
                  <fnm>TH</fnm>
               </au>
               <au>
                  <snm>Zahler</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Haussler</snm>
                  <fnm>D</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2002</pubdate>
            <volume>12</volume>
            <fpage>996</fpage>
            <lpage>1006</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">186604</pubid>
                  <pubid idtype="pmpid" link="fulltext">12045153</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>The International HapMap Project Web site</p>
            </title>
            <aug>
               <au>
                  <snm>Thorisson</snm>
                  <fnm>GA</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>AV</fnm>
               </au>
               <au>
                  <snm>Krishnan</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Stein</snm>
                  <fnm>LD</fnm>
               </au>
            </aug>
            <source>Genome Res</source>
            <pubdate>2005</pubdate>
            <volume>15</volume>
            <fpage>1592</fpage>
            <lpage>1593</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1101/gr.4413105</pubid>
                  <pubid idtype="pmcid">1310647</pubid>
                  <pubid idtype="pmpid" link="fulltext">16251469</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>Ensembl 2008</p>
            </title>
            <aug>
               <au>
                  <snm>Flicek</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Aken</snm>
                  <fnm>BL</fnm>
               </au>
               <au>
                  <snm>Beal</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Ballester</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Caccamo</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Clarke</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Coates</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Cunningham</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Cutts</snm>
                  <fnm>T</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2008</pubdate>
            <volume>36</volume>
            <fpage>D707</fpage>
            <lpage>D714</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/nar/gkm988</pubid>
                  <pubid idtype="pmcid">2238821</pubid>
                  <pubid idtype="pmpid" link="fulltext">18000006</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <aug>
               <au>
                  <cnm>R Development Core Team</cnm>
               </au>
            </aug>
            <source>R: A language and environment for statistical computing</source>
            <publisher>Vienna, Austria: R Foundation for Statistical Computing</publisher>
            <pubdate>2008</pubdate>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Genome-wide copy number variation association study suggested VPS13B gene for osteoporosis in Caucasians</p>
            </title>
            <aug>
               <au>
                  <snm>Deng</snm>
                  <fnm>FY</fnm>
               </au>
               <au>
                  <snm>Zhao</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Pei</snm>
                  <fnm>YF</fnm>
               </au>
               <au>
                  <snm>Sha</snm>
                  <fnm>BY</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>XG</fnm>
               </au>
               <au>
                  <snm>Yan</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>TL</fnm>
               </au>
               <au>
                  <snm>Recker</snm>
                  <fnm>RR</fnm>
               </au>
               <au>
                  <snm>Papasian</snm>
                  <fnm>CJ</fnm>
               </au>
               <etal/>
            </aug>
            <source>Osteoporos Int</source>
            <pubdate>2010</pubdate>
            <volume>21</volume>
            <fpage>579</fpage>
            <lpage>587</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/s00198-009-0998-7</pubid>
                  <pubid idtype="pmpid" link="fulltext">19680589</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Potential etiologic and functional implications of genome-wide association loci for human diseases and traits</p>
            </title>
            <aug>
               <au>
                  <snm>Hindorff</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Sethupathy</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Junkins</snm>
                  <fnm>HA</fnm>
               </au>
               <au>
                  <snm>Ramos</snm>
                  <fnm>EM</fnm>
               </au>
               <au>
                  <snm>Mehta</snm>
                  <fnm>JP</fnm>
               </au>
               <au>
                  <snm>Collins</snm>
                  <fnm>FS</fnm>
               </au>
               <au>
                  <snm>Manolio</snm>
                  <fnm>TA</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2009</pubdate>
            <volume>106</volume>
            <fpage>9362</fpage>
            <lpage>9367</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1073/pnas.0903103106</pubid>
                  <pubid idtype="pmcid">2687147</pubid>
                  <pubid idtype="pmpid" link="fulltext">19474294</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>KORA-gen &#8211; resource for population genetics, controls and a broad spectrum of disease phenotypes</p>
            </title>
            <aug>
               <au>
                  <snm>Wichmann</snm>
                  <fnm>HE</fnm>
               </au>
               <au>
                  <snm>Gieger</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Illig</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>Gesundheitswesen</source>
            <pubdate>2005</pubdate>
            <volume>67</volume>
            <issue>Suppl 1</issue>
            <fpage>S26</fpage>
            <lpage>S30</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1055/s-2005-858226</pubid>
                  <pubid idtype="pmpid">16032514</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Targeted interrogation of copy number variation using SCIMMkit</p>
            </title>
            <aug>
               <au>
                  <snm>Zerr</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Cooper</snm>
                  <fnm>GM</fnm>
               </au>
               <au>
                  <snm>Eichler</snm>
                  <fnm>EE</fnm>
               </au>
               <au>
                  <snm>Nickerson</snm>
                  <fnm>DA</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2010</pubdate>
            <volume>26</volume>
            <fpage>120</fpage>
            <lpage>122</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/btp606</pubid>
                  <pubid idtype="pmcid">2796813</pubid>
                  <pubid idtype="pmpid" link="fulltext">19846438</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>

