<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2164-7-96</ui>
   <ji>1471-2164</ji>
   <fm>
      <dochead>Research article</dochead>
      <bibl>
         <title>
            <p>The molecular portraits of breast tumors are conserved across microarray platforms</p>
         </title>
         <aug>
            <au id="A1">
               <snm>Hu</snm>
               <fnm>Zhiyuan</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <email>zhiyuan_hu@med.unc.edu</email>
            </au>
            <au id="A2">
               <snm>Fan</snm>
               <fnm>Cheng</fnm>
               <insr iid="I1"/>
               <email>cfan2004@gmail.com</email>
            </au>
            <au id="A3">
               <snm>Oh</snm>
               <mi>S</mi>
               <fnm>Daniel</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <email>daniel_oh@med.unc.edu</email>
            </au>
            <au id="A4">
               <snm>Marron</snm>
               <fnm>JS</fnm>
               <insr iid="I3"/>
               <email>marron@email.unc.edu</email>
            </au>
            <au id="A5">
               <snm>He</snm>
               <fnm>Xiaping</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <email>xiaping_he@med.unc.edu</email>
            </au>
            <au id="A6">
               <snm>Qaqish</snm>
               <mi>F</mi>
               <fnm>Bahjat</fnm>
               <insr iid="I4"/>
               <email>qaqish@bios.unc.edu</email>
            </au>
            <au id="A7">
               <snm>Livasy</snm>
               <fnm>Chad</fnm>
               <insr iid="I5"/>
               <email>chad_livasy@med.unc.edu</email>
            </au>
            <au id="A8">
               <snm>Carey</snm>
               <mi>A</mi>
               <fnm>Lisa</fnm>
               <insr iid="I6"/>
               <email>Lisa_Carey@med.unc.edu</email>
            </au>
            <au id="A9">
               <snm>Reynolds</snm>
               <fnm>Evangeline</fnm>
               <insr iid="I6"/>
               <email>evangeline_reynolds@med.unc.edu</email>
            </au>
            <au id="A10">
               <snm>Dressler</snm>
               <fnm>Lynn</fnm>
               <insr iid="I6"/>
               <email>lynn_dressler@med.unc.edu</email>
            </au>
            <au id="A11">
               <snm>Nobel</snm>
               <fnm>Andrew</fnm>
               <insr iid="I3"/>
               <email>nobel@email.unc.edu</email>
            </au>
            <au id="A12">
               <snm>Parker</snm>
               <fnm>Joel</fnm>
               <insr iid="I7"/>
               <email>jparker@constellagroup.com</email>
            </au>
            <au id="A13">
               <snm>Ewend</snm>
               <mi>G</mi>
               <fnm>Matthew</fnm>
               <insr iid="I6"/>
               <email>ewend@med.unc.edu</email>
            </au>
            <au id="A14">
               <snm>Sawyer</snm>
               <mi>R</mi>
               <fnm>Lynda</fnm>
               <insr iid="I6"/>
               <email>lynda_sawyer@med.unc.edu</email>
            </au>
            <au id="A15">
               <snm>Wu</snm>
               <fnm>Junyuan</fnm>
               <insr iid="I1"/>
               <email>junyuan_wu@med.unc.edu</email>
            </au>
            <au id="A16">
               <snm>Liu</snm>
               <fnm>Yudong</fnm>
               <insr iid="I1"/>
               <email>ydliu@email.unc.edu</email>
            </au>
            <au id="A17">
               <snm>Nanda</snm>
               <fnm>Rita</fnm>
               <insr iid="I8"/>
               <email>rnanda@medicine.bsd.uchicago.edu</email>
            </au>
            <au id="A18">
               <snm>Tretiakova</snm>
               <fnm>Maria</fnm>
               <insr iid="I8"/>
               <email>mtretiakova@medicine.bsd.uchicago.edu</email>
            </au>
            <au id="A19">
               <snm>Orrico</snm>
               <mnm>Ruiz</mnm>
               <fnm>Alejandra</fnm>
               <insr iid="I9"/>
               <email>Alejandra.RuizOrrico@jefferson.edu</email>
            </au>
            <au id="A20">
               <snm>Dreher</snm>
               <fnm>Donna</fnm>
               <insr iid="I9"/>
               <email>donna.dreher@jefferson.edu</email>
            </au>
            <au id="A21">
               <snm>Palazzo</snm>
               <mi>P</mi>
               <fnm>Juan</fnm>
               <insr iid="I9"/>
               <email>juan.palazzo@jefferson.edu</email>
            </au>
            <au id="A22">
               <snm>Perreard</snm>
               <fnm>Laurent</fnm>
               <insr iid="I10"/>
               <email>laurent.perreard@hci.utah.edu</email>
            </au>
            <au id="A23">
               <snm>Nelson</snm>
               <fnm>Edward</fnm>
               <insr iid="I11"/>
               <email>edward.nelson@hsc.utah.edu</email>
            </au>
            <au id="A24">
               <snm>Mone</snm>
               <fnm>Mary</fnm>
               <insr iid="I11"/>
               <email>mary.mone@hsc.utah.edu</email>
            </au>
            <au id="A25">
               <snm>Hansen</snm>
               <fnm>Heidi</fnm>
               <insr iid="I11"/>
               <email>heidi.hansen@hsc.utah.edu</email>
            </au>
            <au id="A26">
               <snm>Mullins</snm>
               <fnm>Michael</fnm>
               <insr iid="I12"/>
               <email>michael.mullins@hci.utah.edu</email>
            </au>
            <au id="A27">
               <snm>Quackenbush</snm>
               <mi>F</mi>
               <fnm>John</fnm>
               <insr iid="I12"/>
               <email>jonquack@hotmail.com</email>
            </au>
            <au id="A28">
               <snm>Ellis</snm>
               <mi>J</mi>
               <fnm>Matthew</fnm>
               <insr iid="I13"/>
               <email>mellis@im.wustl.edu</email>
            </au>
            <au id="A29">
               <snm>Olopade</snm>
               <mi>I</mi>
               <fnm>Olufunmilayo</fnm>
               <insr iid="I8"/>
               <email>folopade@medicine.bsd.uchicago.edu</email>
            </au>
            <au id="A30">
               <snm>Bernard</snm>
               <mi>S</mi>
               <fnm>Philip</fnm>
               <insr iid="I12"/>
               <email>phil.bernard@hci.utah.edu</email>
            </au>
            <au id="A31" ca="yes">
               <snm>Perou</snm>
               <mi>M</mi>
               <fnm>Charles</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <insr iid="I5"/>
               <email>cperou@med.unc.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Lineberger Comprehensive Cancer Center, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I2">
               <p>Department of Genetics, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I3">
               <p>Department of Statistics and Operations Research, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I4">
               <p>Department of Biostatistics, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I5">
               <p>Department of Pathology and Laboratory Medicine, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I6">
               <p>Department of Medicine, University of North Carolina, Chapel Hill, NC 27599, USA</p>
            </ins>
            <ins id="I7">
               <p>Constella Health Sciences, 2605 Meridian Parkway, Durham, NC 27713, USA</p>
            </ins>
            <ins id="I8">
               <p>Section of Hematology/Oncology, Department of Medicine, Committees on Genetics and Cancer Biology, University of Chicago, 5841 South Maryland Avenue, Chicago, IL 60637-1463, USA</p>
            </ins>
            <ins id="I9">
               <p>Department of Pathology, Thomas Jefferson University, 132 South 10th Street, Philadelphia, PA 19107, USA</p>
            </ins>
            <ins id="I10">
               <p>The ARUP Institute for Clinical and Experimental Pathology, 500 Chipeta Way, Salt Lake City, Utah 84108, USA</p>
            </ins>
            <ins id="I11">
               <p>Department of Surgery, University of Utah School of Medicine, 30 N 1900 E, Salt Lake City, Utah 84132, USA</p>
            </ins>
            <ins id="I12">
               <p>Department of Pathology, University of Utah School of Medicine, 30 N 1900 E, Salt Lake City, Utah 84132, USA</p>
            </ins>
            <ins id="I13">
               <p>Department of Medicine, Division of Oncology, Washington University School of Medicine and Siteman Cancer Center, St Louis, Missouri, USA</p>
            </ins>
         </insg>
         <source>BMC Genomics</source>
         <issn>1471-2164</issn>
         <pubdate>2006</pubdate>
         <volume>7</volume>
         <issue>1</issue>
         <fpage>96</fpage>
         <url>http://www.biomedcentral.com/1471-2164/7/96</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">16643655</pubid>
               <pubid idtype="doi">10.1186/1471-2164-7-96</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>15</day>
               <month>2</month>
               <year>2006</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>27</day>
               <month>4</month>
               <year>2006</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>27</day>
               <month>4</month>
               <year>2006</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2006</year>
         <collab>Hu et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>Validation of a novel gene expression signature in independent data sets is a critical step in the development of a clinically useful test for cancer patient risk-stratification. However, validation is often unconvincing because the size of the test set is typically small. To overcome this problem we used publicly available breast cancer gene expression data sets and a novel approach to data fusion, in order to validate a new breast tumor intrinsic list.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>A 105-tumor training set containing 26 sample pairs was used to derive a new breast tumor intrinsic gene list. This intrinsic list contained 1300 genes and a proliferation signature that was not present in previous breast intrinsic gene sets. We tested this list as a survival predictor on a data set of 311 tumors compiled from three independent microarray studies that were fused into a single data set using Distance Weighted Discrimination. When the new intrinsic gene set was used to hierarchically cluster this combined test set, tumors were grouped into LumA, LumB, Basal-like, HER2+/ER-, and Normal Breast-like tumor subtypes that we demonstrated in previous datasets. These subtypes were associated with significant differences in Relapse-Free and Overall Survival. Multivariate Cox analysis of the combined test set showed that the intrinsic subtype classifications added significant prognostic information that was independent of standard clinical predictors. From the combined test set, we developed an objective and unchanging classifier based upon five intrinsic subtype mean expression profiles (i.e. centroids), which is designed for single sample predictions (SSP). The SSP approach was applied to two additional independent data sets and consistently predicted survival in both systemically treated and untreated patient groups.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>This study validates the "breast tumor intrinsic" subtype classification as an objective means of tumor classification that should be translated into a clinical assay for further retrospective and prospective validation. In addition, our method of combining existing data sets can be used to robustly validate the potential clinical value of any new gene expression profile.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>The classification of human tumors using microarray data has been an area of intense research, but it remains a daunting task to validate a new profile and generate a clinically useful test. Many different gene expression-based predictors have been developed for breast cancer <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr><abbr bid="B9">9</abbr></abbrgrp>, and two different gene expression predictors have reached the final step of prospective clinical trial testing <abbrgrp><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr></abbrgrp>. Using cDNA microarrays, we previously identified five distinct subtypes of breast tumors arising from at least two distinct cell types (basal-like and luminal epithelial cells) <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>. This molecular taxonomy was based upon an "intrinsic" gene set, which was identified using a supervised analysis to select genes that showed little variance within repeated samplings of the same tumor, but which showed high variance across tumors <abbrgrp><abbr bid="B1">1</abbr></abbrgrp>. We showed that an intrinsic gene set reflects the stable biological properties of tumors and typically identifies distinct tumor subtypes that have prognostic significance, even though no knowledge of outcome was used to derive this gene set <abbrgrp><abbr bid="B3">3</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>.</p>
         <p>A major challenge for microarray studies, especially those with clinical implications, is validation <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp>. Due to the practical barriers of cost and access to large numbers of fresh frozen tumor samples with associated clinical information, very few microarray studies have analyzed enough samples to allow promising initial findings to be sufficiently validated to justify the major investment required for clinical testing. An efficient approach would be to use public gene expression data repositories as test sets; however, it has been difficult to compare and/or combine data sets from independent laboratories due to differences in sample preparation, experimental design, and microarray platforms. An accepted method for validation is to derive a prognostic/predictive gene set from a "training set" and then apply it to a completely independent "test set" <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>. The "purest" test sets are comprised of samples not generated by the primary investigators to remove any possibility of bias <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>. In this study, we illustrate the successful application of these principles by (1) deriving a new breast tumor intrinsic gene list that identifies the "intrinsic" biological features of breast tumors and (2) validating this predictor using a combined test set of 311 breast tumor samples compiled from the public domain. These analyses show that the breast tumor intrinsic subtypes are significant predictors of outcome when correcting for standard clinical parameters, and that common patterns of expression and outcome predictions can be identified in data sets generated by independent labs.</p>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <sec>
            <st>
               <p>Identification of the Intrinsic/UNC gene set</p>
            </st>
            <p>Our goals were to (1) create a new breast tumor intrinsic list, (2) validate this list on an independent dataset to show the clinical significance of the "intrinsic" classifications, and (3) derive an objective "intrinsic subtype" classifier that could be used clinically (see Figure <figr fid="F1">1</figr> for an overview of analyses performed). An intrinsic analysis is a "within class" versus "across classes" analysis that identifies genes that show low variability within a group (i.e. a tumor-metastasis pair), but which show high variation in expression across different tumors; in essence, one is selecting for genes that are consistently expressed when individual tumors are examined, but that vary in expression across different tumors. To develop a new breast tumor intrinsic gene set (Intrinsic/UNC), we assayed a training set of 105 breast tumor samples and 9 normal breast samples, which contained 26 sample pairs (see <supplr sid="S2">Additional file 2</supplr>, 146 microarray experiments in total), using Agilent oligo microarrays. Using the intrinsic analysis method as described in Sorlie et al. 2003<abbrgrp><abbr bid="B3">3</abbr></abbrgrp>, we identified an intrinsic gene set of 1410 microarray elements representing 1300 genes. We felt it important to create a new intrinsic list because first, we wanted to take advantage of newer microarrays (Agilent arrays with 17,000 genes vs. 8,000 gene cDNA microarrays previously used<abbrgrp><abbr bid="B3">3</abbr></abbrgrp>), and second, we wanted to use paired tumor samples that were not before-and-after chemotherapy pairs, but were instead pre-treatment tumor pairs. The Intrinsic/UNC gene set showed overlap with a previous breast tumor intrinsic gene set (108 genes in common with the Intrinsic/Stanford gene set of Sorlie et al. 2003<abbrgrp><abbr bid="B3">3</abbr></abbrgrp>), but also showed a significant increase in gene number likely due to the greater number of genes present on current microarrays.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Overview of the analysis methods and datasets used in this paper</p>
               </caption>
               <text>
                  <p>Overview of the analysis methods and datasets used in this paper.</p>
               </text>
               <graphic file="1471-2164-7-96-1"/>
            </fig>
            <suppl id="S2">
               <title>
                  <p>Additional File 2</p>
               </title>
               <text>
                  <p>Supplemental Table 1. Clinical and microarray information associated with each patient in the 105-sample training dataset.</p>
               </text>
               <file name="1471-2164-7-96-S2.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
         </sec>
         <sec>
            <st>
               <p>Validation of the Intrinsic/UNC gene list</p>
            </st>
            <p>To evaluate the Intrinsic/UNC gene set on an independent test dataset, we applied it to a "combined test set" of 315 breast samples (311 tumors and 4 normal breast samples) using hierarchical clustering methods as has been done previously <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>. The "combined test set" of 315 breast samples was a single data set created by combining together the data from Sorlie <it>et al</it>. 2001 and 2003 (cDNA microarrays)<abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>, van't Veer <it>et al</it>. 2002 (custom Agilent oligo microarrays)<abbrgrp><abbr bid="B5">5</abbr></abbrgrp> and Sotiriou <it>et al.</it> 2003 (cDNA microarrays)<abbrgrp><abbr bid="B19">19</abbr></abbrgrp>. We created a single data table of these three sets by first identifying the common genes present across all three microarray data sets (2800 genes). Next, we used Distance Weighted Discrimination (DWD) to combine these three data sets together <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. DWD is a multivariate analysis tool that is able to identify systematic biases present in separate data sets and then make a global adjustment to compensate for these biases; in essence, each separate data set is a multi-dimensional cloud of data points, and DWD takes two point clouds and shifts one such that it more optimally overlaps the other. Finally, we determined that 306 of the 1300 unique Intrinsic/UNC genes were present in the combined test set and performed a hierarchical clustering analysis of these 306 genes and 315 samples (Figure <figr fid="F2">2</figr>; see <supplr sid="S1">Additional file 1</supplr>, for the complete cluster diagram). 
We analyzed the combined test set instead of analyzing each of the 3 datasets separately because we believed this would provide more statistical power to perform multivariate analysis, and would yield more meaningful results because any finding would need to be shared/present across all 3 datasets. Remarkably, despite the loss of genes in the Intrinsic/UNC list due to the requirement of having to be present on 4 different microarray platforms, the hierarchical clustering analysis in Figure <figr fid="F2">2</figr> identified the five main subtypes/groups corresponding to the previously defined HER2+/ER-, Basal-like, LumA, LumB and Normal Breast-like tumor groups <abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>.</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Hierarchical cluster analysis of the 315-sample combined test set using the Intrinsic/UNC gene set reduced to 306 genes</p>
               </caption>
               <text>
                  <p>Hierarchical cluster analysis of the 315-sample combined test set using the Intrinsic/UNC gene set reduced to 306 genes. <b>(A) </b>Overview of complete cluster diagram. <b>(B) </b>Experimental sample-associated dendrogram. <b>(C) </b>Luminal/ER+ gene cluster with <it>GATA3</it>-regulated genes highlighted in pink. <b>(D) </b><it>HER2 </it>and <it>GRB7</it>-containing expression cluster. <b>(E) </b>Interferon-regulated cluster containing <it>STAT1</it>. <b>(F) </b>Basal epithelial cluster. <b>(G) </b>Proliferation cluster.</p>
               </text>
               <graphic file="1471-2164-7-96-2"/>
            </fig>
            <suppl id="S1">
               <title>
                  <p>Additional File 1</p>
               </title>
               <text>
                  <p>Supplemental Figure 1. Complete hierarchical cluster diagram of the 315-sample combined test set analyzed using the Intrinsic/UNC gene set, which was reduced to 306 genes based upon the gene overlap between datasets. Sorlie <it>et al. </it>sample names begin with the letters "BC", Sotiriou <it>et al. </it>sample names begin with "Exp", and van't Veer <it>et al. </it>sample names begin with "sample".</p>
               </text>
               <file name="1471-2164-7-96-S1.pdf">
                  <p>Click here for file</p>
               </file>
            </suppl>
            <p>As shown in previous studies, a HER2+ expression cluster was observed in the cluster analysis of the "combined test set" and contained multiple genes from the 17q11 amplicon including <it>HER2/ERBB2 </it>and <it>GRB7 </it>(Figure <figr fid="F2">2D</figr>). The HER2+ intrinsic subtype (pink dendrogram branch in Figure <figr fid="F2">2B</figr>) was predominantly ER-negative (i.e. HER2+/ER-) as previously shown. A Basal-like expression cluster was also present and contained genes (i.e. <it>c-KIT, FOXC1 </it>and <it>P-Cadherin</it>) previously identified to be characteristic of basal epithelial cells (Figure <figr fid="F2">2F</figr>). Using the program EASE<abbrgrp><abbr bid="B21">21</abbr></abbrgrp>, the Gene Ontology (GO) categories "extracellular space" and "extracellular region" were over-represented relative to chance in the Basal epithelial gene cluster. As shown in previous studies, a Luminal/ER+ expression cluster was present and contained <it>ER</it>, <it>XBP1</it>, <it>FOXA1 </it>and <it>GATA3 </it>(Figure <figr fid="F2">2C</figr>). <it>GATA3 </it>has recently been shown to be somatically mutated in some ER+ breast tumors, and some of the genes in Figure <figr fid="F2">2C</figr> are <it>GATA3</it>-regulated (<it>FOXA1 </it>and <it>TFF3</it>)<abbrgrp><abbr bid="B22">22</abbr></abbrgrp>, thus showing the functional clustering of a transcription factor and some of its direct targets. The Gene Ontology (GO) categories "transcription regulator activity" and "DNA binding" were over-represented relative to chance in the Luminal/ER+ gene cluster.</p>
            <p>The most significant difference between the previous Intrinsic/Stanford gene lists and the new Intrinsic/UNC gene list was that the latter contained a large proliferation signature (Figure <figr fid="F2">2G</figr>) <abbrgrp><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>. As expected, EASE analysis showed that the GO categories "mitotic cell cycle" and "M phase" were over-represented relative to chance in the proliferation signature. The inclusion of proliferation genes in the Intrinsic/UNC gene set, but not in the Intrinsic/Stanford gene set, is likely due to the fact that the Intrinsic/Stanford lists were based upon before-and-after chemotherapy paired samples of the same tumor, while the Intrinsic/UNC list was based upon paired samples taken at the same time point with respect to chemotherapy (22/26 were pre-treatment pairs). This finding suggests that tumor cell proliferation rates do vary before and after chemotherapy, but that proliferation is a reproducible and intrinsic feature of a tumor's expression profile.</p>
            <p>A possible new tumor group (IFN) characterized by the high expression of Interferon (IFN)-regulated genes was observed in the combined test set analysis (Figure <figr fid="F2">2E</figr>). According to EASE, the GO categories "immune response" and "defense response" were over-represented relative to chance in the interferon-regulated gene cluster. This cluster contained <it>STAT1</it>, which is thought to be the transcription factor responsible for mediating IFN-regulation of gene expression <abbrgrp><abbr bid="B26">26</abbr><abbr bid="B27">27</abbr></abbrgrp>. Genes in the IFN cluster have been linked to lymph node metastasis and poor prognosis <abbrgrp><abbr bid="B7">7</abbr><abbr bid="B13">13</abbr></abbrgrp>. In summary, the Intrinsic/UNC list contains more genes than previous lists, encompasses most features of the Intrinsic/Stanford list (i.e. Basal, Luminal/ER+, and HER2-amplicon gene clusters) and adds the biologically and clinically relevant proliferation signature.</p>
         </sec>
         <sec>
            <st>
               <p>Tumor subtypes identified by the Intrinsic/UNC gene set are predictive of outcome</p>
            </st>
            <p>To determine how many biologically relevant tumor subtypes/groups might be present within the cluster in Figure <figr fid="F2">2</figr>, we used 3 criteria, which resulted in the identification of 6 potential subtypes/groups. The first criterion was the simple and obvious dendrogram branching pattern (Figure <figr fid="F2">2B</figr>) suggesting six groups. Second was the observation that each of the six groups uniquely expressed distinct sets of known biologically relevant genes including the basal, luminal/ER+, HER2-amplicon, IFN-regulated, and proliferation-associated signatures. Third was our knowledge of the previous classifications made by the Sorlie et al. 2003 Intrinsic/Stanford list of the Stanford/Norway samples (these samples are identified in <supplr sid="S1">Additional file 1</supplr>): there was a high concordance (78%) between the classification of these samples made using either the Sorlie et al. 2003 Intrinsic/Stanford list or the Intrinsic/UNC list (excluding the IFN samples). Therefore, the 311 tumors/patients were stratified into six groups, and we proceeded to look for differences in outcomes and associations with other clinical parameters between these six groups. The Intrinsic/UNC gene set identified tumor groups/subtypes that were predictive of Relapse-Free Survival (RFS, Figure <figr fid="F3">3A</figr>) and Overall Survival (OS, p = 0.000001, data not shown) in Kaplan-Meier survival analysis on the combined test set. As previously seen in Sorlie et al. (2001 and 2003), the LumA group had the best outcome while the HER2+/ER-, Basal-like, and LumB groups had significantly worse outcomes. The new IFN class had a Kaplan-Meier survival curve similar to that of LumB, and both showed elevated proliferation rates when compared to LumA (Figure <figr fid="F2">2G</figr>).</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Kaplan-Meier survival curves of breast tumors classified by intrinsic subtype</p>
               </caption>
               <text>
                  <p>Kaplan-Meier survival curves of breast tumors classified by intrinsic subtype. Survival curves are shown for <b>(A) </b>the 315-sample combined test set classified by hierarchical clustering using the Intrinsic/UNC gene set and <b>(B) </b>the 60-sample Ma et al., <b>(C) </b>96-sample Chang et al., and <b>(D) </b>105-sample (used to derive the Intrinsic/UNC gene set) datasets classified by the Nearest-Centroid predictor (Single Sample Predictor).</p>
               </text>
               <graphic file="1471-2164-7-96-3"/>
            </fig>
            <p>In the combined test set, the standard clinical parameters of ER status, node status, grade, and tumor size (note: data for clinical HER2 status was not available) were significant predictors of RFS using Kaplan-Meier analysis (Figure <figr fid="F4">4</figr>), thus showing that the act of combining three different patient sets together did not destroy the prognostic abilities of these standard markers. In a multivariate Cox proportional hazards analysis of the combined test set using these standard clinical parameters, size, grade and ER status were significant predictors of RFS (Table <tblr tid="T1">1A</tblr>).</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Kaplan-Meier survival curves using RFS as the endpoint, for the common clinical parameters present within the 315-sample combined test set</p>
               </caption>
               <text>
                  <p>Kaplan-Meier survival curves using RFS as the endpoint, for the common clinical parameters present within the 315-sample combined test set. Survival curves are shown for <b>(A) </b>ER status, <b>(B) </b>node status, <b>(C) </b>histologic grade (1 = well-differentiated, 2 = intermediate, 3 = poor), and <b>(D) </b>tumor size (1 = diameter of 2 cm or less; 2 = diameter greater than 2 cm and less than or equal to 5 cm; 3 = diameter greater than 5 cm; 4 = any size with direct extension to chest wall or skin).</p>
               </text>
               <graphic file="1471-2164-7-96-4"/>
            </fig>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Multivariate Cox proportional hazards analysis of (A) standard clinical factors alone, or with (B) the Intrinsic Subtypes in relation to Relapse-Free Survival for the 315-sample combined test set. Size was a binary variable (0 = diameter of 2 cm or less, 1 = greater than 2 cm); node status was a binary variable (0 = no positive nodes, 1 = one or more positive nodes); age was a continuous variable formatted as decade-years. Hazard ratios for Intrinsic Subtypes were calculated relative to the Luminal A subtype. Variables found to be significant (p &lt; 0.05) in the Cox proportional hazards model are shown in bold.</p>
               </caption>
               <tblbdy cols="3">
                  <r>
                     <c ca="left">
                        <p>
                           <b>A.</b>
                        </p>
                     </c>
                     <c cspan="2" ca="left">
                        <p>
                           <b>Relapse-Free survival</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Variable</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Hazard Ratio (95% CI)</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>p-value</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Age, per decade</p>
                     </c>
                     <c ca="left">
                        <p>1.04 (0.90&#8211;1.20)</p>
                     </c>
                     <c ca="left">
                        <p>0.64</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>ER status</p>
                     </c>
                     <c ca="left">
                        <p><b>0.59 </b>(0.41&#8211;0.83)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.003</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Node status</p>
                     </c>
                     <c ca="left">
                        <p>1.41 (0.98&#8211;2.04)</p>
                     </c>
                     <c ca="left">
                        <p>0.07</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Tumor grade 2 <it>vs</it>. 1</p>
                     </c>
                     <c ca="left">
                        <p><b>2.41 </b>(1.08&#8211;5.36)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.032</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Tumor grade 3 <it>vs</it>. 1</p>
                     </c>
                     <c ca="left">
                        <p><b>3.98 </b>(1.80&#8211;8.82)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.0007</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Size</p>
                     </c>
                     <c ca="left">
                        <p><b>1.60 </b>(1.31&#8211;1.95)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>&lt;0.0001</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>B.</b>
                        </p>
                     </c>
                     <c cspan="2" ca="left">
                        <p>
                           <b>Relapse-Free survival</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Variable</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Hazard Ratio (95% CI)</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>p-value</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="3">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Age, per decade</p>
                     </c>
                     <c ca="left">
                        <p>1.08 (0.94&#8211;1.24)</p>
                     </c>
                     <c ca="left">
                        <p>0.29</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>ER status</p>
                     </c>
                     <c ca="left">
                        <p>0.69 (0.42&#8211;1.13)</p>
                     </c>
                     <c ca="left">
                        <p>0.14</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Node status</p>
                     </c>
                     <c ca="left">
                        <p>1.35 (0.92&#8211;1.98)</p>
                     </c>
                     <c ca="left">
                        <p>0.13</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Tumor grade 2 <it>vs</it>. 1</p>
                     </c>
                     <c ca="left">
                        <p>1.88 (0.82&#8211;4.32)</p>
                     </c>
                     <c ca="left">
                        <p>0.14</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Tumor grade 3 <it>vs</it>. 1</p>
                     </c>
                     <c ca="left">
                        <p><b>2.58 </b>(1.08&#8211;6.12)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.03</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Size</p>
                     </c>
                     <c ca="left">
                        <p><b>1.59 </b>(1.30&#8211;1.95)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>&lt;0.0001</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Basal-like vs. LumA</p>
                     </c>
                     <c ca="left">
                        <p><b>2.02 </b>(1.05&#8211;3.90)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.036</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>HER2+/ER- vs. LumA</p>
                     </c>
                     <c ca="left">
                        <p><b>3.47 </b>(1.78&#8211;6.76)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.0003</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>LumB vs. LumA</p>
                     </c>
                     <c ca="left">
                        <p><b>1.92 </b>(1.07&#8211;3.45)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>0.028</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>IFN vs. LumA</p>
                     </c>
                     <c ca="left">
                        <p>1.40 (0.67&#8211;2.91)</p>
                     </c>
                     <c ca="left">
                        <p>0.37</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Normal-like vs. LumA</p>
                     </c>
                     <c ca="left">
                        <p>1.56 (0.59&#8211;4.16)</p>
                     </c>
                     <c ca="left">
                        <p>0.37</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <p>To further evaluate the prognostic/predictive value of the intrinsic subtype classification, we performed multivariate Cox proportional hazards analysis of the combined test set using the six intrinsic subtypes/groups defined above and the five standard clinical parameters with RFS, OS, or DSS as the endpoint (Table <tblr tid="T1">1B</tblr> shows analysis for RFS). The intrinsic subtypes, when added to the multivariate model containing the standard clinical variables, resulted in a model significantly more predictive of RFS, OS, and DSS (p = 0.01, 0.009, and 0.04 respectively, by the likelihood-ratio test). In multivariate analysis for RFS (Table <tblr tid="T1">1B</tblr>), the Basal-like, LumB and HER2+/ER- subtypes had hazard ratios significantly greater than 1 (LumA served as the reference group), while the IFN and Normal Breast-like groups were not significant. Thus, the intrinsic subtype classifications of LumA, LumB, Basal-like and HER2+/ER- add new and important prognostic information beyond what the standard clinical predictors provide.</p>
         </sec>
         <sec>
            <st>
               <p>Associations of the intrinsic subtypes with clinical and biological parameters</p>
            </st>
            <p>To further characterize and better understand the intrinsic subtypes, we determined whether an association existed between intrinsic subtype and grade, node status, ER status, age, and tumor size in the combined test set. Two-way contingency table analysis showed significant association between grade and subtype, with HER2+/ER- and Basal-like tumors more likely to be grade 3 (Table <tblr tid="T2">2</tblr>). The Cramer's V statistic<abbrgrp><abbr bid="B28">28</abbr></abbrgrp>, which measures the strength of association between two variables in a contingency table, indicated a substantial association (Cramer's V > 0.36) between grade and subtype. Two-way contingency table analysis did not show significant association between node status and subtype (p = 0.44), but did show significant association between ER status and subtype (p &lt; 0.0001; Cramer's V = 0.72) and between tumor size and subtype (p = 0.01; Cramer's V = 0.17). As would be expected, ER+ tumors were more likely to be LumA or LumB. As indicated by the low Cramer's V (Cramer's V &lt; 0.19 indicates a low relationship), tumor size and subtype were not strongly correlated.</p>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Association between tumor histologic grade and intrinsic subtype in the 315-sample combined test set.</p>
               </caption>
               <tblbdy cols="6">
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c cspan="5" ca="left">
                        <p>
                           <b>Intrinsic Subtype</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Two-way contingency table</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>LumA </b>
                        </p>
                        <p>(#of pts.)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>LumB </b>
                        </p>
                        <p>(#of pts.)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>IFN </b>
                        </p>
                        <p>(#of pts.)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>HER2+/ER- </b>
                        </p>
                        <p>(#of pts.)</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Basal-like </b>
                        </p>
                        <p>(#of pts.)</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Grade</b>
                        </p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>1 (well)</p>
                     </c>
                     <c ca="left">
                        <p>29</p>
                     </c>
                     <c ca="left">
                        <p>2</p>
                     </c>
                     <c ca="left">
                        <p>1</p>
                     </c>
                     <c ca="left">
                        <p>0</p>
                     </c>
                     <c ca="left">
                        <p>1</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>2 (intermediate)</p>
                     </c>
                     <c ca="left">
                        <p>45</p>
                     </c>
                     <c ca="left">
                        <p>26</p>
                     </c>
                     <c ca="left">
                        <p>8</p>
                     </c>
                     <c ca="left">
                        <p>6</p>
                     </c>
                     <c ca="left">
                        <p>16</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>3 (poor)</p>
                     </c>
                     <c ca="left">
                        <p>15</p>
                     </c>
                     <c ca="left">
                        <p>32</p>
                     </c>
                     <c ca="left">
                        <p>16</p>
                     </c>
                     <c ca="left">
                        <p>21</p>
                     </c>
                     <c ca="left">
                        <p>67</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6" ca="left">
                        <p>
                           <b>Statistics for two-way contingency table analysis</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6" ca="left">
                        <p>p-value&#8224; &lt;0.0001</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="6" ca="left">
                        <p>Cramer's V&#8224;&#8224; 0.42</p>
                     </c>
                  </r>
               </tblbdy>
               <tblfn>
                  <p>&#8224;p-value calculated from Chi-square test on contingency table. &#8224;&#8224;Cramer's V statistic (value can range from 0 to 1) measures the strength of association between the two variables analyzed in the contingency table, with 1 indicating perfect association and 0 indicating no association.</p>
               </tblfn>
            </tbl>
            <p>To determine association between age and subtype, we used an unpaired Student's t-test to compare the average ages of diagnosis of each tumor subtype. Interestingly, the average age of diagnosis for HER2+/ER- tumors was significantly less than that for all other tumor types. The average age of diagnosis for LumA tumors was significantly greater than that for LumB tumors.</p>
         </sec>
         <sec>
            <st>
               <p>Derivation and application of a Single Sample Predictor</p>
            </st>
            <p>A caveat to the above analyses is that our classifications were based upon hierarchical clustering, which is a powerful tool for intrinsic class discovery, but which is not suited for individual sample classification because classifying a new sample would require a reanalysis of all samples. Therefore, we wanted to create an unchanging and objective method to classify tumors according to intrinsic subtype that could be clinically applicable. To this end, we developed a Single Sample Predictor (SSP) using the combined test set hierarchically clustered using the 306 Intrinsic/UNC genes (Figure <figr fid="F1">1</figr>). For the SSP, a mean expression profile (i.e. centroid) was created for each subtype that was significant in the multivariate analysis (LumA, LumB, Basal-like, HER2+/ER-) and for the Normal Breast-like group using the combined test set (Figure <figr fid="F2">2</figr>). Next, any new sample is compared to each centroid and assigned by the SSP to the nearest subtype/centroid as determined by Spearman correlation (note: this SSP is based on methods developed by Tibshirani and colleagues<abbrgrp><abbr bid="B3">3</abbr><abbr bid="B29">29</abbr><abbr bid="B30">30</abbr></abbrgrp>); thus, the SSP contains five different idealized profiles, and any new sample is compared to each of the five profiles and assigned a profile label (i.e. subtype name) based upon the single idealized profile it most resembles.</p>
            <p>To validate the SSP, we tested it on two additional datasets not used previously. The first was the 60-patient Ma et al. dataset, which represents a group of early stage ER+ tamoxifen-treated patients <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. The SSP classified these samples as follows: 2 Basal-like, 2 HER2+/ER-, 12 Normal Breast-like, 34 LumA, and 9 LumB. The 2 Basal-like and 2 HER2+/ER- assigned samples were excluded from a survival analysis because they were too few for a meaningful survival analysis and possibly were misclassified ER-negative tumors. Among the remaining samples the SSP classification was a significant predictor of RFS (p = 0.04, Figure <figr fid="F3">3B</figr>), due to the poor outcome of the LumB group. Next, we applied the SSP to a 96-sample test set of local only (surgery)-treated patients from Chang et al. <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>. The tumor groups identified by the SSP showed significant differences in RFS (Figure <figr fid="F3">3C</figr>, p = 0.0006) and OS (p = 0.001, data not shown) in Kaplan-Meier analysis, with the poor outcome groups as expected: LumB, Basal-like, and HER2+/ER-. Thus, the SSP identified tumor groups that are truly prognostic and have significantly different outcomes as was seen before: namely, LumA always has the most favorable outcome, while LumB, Basal-like and HER2+/ER- do poorly<abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B9">9</abbr><abbr bid="B19">19</abbr></abbrgrp>.</p>
            <p>We also applied the SSP onto the 105-sample dataset used to derive the Intrinsic/UNC gene list, which is technically not a test set for the SSP because it was used to derive the Intrinsic/UNC gene set. The tumor groups identified by the SSP showed significantly different RFS (Figure <figr fid="F3">3D</figr>, p = 0.02) and OS (p = 0.03, data not shown) in Kaplan-Meier analysis with the poor outcome groups again being LumB, Basal-like, and HER2+/ER-. A subset of the 105-sample dataset (48 in total) had been previously characterized using an immunohistochemical (IHC) analysis<abbrgrp><abbr bid="B32">32</abbr></abbrgrp>, which showed that (1) all 18 Basal-like tumors were ER-negative and HER2-negative (defined as not having a 3+ score on HER2 IHC analysis), (2) all 18 luminal subtype tumors were ER-positive and HER2-negative, and (3) all 12 HER2+/ER- subtype tumors were ER-negative and 11 out of these 12 showed HER2-overexpression (defined as having a 3+ score on HER2 IHC analysis). Thus, the SSP correlated with many standard clinical parameters, and was also able to identify clinically relevant groups (i.e. LumA vs. LumB) not identifiable using the standard clinical assays, thus indicating potential value as an objective classification method that should be developed further as a clinically applicable test.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>The development and validation of gene sets for cancer patients requires significant resources because large training and test sets are required to achieve robust results. In fact, microarray studies are often criticized for a lack of rigorous validation due to small sample sizes <abbrgrp><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr></abbrgrp>. Therefore, we utilized a previously described microarray data set combining method (Distance Weighted Discrimination) to create a large validation test set of over 300 tumors, and used it to validate a newly derived gene list for breast cancer prognostication and prediction. This approach allowed us to perform a multivariate analysis in which we show for the first time that the intrinsic subtype classification adds valuable information in the presence of five standard clinical parameters. We believe this combined test set is a valid test set for use in our analysis because after the multiple data sets were combined, the prognostic abilities of the standard clinical variables such as ER and grade remained intact.</p>
         <p>The remarkable power of our DWD-based approach is indicated by the fact that although samples came from different platforms, hierarchical clustering analysis of the combined data set managed to group samples and genes based upon biology, and not some artifact caused by combining the data sets together. Evidence that this grouping reflected biology and not some artifact comes from (1) the finding that various Gene Ontology terms were significantly over-represented relative to chance in individual gene clusters seen in this analysis and (2) the groupings of the samples showed inter-dataset mixing and were significant predictors of outcome in univariate Kaplan-Meier and multivariate Cox analysis. It is also remarkable that this classification was successful in predicting outcome despite the fact that the Intrinsic/UNC gene set was reduced from 1300 genes to 306 genes in the combined test set; this indicates the robust nature of the intrinsic subtypes as defined by the new Intrinsic/UNC gene list.</p>
         <p>One of the accomplishments of this manuscript was to develop an unchanging and objective intrinsic subtype predictor that could be used routinely in the clinical setting. This was accomplished by first identifying a new intrinsic gene set and then using this set to develop the Single Sample Predictor (SSP) that was shown here to be both prognostic on the local therapy-only patient subset from Chang et al. <abbrgrp><abbr bid="B31">31</abbr></abbrgrp> and predictive of outcomes on the ER+ tamoxifen-treated data set of Ma et al. <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. Many other gene expression based predictors for breast cancer patients have been developed, and in a complementary publication<abbrgrp><abbr bid="B33">33</abbr></abbrgrp>, we tested the intrinsic subtype SSP developed here, relative to those predictions made by four other previously published breast cancer prognostic/predictive gene sets using a single patient/tumor set of 295 cases; the four other expression-based predictors used were (1) the "70-gene" Good <it>vs</it>. Poor outcome predictor developed by van't Veer and colleagues<abbrgrp><abbr bid="B5">5</abbr><abbr bid="B11">11</abbr></abbrgrp>, (2) the "Wound-Response" profile developed by Chang et al<abbrgrp><abbr bid="B31">31</abbr><abbr bid="B34">34</abbr></abbrgrp>, (3) the "Recurrence Score (RS)" profile developed by Paik et al. <abbrgrp><abbr bid="B10">10</abbr></abbrgrp>, and (4) the 2-gene (HOXB13:IL17BR) ratio predictor developed by Ma et al. <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. The results showed that of samples classified as Basal-like, HER2+/ER-, or LumB by the SSP, 93&#8211;100% were classified by the 70-gene, RS and Wound-Response predictors as being in each predictor's bad prognosis group. 
These data suggest that a high concordance exists across these multiple predictors, in particular the RS, 70-gene and Intrinsic Subtypes; thus, the new intrinsic gene list and classification method developed here, when compared to other predictors as accomplished in Fan et al. <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>, showed that a high concordance across predictors exists, which provides additional validation for each predictor.</p>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>The results of this study advance our current knowledge of the intrinsic breast tumor subtypes and provide an objective method (SSP) for prospectively classifying tumors that could be used in the clinical setting. More broadly speaking, our findings show that while the individual brushstrokes (<it>i.e</it>. genes) may sometimes show discordance across data sets, the portraits created by the combined patterns of the individual brushstrokes are conserved and recognizable across datasets because of the similarities to the family portrait <abbrgrp><abbr bid="B24">24</abbr></abbrgrp>. Moreover, these data show that the breast tumor intrinsic subtypes identified using the Intrinsic/UNC gene list can be generalized to many different patient sets, both treated and untreated.</p>
      </sec>
      <sec>
         <st>
            <p>Methods</p>
         </st>
         <sec>
            <st>
               <p>Sample collection, RNA isolation and microarray hybridization</p>
            </st>
            <p>105 fresh frozen breast tumor samples and 9 normal breast samples were obtained using IRB-approved protocols at 4 institutions: the University of North Carolina at Chapel Hill (UNC-CH), The University of Utah, Thomas Jefferson University, and the University of Chicago. This sample set represents an ethnically and geographically diverse cohort. <supplr sid="S2">Additional file 2</supplr> contains clinical data for these samples. Patients were heterogeneously treated according to the standard of care dictated by disease stage, ER and HER2 status.</p>
            <p>Total RNA was purified from each sample using the Qiagen RNeasy Kit. RNA integrity was determined using the RNA 6000 Nano LabChip Kit and Agilent 2100 Bioanalyzer. Total RNA amplification and labeling were done as previously described in <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>. Microarray hybridizations were performed using Agilent Human oligonucleotide (1Av1, 1Av2 and custom designed 1Av1-based) microarrays using 2 &#956;g of Cy3-labeled common reference sample that is a modified version of the Stratagene Human Universal Reference<abbrgrp><abbr bid="B36">36</abbr></abbrgrp>, and 2 &#956;g of Cy5-labeled experimental sample. Microarrays were hybridized overnight, washed, dried, and scanned as described in <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>. The image files were analyzed with GenePix Pro 4.1 and loaded into the UNC-CH Microarray Database<abbrgrp><abbr bid="B37">37</abbr></abbrgrp> where a Lowess normalization procedure was performed to adjust the Cy3 and Cy5 channels<abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. All primary microarray data associated with this study are available at <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> and in the GEO<abbrgrp><abbr bid="B40">40</abbr></abbrgrp> under the accession number of GSE1992, series GSM34424-GSM34568.</p>
         </sec>
         <sec>
            <st>
               <p>Identification of the intrinsic gene set</p>
            </st>
            <p>We derived a new breast tumor intrinsic gene set, referred to as the "Intrinsic/UNC" list, using a training set composed of the 105 tumor samples described above, 9 normal breast samples, and 26 sample pairs (in total, represented by 146 microarrays). 15, 9, and 2 of the 26 sample pairs were different physical pieces of the same tumor (taken at the same time point), tumor-metastasis pairs and normal sample pairs, respectively. The background subtracted, Lowess normalized log<sub>2 </sub>ratio of Cy5 to Cy3 intensity values were first filtered to select genes that had a signal intensity of at least 30 units above background in both the Cy5 and Cy3 channels. Only genes that met these criteria in at least 70% of the 146 microarrays were included for subsequent analysis. Next, we performed an "intrinsic" analysis as described previously<abbrgrp><abbr bid="B3">3</abbr></abbrgrp> using the 26 sample pairs and 86 additional microarrays. An intrinsic analysis identifies genes showing low variability in expression within paired samples but high variability in expression across different tumors; for each gene a ratio of "within-pair variance" to "between-subject variance" is computed. Genes with ratios below one standard deviation of the mean ratio were defined as "intrinsic". This analysis resulted in 1410 microarray elements representing 1300 genes being identified as "intrinsic". In order to obtain an estimate of the number of false-positive intrinsic genes, we permuted the sample labels to generate 26 random pairs and 86 non-paired samples. This permutation was performed 100 times and the intrinsic scores were calculated for each. These permuted scores were used to determine a threshold on the intrinsic score corresponding to a false discovery rate (FDR) less than 1%. The selected threshold resulted in 1410 microarray features being called significant with a median FDR = 0.3% and 90th percentile FDR = 0.5%. (See Tusher <it>et al</it>. for a complete description of this calculation <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>).</p>
         </sec>
         <sec>
            <st>
               <p>Creation and analyses of the combined test set</p>
            </st>
            <p>The independent test set was a 315-sample "combined test set" consisting of three DNA microarray datasets (Sorlie <it>et al</it>. 2001 and 2003<abbrgrp><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr></abbrgrp>, van't Veer <it>et al</it>. 2002<abbrgrp><abbr bid="B5">5</abbr></abbrgrp> and Sotiriou <it>et al. </it>2003<abbrgrp><abbr bid="B19">19</abbr></abbrgrp>). To combine these datasets obtained from different microarray platforms, we performed the following pre-processing methods. First, the R/G ratios in each dataset were log<sub>2 </sub>transformed and Lowess normalized<abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. Next, missing values were k-NN imputed<abbrgrp><abbr bid="B42">42</abbr></abbrgrp>. Gene annotations from each dataset were converted into UniGene Cluster IDs (UCIDs, Build 161) using the SOURCE database<abbrgrp><abbr bid="B43">43</abbr></abbrgrp>, and multiple occurrences of a UCID were collapsed by taking the median value for that ID within each experiment and platform, which resulted in ~2800 genes having expression data in all three datasets. Next, Distance Weighted Discrimination<abbrgrp><abbr bid="B20">20</abbr></abbrgrp> was performed in a pair-wise fashion by first combining the Sorlie et al. and Sotiriou et al. datasets, and then combining this with the van't Veer et al. dataset to make a single dataset. In the final pre-processing step, each microarray experiment was normalized such that each column/experimental sample was standardized to N(0,1), and each row/gene was median centered. 306 of the 1300 Intrinsic/UNC genes had microarray data present in the combined test set and were used in a two-way average-linkage hierarchical cluster analysis <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>. Cluster results were visualized using the program "Treeview".</p>
         </sec>
         <sec>
            <st>
               <p>Derivation of the Single Sample Predictor</p>
            </st>
            <p>The Single Sample Predictor (SSP) is a Nearest Centroid-based method based upon the work of Hastie and Tibshirani <abbrgrp><abbr bid="B3">3</abbr><abbr bid="B45">45</abbr><abbr bid="B46">46</abbr></abbrgrp>. Our SSP classifies an individual sample according to its nearest centroid as determined by Spearman correlation. To derive our SSP, we utilized the 315-sample combined test set from Figure <figr fid="F2">2</figr> to create centroids for each of the five intrinsic subtypes (LumA, LumB, HER2+/ER-, Basal-like and Normal Breast-like). Please note that we did not create a centroid for the IFN group because it failed significance in multivariate testing, but did create a centroid for the Normal Breast-like group because we feel it is important to be able to identify true normal samples; an H&amp;E examination of most tumor samples falling into the Normal Breast-like category shows that this is occurring mainly because of too much normal tissue contamination.</p>
            <p>To create each intrinsic subtype centroid, we averaged the gene expression profiles for samples clearly assigned to each subtype (limiting the analysis to 249 of the 315 samples) using the hierarchical clustering dendrogram as a guide (Figure <figr fid="F2">2</figr>). We then applied the SSP to two independent test datasets: (1) the Ma et al. 60-sample ER+ tamoxifen-treated tumor dataset and (2) the Chang et al. 96-sample local only-treated tumor dataset. By matching UCIDs, microarray data for as many as possible of the 306 Intrinsic/UNC genes was obtained from these 2 datasets. To remove microarray platform/source systematic biases, we applied DWD to the 2 test datasets relative to the combined test set. The SSP was then used to classify tumors by intrinsic subtype in these 2 test datasets. Using similar methods, the SSP was also applied to the 105-sample training set used to derive the Intrinsic/UNC gene set.</p>
         </sec>
         <sec>
            <st>
               <p>Survival analyses</p>
            </st>
            <p>Kaplan-Meier survival plots were compared using the Cox-Mantel log-rank test in WinSTAT for Excel (R. Fitch Software). Two-way contingency table analysis and unpaired Student's t-test were done using WinSTAT. For the "combined test set", multivariate Cox proportional hazards analysis was performed using SAS (Cary, NC).</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>C.M.P. was the Principal Investigator and instigated the study, helped with design and wrote the paper, while J.P., O.I.O., and P.S.B. were the Principal Investigators at each of the three other participating institutions and were involved in the study design, implementation and manuscript writing. C.F., J.S.M., B.F.Q., A.N., and J.P. were responsible for the statistical analyses and some writing. Z.H. and X.H. performed all of the tumor RNA preparation and microarray experiments and were involved in the writing. J.W. and Y.L. were responsible for all data management and some data analysis. C.L. was responsible for the pathological assessment of most tumor samples and was involved in the writing. M.E. and D.O. were involved in data analysis, interpretation and writing. Tumor sample collection, clinical data acquisition and interpretation were accomplished by L.C., M.E., R.N., M.T., A.R.O., D.D., L.P., E.N., M.M., H.H., M.M., J.F.Q., L.R.S., E.R., and L.D., and it should be noted that this was separately accomplished at four institutions.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>C.M.P. was supported by funds from the NCI Breast SPORE program to UNC-CH (P50-CA58223-09A1), by the National Institute of Environmental Health Sciences (U19-ES11391-03) and by NCI (RO1-CA-101227-01). P.S.B. was supported by NCI R33-CA97769-01, O.I.O. by the National Institute of Environmental Health Sciences (P50 ESO12382), A.N. by NSF Grant DMS 0406361, J.S.M. by NSF Grant DMS-0308331, and L.A.C. by NIH M01RR00046. The Breast Cancer Research Foundation supported C.M.P., O.I.O. and L.R.S.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Molecular portraits of human breast tumours</p>
            </title>
            <aug>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Sorlie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Eisen</snm>
                  <fnm>MB</fnm>
               </au>
               <au>
                  <snm>van de Rijn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Jeffrey</snm>
                  <fnm>SS</fnm>
               </au>
               <au>
                  <snm>Rees</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Pollack</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Ross</snm>
                  <fnm>DT</fnm>
               </au>
               <au>
                  <snm>Johnsen</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Akslen</snm>
                  <fnm>LA</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2000</pubdate>
            <volume>406</volume>
            <issue>6797</issue>
            <fpage>747</fpage>
            <lpage>752</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/35021093</pubid>
                  <pubid idtype="pmpid" link="fulltext">10963602</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Gene expression patterns of breast carcinomas distinguish tumor subclasses with clinical implications</p>
            </title>
            <aug>
               <au>
                  <snm>Sorlie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Aas</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Geisler</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Johnsen</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Eisen</snm>
                  <fnm>MB</fnm>
               </au>
               <au>
                  <snm>van de Rijn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Jeffrey</snm>
                  <fnm>SS</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2001</pubdate>
            <volume>98</volume>
            <issue>19</issue>
            <fpage>10869</fpage>
            <lpage>10874</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">58566</pubid>
                  <pubid idtype="pmpid" link="fulltext">11553815</pubid>
                  <pubid idtype="doi">10.1073/pnas.191367098</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Repeated observation of breast tumor subtypes in independent gene expression data sets</p>
            </title>
            <aug>
               <au>
                  <snm>Sorlie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Parker</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Marron</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Nobel</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Deng</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Johnsen</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Pesich</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Geisler</snm>
                  <fnm>S</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2003</pubdate>
            <volume>100</volume>
            <issue>14</issue>
            <fpage>8418</fpage>
            <lpage>8423</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">166244</pubid>
                  <pubid idtype="pmpid" link="fulltext">12829800</pubid>
                  <pubid idtype="doi">10.1073/pnas.0932692100</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Gene expression profiles derived from fine needle aspiration correlate with response to systemic chemotherapy in breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Sotiriou</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Powles</snm>
                  <fnm>TJ</fnm>
               </au>
               <au>
                  <snm>Dowsett</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Jazaeri</snm>
                  <fnm>AA</fnm>
               </au>
               <au>
                  <snm>Feldman</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Assersohn</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Gadisetti</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Libutti</snm>
                  <fnm>SK</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>ET</fnm>
               </au>
            </aug>
            <source>Breast Cancer Res</source>
            <pubdate>2002</pubdate>
            <volume>4</volume>
            <issue>3</issue>
            <fpage>R3</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">111028</pubid>
                  <pubid idtype="pmpid" link="fulltext">12052255</pubid>
                  <pubid idtype="doi">10.1186/bcr433</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Gene expression profiling predicts clinical outcome of breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>van 't Veer</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Dai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>van de Vijver</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>YD</fnm>
               </au>
               <au>
                  <snm>Hart</snm>
                  <fnm>AA</fnm>
               </au>
               <au>
                  <snm>Mao</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Peterse</snm>
                  <fnm>HL</fnm>
               </au>
               <au>
                  <snm>van der Kooy</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Marton</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Witteveen</snm>
                  <fnm>AT</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2002</pubdate>
            <volume>415</volume>
            <issue>6871</issue>
            <fpage>530</fpage>
            <lpage>536</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/415530a</pubid>
                  <pubid idtype="pmpid" link="fulltext">11823860</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>A two-gene expression ratio predicts clinical outcome in breast cancer patients treated with tamoxifen</p>
            </title>
            <aug>
               <au>
                  <snm>Ma</snm>
                  <fnm>XJ</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Ryan</snm>
                  <fnm>PD</fnm>
               </au>
               <au>
                  <snm>Isakoff</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Barmettler</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Fuller</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Muir</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Mohapatra</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Salunga</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Tuggle</snm>
                  <fnm>JT</fnm>
               </au>
               <etal/>
            </aug>
            <source>Cancer Cell</source>
            <pubdate>2004</pubdate>
            <volume>5</volume>
            <issue>6</issue>
            <fpage>607</fpage>
            <lpage>616</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.ccr.2004.05.015</pubid>
                  <pubid idtype="pmpid" link="fulltext">15193263</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Gene expression predictors of breast cancer outcomes</p>
            </title>
            <aug>
               <au>
                  <snm>Huang</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Cheng</snm>
                  <fnm>SH</fnm>
               </au>
               <au>
                  <snm>Dressman</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Pittman</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Tsou</snm>
                  <fnm>MH</fnm>
               </au>
               <au>
                  <snm>Horng</snm>
                  <fnm>CF</fnm>
               </au>
               <au>
                  <snm>Bild</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Iversen</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Liao</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>CM</fnm>
               </au>
               <etal/>
            </aug>
            <source>Lancet</source>
            <pubdate>2003</pubdate>
            <volume>361</volume>
            <issue>9369</issue>
            <fpage>1590</fpage>
            <lpage>1596</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0140-6736(03)13308-9</pubid>
                  <pubid idtype="pmpid" link="fulltext">12747878</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>Different gene expression patterns in invasive lobular and ductal carcinomas of the breast</p>
            </title>
            <aug>
               <au>
                  <snm>Zhao</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Langerod</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Ji</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Nowels</snm>
                  <fnm>KW</fnm>
               </au>
               <au>
                  <snm>Nesland</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Bukholm</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Karesen</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Borresen-Dale</snm>
                  <fnm>AL</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Biol Cell</source>
            <pubdate>2004</pubdate>
            <volume>15</volume>
            <issue>6</issue>
            <fpage>2523</fpage>
            <lpage>2536</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">420079</pubid>
                  <pubid idtype="pmpid" link="fulltext">15034139</pubid>
                  <pubid idtype="doi">10.1091/mbc.E03-11-0786</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Gene expression profiling identifies molecular subtypes of inflammatory breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Bertucci</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Finetti</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Rougemont</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Charafe-Jauffret</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Cervera</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Tarpin</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Nguyen</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Xerri</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Houlgatte</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Jacquemier</snm>
                  <fnm>J</fnm>
               </au>
               <etal/>
            </aug>
            <source>Cancer Res</source>
            <pubdate>2005</pubdate>
            <volume>65</volume>
            <issue>6</issue>
            <fpage>2170</fpage>
            <lpage>2178</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1158/0008-5472.CAN-04-4115</pubid>
                  <pubid idtype="pmpid" link="fulltext">15781628</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>A multigene assay to predict recurrence of tamoxifen-treated, node-negative breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Paik</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Shak</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tang</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Baker</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Cronin</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Baehner</snm>
                  <fnm>FL</fnm>
               </au>
               <au>
                  <snm>Walker</snm>
                  <fnm>MG</fnm>
               </au>
               <au>
                  <snm>Watson</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>T</fnm>
               </au>
               <etal/>
            </aug>
            <source>N Engl J Med</source>
            <pubdate>2004</pubdate>
            <volume>351</volume>
            <issue>27</issue>
            <fpage>2817</fpage>
            <lpage>2826</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1056/NEJMoa041588</pubid>
                  <pubid idtype="pmpid" link="fulltext">15591335</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>A gene-expression signature as a predictor of survival in breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>van de Vijver</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>YD</fnm>
               </au>
               <au>
                  <snm>van't Veer</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Dai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hart</snm>
                  <fnm>AA</fnm>
               </au>
               <au>
                  <snm>Voskuil</snm>
                  <fnm>DW</fnm>
               </au>
               <au>
                  <snm>Schreiber</snm>
                  <fnm>GJ</fnm>
               </au>
               <au>
                  <snm>Peterse</snm>
                  <fnm>JL</fnm>
               </au>
               <au>
                  <snm>Roberts</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Marton</snm>
                  <fnm>MJ</fnm>
               </au>
               <etal/>
            </aug>
            <source>N Engl J Med</source>
            <pubdate>2002</pubdate>
            <volume>347</volume>
            <issue>25</issue>
            <fpage>1999</fpage>
            <lpage>2009</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1056/NEJMoa021967</pubid>
                  <pubid idtype="pmpid" link="fulltext">12490681</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Classification of human lung carcinomas by mRNA expression profiling reveals distinct adenocarcinoma subclasses</p>
            </title>
            <aug>
               <au>
                  <snm>Bhattacharjee</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Richards</snm>
                  <fnm>WG</fnm>
               </au>
               <au>
                  <snm>Staunton</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Monti</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Vasa</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Ladd</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Beheshti</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bueno</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gillette</snm>
                  <fnm>M</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2001</pubdate>
            <volume>98</volume>
            <issue>24</issue>
            <fpage>13790</fpage>
            <lpage>13795</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">61120</pubid>
                  <pubid idtype="pmpid" link="fulltext">11707567</pubid>
                  <pubid idtype="doi">10.1073/pnas.191502998</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Molecular classification of head and neck squamous cell carcinomas using patterns of gene expression</p>
            </title>
            <aug>
               <au>
                  <snm>Chung</snm>
                  <fnm>CH</fnm>
               </au>
               <au>
                  <snm>Parker</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Karaca</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Funkhouser</snm>
                  <fnm>WK</fnm>
               </au>
               <au>
                  <snm>Moore</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Butterfoss</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Xiang</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Zanation</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Yin</snm>
                  <fnm>X</fnm>
               </au>
               <etal/>
            </aug>
            <source>Cancer Cell</source>
            <pubdate>2004</pubdate>
            <volume>5</volume>
            <issue>5</issue>
            <fpage>489</fpage>
            <lpage>500</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S1535-6108(04)00112-6</pubid>
                  <pubid idtype="pmpid" link="fulltext">15144956</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Diversity of gene expression in adenocarcinoma of the lung</p>
            </title>
            <aug>
               <au>
                  <snm>Garber</snm>
                  <fnm>ME</fnm>
               </au>
               <au>
                  <snm>Troyanskaya</snm>
                  <fnm>OG</fnm>
               </au>
               <au>
                  <snm>Schluens</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Petersen</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Thaesler</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Pacyna-Gengelbach</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>van de Rijn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Rosen</snm>
                  <fnm>GD</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Whyte</snm>
                  <fnm>RI</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2001</pubdate>
            <volume>98</volume>
            <issue>24</issue>
            <fpage>13784</fpage>
            <lpage>13789</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">61119</pubid>
                  <pubid idtype="pmpid" link="fulltext">11707590</pubid>
                  <pubid idtype="doi">10.1073/pnas.241500798</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Prediction of cancer outcome with microarrays: a multiple random validation strategy</p>
            </title>
            <aug>
               <au>
                  <snm>Michiels</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Koscielny</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Hill</snm>
                  <fnm>C</fnm>
               </au>
            </aug>
            <source>Lancet</source>
            <pubdate>2005</pubdate>
            <volume>365</volume>
            <issue>9458</issue>
            <fpage>488</fpage>
            <lpage>492</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0140-6736(05)17866-0</pubid>
                  <pubid idtype="pmpid" link="fulltext">15705458</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>Gene-expression profiling in breast cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Jenssen</snm>
                  <fnm>TK</fnm>
               </au>
               <au>
                  <snm>Hovig</snm>
                  <fnm>E</fnm>
               </au>
            </aug>
            <source>Lancet</source>
            <pubdate>2005</pubdate>
            <volume>365</volume>
            <issue>9460</issue>
            <fpage>634</fpage>
            <lpage>635</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">15721457</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Pitfalls in the use of DNA microarray data for diagnostic and prognostic classification</p>
            </title>
            <aug>
               <au>
                  <snm>Simon</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Radmacher</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Dobbin</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>McShane</snm>
                  <fnm>LM</fnm>
               </au>
            </aug>
            <source>J Natl Cancer Inst</source>
            <pubdate>2003</pubdate>
            <volume>95</volume>
            <issue>1</issue>
            <fpage>14</fpage>
            <lpage>18</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">12509396</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Microarrays and molecular research: noise discovery?</p>
            </title>
            <aug>
               <au>
                  <snm>Ioannidis</snm>
                  <fnm>JP</fnm>
               </au>
            </aug>
            <source>Lancet</source>
            <pubdate>2005</pubdate>
            <volume>365</volume>
            <issue>9458</issue>
            <fpage>454</fpage>
            <lpage>455</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">15705441</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Breast cancer classification and prognosis based on gene expression profiles from a population-based study</p>
            </title>
            <aug>
               <au>
                  <snm>Sotiriou</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Neo</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>McShane</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Korn</snm>
                  <fnm>EL</fnm>
               </au>
               <au>
                  <snm>Long</snm>
                  <fnm>PM</fnm>
               </au>
               <au>
                  <snm>Jazaeri</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Martiat</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Fox</snm>
                  <fnm>SB</fnm>
               </au>
               <au>
                  <snm>Harris</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>ET</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2003</pubdate>
            <volume>100</volume>
            <issue>18</issue>
            <fpage>10393</fpage>
            <lpage>10398</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">193572</pubid>
                  <pubid idtype="pmpid" link="fulltext">12917485</pubid>
                  <pubid idtype="doi">10.1073/pnas.1732912100</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Adjustment of systematic microarray data biases</p>
            </title>
            <aug>
               <au>
                  <snm>Benito</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Parker</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Du</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Xiang</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Marron</snm>
                  <fnm>JS</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <issue>1</issue>
            <fpage>105</fpage>
            <lpage>114</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/btg385</pubid>
                  <pubid idtype="pmpid" link="fulltext">14693816</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>Identifying biological themes within lists of genes with EASE</p>
            </title>
            <aug>
               <au>
                  <snm>Hosack</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Dennis</snm>
                  <fnm>G</fnm>
                  <suf>Jr</suf>
               </au>
               <au>
                  <snm>Sherman</snm>
                  <fnm>BT</fnm>
               </au>
               <au>
                  <snm>Lane</snm>
                  <fnm>HC</fnm>
               </au>
               <au>
                  <snm>Lempicki</snm>
                  <fnm>RA</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2003</pubdate>
            <volume>4</volume>
            <issue>10</issue>
            <fpage>R70</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">328459</pubid>
                  <pubid idtype="pmpid" link="fulltext">14519205</pubid>
                  <pubid idtype="doi">10.1186/gb-2003-4-10-r70</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Mutation of GATA3 in human breast tumors</p>
            </title>
            <aug>
               <au>
                  <snm>Usary</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Llaca</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Karaca</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Presswala</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Karaca</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Langerod</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Karesen</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Oh</snm>
                  <fnm>DS</fnm>
               </au>
               <au>
                  <snm>Dressler</snm>
                  <fnm>LG</fnm>
               </au>
               <etal/>
            </aug>
            <source>Oncogene</source>
            <pubdate>2004</pubdate>
            <volume>23</volume>
            <issue>46</issue>
            <fpage>7669</fpage>
            <lpage>7678</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/sj.onc.1207966</pubid>
                  <pubid idtype="pmpid" link="fulltext">15361840</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Distinctive gene expression patterns in human mammary epithelial cells and breast cancers</p>
            </title>
            <aug>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Jeffrey</snm>
                  <fnm>SS</fnm>
               </au>
               <au>
                  <snm>van de Rijn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Rees</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Eisen</snm>
                  <fnm>MB</fnm>
               </au>
               <au>
                  <snm>Ross</snm>
                  <fnm>DT</fnm>
               </au>
               <au>
                  <snm>Pergamenschikov</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Williams</snm>
                  <fnm>CF</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>SX</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>JC</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1999</pubdate>
            <volume>96</volume>
            <issue>16</issue>
            <fpage>9212</fpage>
            <lpage>9217</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">17759</pubid>
                  <pubid idtype="pmpid" link="fulltext">10430922</pubid>
                  <pubid idtype="doi">10.1073/pnas.96.16.9212</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Molecular portraits and the family tree of cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Chung</snm>
                  <fnm>CH</fnm>
               </au>
               <au>
                  <snm>Bernard</snm>
                  <fnm>PS</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2002</pubdate>
            <volume>32</volume>
            <issue>Suppl</issue>
            <fpage>533</fpage>
            <lpage>540</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1038</pubid>
                  <pubid idtype="pmpid" link="fulltext">12454650</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Identification of genes periodically expressed in the human cell cycle and their expression in tumors</p>
            </title>
            <aug>
               <au>
                  <snm>Whitfield</snm>
                  <fnm>ML</fnm>
               </au>
               <au>
                  <snm>Sherlock</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Saldanha</snm>
                  <fnm>AJ</fnm>
               </au>
               <au>
                  <snm>Murray</snm>
                  <fnm>JI</fnm>
               </au>
               <au>
                  <snm>Ball</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Alexander</snm>
                  <fnm>KE</fnm>
               </au>
               <au>
                  <snm>Matese</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Hurt</snm>
                  <fnm>MM</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>PO</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Biol Cell</source>
            <pubdate>2002</pubdate>
            <volume>13</volume>
            <issue>6</issue>
            <fpage>1977</fpage>
            <lpage>2000</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">117619</pubid>
                  <pubid idtype="pmpid" link="fulltext">12058064</pubid>
                  <pubid idtype="doi">10.1091/mbc.02-02-0030.</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Transcriptionally active Stat1 is required for the antiproliferative effects of both interferon alpha and interferon gamma</p>
            </title>
            <aug>
               <au>
                  <snm>Bromberg</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Horvath</snm>
                  <fnm>CM</fnm>
               </au>
               <au>
                  <snm>Wen</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Schreiber</snm>
                  <fnm>RD</fnm>
               </au>
               <au>
                  <snm>Darnell</snm>
                  <fnm>JE</fnm>
                  <suf>Jr</suf>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1996</pubdate>
            <volume>93</volume>
            <issue>15</issue>
            <fpage>7673</fpage>
            <lpage>7678</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">38805</pubid>
                  <pubid idtype="pmpid" link="fulltext">8755534</pubid>
                  <pubid idtype="doi">10.1073/pnas.93.15.7673</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Interferon-alpha activates multiple STAT proteins and upregulates proliferation-associated IL-2Ralpha, c-myc, and pim-1 genes in human T cells</p>
            </title>
            <aug>
               <au>
                  <snm>Matikainen</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Sareneva</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ronni</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Lehtonen</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Koskinen</snm>
                  <fnm>PJ</fnm>
               </au>
               <au>
                  <snm>Julkunen</snm>
                  <fnm>I</fnm>
               </au>
            </aug>
            <source>Blood</source>
            <pubdate>1999</pubdate>
            <volume>93</volume>
            <issue>6</issue>
            <fpage>1980</fpage>
            <lpage>1991</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">10068671</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Biostatistics: a methodology for the health sciences</p>
            </title>
            <aug>
               <au>
                  <snm>Van Belle</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Fisher</snm>
                  <fnm>L</fnm>
               </au>
            </aug>
            <publisher>Hoboken, NJ: Wiley-Interscience John Wiley &amp; Sons</publisher>
            <edition>2</edition>
            <pubdate>2004</pubdate>
         </bibl>
         <bibl id="B29">
            <title>
               <p>Use of gene-expression profiling to identify prognostic subclasses in adult acute myeloid leukemia</p>
            </title>
            <aug>
               <au>
                  <snm>Bullinger</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Dohner</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Bair</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Frohling</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Schlenk</snm>
                  <fnm>RF</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Dohner</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Pollack</snm>
                  <fnm>JR</fnm>
               </au>
            </aug>
            <source>N Engl J Med</source>
            <pubdate>2004</pubdate>
            <volume>350</volume>
            <issue>16</issue>
            <fpage>1605</fpage>
            <lpage>1616</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1056/NEJMoa031046</pubid>
                  <pubid idtype="pmpid" link="fulltext">15084693</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>Semi-supervised methods to predict patient survival from gene expression data</p>
            </title>
            <aug>
               <au>
                  <snm>Bair</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>PLoS Biol</source>
            <pubdate>2004</pubdate>
            <volume>2</volume>
            <issue>4</issue>
            <fpage>E108</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">387275</pubid>
                  <pubid idtype="pmpid" link="fulltext">15094809</pubid>
                  <pubid idtype="doi">10.1371/journal.pbio.0020108</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <title>
               <p>Robustness, scalability, and integration of a wound-response gene expression signature in predicting breast cancer survival</p>
            </title>
            <aug>
               <au>
                  <snm>Chang</snm>
                  <fnm>HY</fnm>
               </au>
               <au>
                  <snm>Nuyten</snm>
                  <fnm>DS</fnm>
               </au>
               <au>
                  <snm>Sneddon</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Sorlie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Dai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>YD</fnm>
               </au>
               <au>
                  <snm>van't Veer</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Bartelink</snm>
                  <fnm>H</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2005</pubdate>
            <volume>102</volume>
            <issue>10</issue>
            <fpage>3738</fpage>
            <lpage>3743</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">548329</pubid>
                  <pubid idtype="pmpid" link="fulltext">15701700</pubid>
                  <pubid idtype="doi">10.1073/pnas.0409462102</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B32">
            <title>
               <p>Phenotypic evaluation of the basal-like subtype of invasive breast carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Livasy</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Karaca</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Nanda</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Tretiakova</snm>
                  <fnm>MS</fnm>
               </au>
               <au>
                  <snm>Olopade</snm>
                  <fnm>OI</fnm>
               </au>
               <au>
                  <snm>Moore</snm>
                  <fnm>DT</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
            </aug>
            <source>Mod Pathol</source>
            <pubdate>2005</pubdate>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">15861214</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B33">
            <title>
               <p>Different gene expression-based predictors for breast cancer patients are concordant</p>
            </title>
            <aug>
               <au>
                  <snm>Fan</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Oh</snm>
                  <fnm>DS</fnm>
               </au>
               <au>
                  <snm>Wessels</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Weigelt</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Nuyten</snm>
                  <fnm>DSA</fnm>
               </au>
               <au>
                  <snm>Nobel</snm>
                  <fnm>AB</fnm>
               </au>
               <au>
                  <snm>van't Veer</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
            </aug>
            <source>N Engl J Med</source>
            <inpress/>
         </bibl>
         <bibl id="B34">
            <title>
               <p>Gene expression signature of fibroblast serum response predicts human cancer progression: similarities between tumors and wounds</p>
            </title>
            <aug>
               <au>
                  <snm>Chang</snm>
                  <fnm>HY</fnm>
               </au>
               <au>
                  <snm>Sneddon</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>Alizadeh</snm>
                  <fnm>AA</fnm>
               </au>
               <au>
                  <snm>Sood</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>West</snm>
                  <fnm>RB</fnm>
               </au>
               <au>
                  <snm>Montgomery</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Chi</snm>
                  <fnm>JT</fnm>
               </au>
               <au>
                  <snm>van de Rijn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>PO</fnm>
               </au>
            </aug>
            <source>PLoS Biol</source>
            <pubdate>2004</pubdate>
            <volume>2</volume>
            <issue>2</issue>
            <fpage>E7</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">314300</pubid>
                  <pubid idtype="pmpid" link="fulltext">14737219</pubid>
                  <pubid idtype="doi">10.1371/journal.pbio.0020007</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B35">
            <title>
               <p>High reproducibility using sodium hydroxide-stripped long oligonucleotide DNA microarrays</p>
            </title>
            <aug>
               <au>
                  <snm>Hu</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Troester</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Perou</snm>
                  <fnm>CM</fnm>
               </au>
            </aug>
            <source>Biotechniques</source>
            <pubdate>2005</pubdate>
            <volume>38</volume>
            <issue>1</issue>
            <fpage>121</fpage>
            <lpage>124</lpage>
            <xrefbib>
               <pubid idtype="pmpid">15679094</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B36">
            <title>
               <p>Universal Reference RNA as a standard for microarray experiments</p>
            </title>
            <aug>
               <au>
                  <snm>Novoradovskaya</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Whitfield</snm>
                  <fnm>ML</fnm>
               </au>
               <au>
                  <snm>Basehore</snm>
                  <fnm>LS</fnm>
               </au>
               <au>
                  <snm>Novoradovsky</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Pesich</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Usary</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Karaca</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wong</snm>
                  <fnm>WK</fnm>
               </au>
               <au>
                  <snm>Aprelikova</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Fero</snm>
                  <fnm>M</fnm>
               </au>
               <etal/>
            </aug>
            <source>BMC Genomics</source>
            <pubdate>2004</pubdate>
            <volume>5</volume>
            <issue>1</issue>
            <fpage>20</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">394318</pubid>
                  <pubid idtype="pmpid" link="fulltext">15113400</pubid>
                  <pubid idtype="doi">10.1186/1471-2164-5-20</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B37">
            <title>
               <p>UNC Microarray Database</p>
            </title>
            <url>https://genome.unc.edu/</url>
         </bibl>
         <bibl id="B38">
            <title>
               <p>Normalization for cDNA microarray data: a robust composite method addressing single and multiple slide systematic variation</p>
            </title>
            <aug>
               <au>
                  <snm>Yang</snm>
                  <fnm>YH</fnm>
               </au>
               <au>
                  <snm>Dudoit</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Luu</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>DM</fnm>
               </au>
               <au>
                  <snm>Peng</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Ngai</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Speed</snm>
                  <fnm>TP</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2002</pubdate>
            <volume>30</volume>
            <issue>4</issue>
            <fpage>e15</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">100354</pubid>
                  <pubid idtype="pmpid" link="fulltext">11842121</pubid>
                  <pubid idtype="doi">10.1093/nar/30.4.e15</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B39">
            <title>
               <p>UNC Breast Tumor Data</p>
            </title>
            <url>https://genome.unc.edu/pubsup/breastTumor/</url>
         </bibl>
         <bibl id="B40">
            <title>
               <p>Gene Expression Omnibus</p>
            </title>
            <url>http://www.ncbi.nlm.nih.gov/geo/</url>
         </bibl>
         <bibl id="B41">
            <title>
               <p>Significance analysis of microarrays applied to the ionizing radiation response</p>
            </title>
            <aug>
               <au>
                  <snm>Tusher</snm>
                  <fnm>VG</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Chu</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2001</pubdate>
            <volume>98</volume>
            <issue>9</issue>
            <fpage>5116</fpage>
            <lpage>5121</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">33173</pubid>
                  <pubid idtype="pmpid" link="fulltext">11309499</pubid>
                  <pubid idtype="doi">10.1073/pnas.091062498</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B42">
            <title>
               <p>Missing value estimation methods for DNA microarrays</p>
            </title>
            <aug>
               <au>
                  <snm>Troyanskaya</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Cantor</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sherlock</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Altman</snm>
                  <fnm>RB</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2001</pubdate>
            <volume>17</volume>
            <issue>6</issue>
            <fpage>520</fpage>
            <lpage>525</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/17.6.520</pubid>
                  <pubid idtype="pmpid" link="fulltext">11395428</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B43">
            <title>
               <p>SOURCE: a unified genomic resource of functional annotations, ontologies, and gene expression data</p>
            </title>
            <aug>
               <au>
                  <snm>Diehn</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sherlock</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Binkley</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Jin</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Matese</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Hernandez-Boussard</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Rees</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Cherry</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>PO</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2003</pubdate>
            <volume>31</volume>
            <issue>1</issue>
            <fpage>219</fpage>
            <lpage>223</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">165461</pubid>
                  <pubid idtype="pmpid" link="fulltext">12519986</pubid>
                  <pubid idtype="doi">10.1093/nar/gkg014</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B44">
            <title>
               <p>Cluster analysis and display of genome-wide expression patterns</p>
            </title>
            <aug>
               <au>
                  <snm>Eisen</snm>
                  <fnm>MB</fnm>
               </au>
               <au>
                  <snm>Spellman</snm>
                  <fnm>PT</fnm>
               </au>
               <au>
                  <snm>Brown</snm>
                  <fnm>PO</fnm>
               </au>
               <au>
                  <snm>Botstein</snm>
                  <fnm>D</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>1998</pubdate>
            <volume>95</volume>
            <issue>25</issue>
            <fpage>14863</fpage>
            <lpage>14868</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">24541</pubid>
                  <pubid idtype="pmpid" link="fulltext">9843981</pubid>
                  <pubid idtype="doi">10.1073/pnas.95.25.14863</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B45">
            <title>
               <p>The elements of statistical learning: data mining, inference, and prediction</p>
            </title>
            <aug>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Friedman</snm>
                  <fnm>JH</fnm>
               </au>
            </aug>
            <publisher>New York: Springer</publisher>
            <pubdate>2001</pubdate>
         </bibl>
         <bibl id="B46">
            <title>
               <p>Diagnosis of multiple cancer types by shrunken centroids of gene expression</p>
            </title>
            <aug>
               <au>
                  <snm>Tibshirani</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Hastie</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Narasimhan</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Chu</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci U S A</source>
            <pubdate>2002</pubdate>
            <volume>99</volume>
            <issue>10</issue>
            <fpage>6567</fpage>
            <lpage>6572</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">124443</pubid>
                  <pubid idtype="pmpid" link="fulltext">12011421</pubid>
                  <pubid idtype="doi">10.1073/pnas.082099299</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>

