<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1471-2105-8-66</ui>
   <ji>1471-2105</ji>
   <fm>
      <dochead>Methodology article</dochead>
      <bibl>
         <title>
            <p>Detection of the inferred interaction network in hepatocellular carcinoma from EHCO (<ul>E</ul>ncyclopedia of <ul>H</ul>epatocellular <ul>C</ul>arcinoma genes <ul>O</ul>nline)</p>
         </title>
         <aug>
            <au id="A1" ce="yes">
               <snm>Hsu</snm>
               <fnm>Chun-Nan</fnm>
               <insr iid="I1"/>
               <email>chunnan@iis.sinica.edu.tw</email>
            </au>
            <au id="A2" ce="yes">
               <snm>Lai</snm>
               <fnm>Jin-Mei</fnm>
               <insr iid="I2"/>
               <email>bio2028@mails.fju.edu.tw</email>
            </au>
            <au id="A3" ce="yes">
               <snm>Liu</snm>
               <fnm>Chia-Hung</fnm>
               <insr iid="I1"/>
               <email>chliu@iis.sinica.edu.tw</email>
            </au>
            <au id="A4" ce="yes">
               <snm>Tseng</snm>
               <fnm>Huei-Hun</fnm>
               <insr iid="I3"/>
               <email>b89501035@ntu.edu.tw</email>
            </au>
            <au id="A5" ce="yes">
               <snm>Lin</snm>
               <fnm>Chih-Yun</fnm>
               <insr iid="I4"/>
               <email>lin.chihyun@gmail.com</email>
            </au>
            <au id="A6" ce="yes">
               <snm>Lin</snm>
               <fnm>Kuan-Ting</fnm>
               <insr iid="I1"/>
               <email>woody@iis.sinica.edu.tw</email>
            </au>
            <au id="A7">
               <snm>Yeh</snm>
               <fnm>Hsu-Hua</fnm>
               <insr iid="I3"/>
               <email>ysh521521@yahoo.com.tw</email>
            </au>
            <au id="A8">
               <snm>Sung</snm>
               <fnm>Ting-Yi</fnm>
               <insr iid="I1"/>
               <email>tsung@iis.sinica.edu.tw</email>
            </au>
            <au id="A9">
               <snm>Hsu</snm>
               <fnm>Wen-Lian</fnm>
               <insr iid="I1"/>
               <email>hsu@iis.sinica.edu.tw</email>
            </au>
            <au id="A10">
               <snm>Su</snm>
               <fnm>Li-Jen</fnm>
               <insr iid="I4"/>
               <email>sulijen@nhri.org.tw</email>
            </au>
            <au id="A11">
               <snm>Lee</snm>
               <fnm>Sheng-An</fnm>
               <insr iid="I4"/>
               <insr iid="I5"/>
               <email>shengan@gmail.com</email>
            </au>
            <au id="A12">
               <snm>Chen</snm>
               <fnm>Chang-Han</fnm>
               <insr iid="I3"/>
               <insr iid="I4"/>
               <email>chchen7@nhri.org.tw</email>
            </au>
            <au id="A13">
               <snm>Lee</snm>
               <fnm>Gen-Cher</fnm>
               <insr iid="I1"/>
               <insr iid="I5"/>
               <email>gc@iis.sinica.edu.tw</email>
            </au>
            <au id="A14">
               <snm>Lee</snm>
               <fnm>DT</fnm>
               <insr iid="I1"/>
               <insr iid="I5"/>
               <email>dtlee@iis.sinica.edu.tw</email>
            </au>
            <au id="A15">
               <snm>Shiue</snm>
               <fnm>Yow-Ling</fnm>
               <insr iid="I6"/>
               <email>ylshiue@mail.nsysu.edu.tw</email>
            </au>
            <au id="A16">
               <snm>Yeh</snm>
               <fnm>Chang-Wei</fnm>
               <insr iid="I7"/>
               <email>TerryYeh@nchc.org.tw</email>
            </au>
            <au id="A17">
               <snm>Chang</snm>
               <fnm>Chao-Hui</fnm>
               <insr iid="I4"/>
               <email>chaohui@nhri.org.tw</email>
            </au>
            <au id="A18">
               <snm>Kao</snm>
               <fnm>Cheng-Yan</fnm>
               <insr iid="I5"/>
               <email>cykao@csie.ntu.edu.tw</email>
            </au>
            <au id="A19" ca="yes">
               <snm>Huang</snm>
               <mi>F</mi>
               <fnm>Chi-Ying</fnm>
               <insr iid="I3"/>
               <insr iid="I4"/>
               <insr iid="I5"/>
               <insr iid="I8"/>
               <insr iid="I9"/>
               <email>chiying@nhri.org.tw</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Institute of Information Science, Academia Sinica, Taipei 115, Taiwan, R. O. C</p>
            </ins>
            <ins id="I2">
               <p>Department of Life Science, Fu-Jen Catholic University, Taipei Hsien 242, Taiwan, R. O. C</p>
            </ins>
            <ins id="I3">
               <p>Division of Molecular and Genomic Medicine, National Health Research Institutes, Miaoli County 350, Taiwan, R. O. C</p>
            </ins>
            <ins id="I4">
               <p>Institute of Cancer Research, National Health Research Institutes, Taipei 114, Taiwan, R. O. C</p>
            </ins>
            <ins id="I5">
               <p>Department of Computer Science and Information Engineering, National Taiwan University, Taipei 106, Taiwan, R. O. C</p>
            </ins>
            <ins id="I6">
               <p>Institute of Biomedical Science, National Sun Yat-Sen University, Kaohsiung 804, Taiwan, R. O. C</p>
            </ins>
            <ins id="I7">
               <p>National Center for High-performance Computing, Hsinchu 300, Taiwan, R. O. C</p>
            </ins>
            <ins id="I8">
               <p>Institute of Bio-Pharmaceutical Sciences, National Yang-Ming University, Taipei 112, Taiwan, R. O. C</p>
            </ins>
            <ins id="I9">
               <p>Institute of Biotechnology in Medicine, National Yang-Ming University, Taipei 112, Taiwan, R. O. C</p>
            </ins>
         </insg>
         <source>BMC Bioinformatics</source>
         <issn>1471-2105</issn>
         <pubdate>2007</pubdate>
         <volume>8</volume>
         <issue>1</issue>
         <fpage>66</fpage>
         <url>http://www.biomedcentral.com/1471-2105/8/66</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">17326819</pubid>
               <pubid idtype="doi">10.1186/1471-2105-8-66</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>31</day>
               <month>10</month>
               <year>2006</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>27</day>
               <month>2</month>
               <year>2007</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>27</day>
               <month>2</month>
               <year>2007</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2007</year>
         <collab>Hsu et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>The significant advances in microarray and proteomics analyses have resulted in an exponential increase in potential new targets and have promised to shed light on the identification of disease markers and cellular pathways. We aim to collect and decipher the HCC-related genes at the systems level.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>Here, we build an integrative platform, the <ul>E</ul>ncyclopedia of <ul>H</ul>epatocellular <ul>C</ul>arcinoma genes <ul>O</ul>nline, dubbed EHCO <url>http://ehco.iis.sinica.edu.tw</url>, to systematically collect, organize and compare the pileup of unsorted HCC-related studies by using natural language processing and softbots. Among the eight gene set collections, ranging across PubMed, SAGE, microarray, and proteomics data, there are 2,906 genes in total; however, more than 77% of the genes are included only once, suggesting that tremendous efforts need to be exerted to characterize the relationship between HCC and these genes. Of these HCC inventories, protein binding represents the largest proportion (~25%) from Gene Ontology analysis. In fact, many differentially expressed gene sets in EHCO could form interaction networks (e.g. HBV-associated HCC network) by using available human protein-protein interaction datasets. To further highlight the potential new targets in the inferred network from EHCO, we combine comparative genomics and interactomics approaches to analyze 120 evolutionarily conserved and overexpressed genes in HCC. Of the 120 queries, 47 can form a highly interactive network, with 18 queries serving as hubs.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>This architectural map may represent a first step toward deciphering hepatocarcinogenesis at the systems level. Targeting hubs and/or disrupting network formation might reveal novel strategies for HCC treatment.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Hepatocellular carcinoma (HCC) is the most common liver malignancy and is one of the leading causes of death worldwide. It is especially prevalent among Asian populations. Because HCC is a leading cause of cancer death worldwide, research on its cause, diagnosis, and treatment continues into the post-genomic era. However, the carcinogenesis of HCC still remains poorly understood.</p>
         <p>In the post-genomic era, advances in tools and technologies have provided an excellent opportunity to better understand the complex interaction of hepatocarcinogenesis. For example, genome-wide microarray technologies, which are widely used to monitor global gene expression in cancer <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>, have identified numerous differentially expressed genes and enable cancer research to succeed where traditional methods have faltered. These high-throughput analyses have revolutionized the way that HCC is diagnosed and classified. It is generally believed that microarrays are well able to shed light on the identification of disease markers for diagnosis and potential targets for treatment. However, there are several bottlenecks associated with moving from microarray profiling to target identifications. The challenges include: (1) infrastructural challenges, such as the creation of data models and databases for storing data, the integration of data with external databases and the extraction of information from natural language text; (2) information overload, where there are many HCC-related microarray studies <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>, the gene annotations are scattered across different databases and the content is difficult to update or improve. 
Moreover, an exponential growth in both the number and the size of specialized biological databases has also made the task of performing cross-site browsing or iterative querying very tedious and challenging. There have been several attempts, such as GeneWebEx <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> and GENA <abbrgrp><abbr bid="B40">40</abbr></abbrgrp> to integrate cross-site information, but these often lack adequate annotation related to HCC. All this calls for the establishment of an infrastructure that can collect such scattered annotations, present them in a user-friendly way, and allow viewers to participate actively in its making.</p>
         <p>To embrace the paradigm shift to the next generation of the web and to provide web-based services, we have designed an information harvesting infrastructure, <ul>E</ul>ncyclopedia of <ul>H</ul>epatocellular <ul>C</ul>arcinoma genes <ul>O</ul>nline, dubbed EHCO, which employs softbots (or Web wrapper agents <abbrgrp><abbr bid="B15">15</abbr></abbrgrp>) to collect scattered gene annotations either by mining data sources directly or by querying publicly accessible databases. To overcome the limitation that EHCO reports merely descriptive results from various studies, we note that protein binding represents the largest proportion (~25%) of the molecular function category in Gene Ontology analysis. By using available protein-protein interaction datasets, a highly interactive biological network and novel hubs can be revealed among these seemingly random HCC inventories.</p>
      </sec>
      <sec>
         <st>
            <p>Results and Discussion</p>
         </st>
         <sec>
            <st>
               <p>The architecture of EHCO</p>
            </st>
            <p>EHCO adapts the PLONE platform, an open source content management system (CMS) with a workflow engine, pre-configured security and a set of content types, to create an infrastructure to support flexible storage and presentation (Figure <figr fid="F1">1A</figr>). The advantage of using PLONE is that PLONE supports Wiki so that EHCO can be extended beyond the context of HCC research. One example is Liver Fibrosis <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>, a sister site of EHCO, which provides liver necroinflammatory and fibrosis-related gene knowledge <abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. Both sites share the same server, the same PLONE platform, the same MySQL database, and most of the Python code. The uniqueness of EHCO lies in its ability to allow registered users to contribute their own work to EHCO to create an integrated biological information portal for efficient information sharing and extensive aggregation of research-related topics. To achieve this, EHCO uses ZWiki as its content management and presentation platform. ZWiki is one of many open-source Wiki platforms freely available online. With ZWiki, users can collaboratively create new Web pages to enrich the contents of EHCO.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>The architecture of EHCO</p>
               </caption>
               <text>
                  <p><b>The architecture of EHCO</b>. (A) EHCO uses a Content Management System, PLONE, to maintain different types of information. PLONE supports workflow design, content sharing, front-end editing, and member registration. Softbots, which interact with a software environment by using and interpreting the environment's feedback, are used as annotation collectors to retrieve scattered genomic information across the Internet. EHCO also implements Natural Language Processing, a subfield of artificial intelligence and linguistics, and Gene Name Service, a comprehensive cross-reference service of all widely used gene ID nomenclatures, to support the annotation engine, which is backed by a MySQL database and Python scripts. The Presentation Engine uses Wiki pages to allow dynamic information display as well as user commenting. (B) We performed biological information retrieval (IR) to obtain PubMed abstracts that may contain gene-HCC relationships, followed by two information extraction tasks: (1) Named Entity Recognition (NER): to recognize biomedical named entities (NEs) and (2) Named Entity Relation Recognition (NERR), as shown in the flowchart.</p>
               </text>
               <graphic file="1471-2105-8-66-1"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>The differentially expressed gene set collections in EHCO</p>
            </st>
            <p>A fundamental part of EHCO is the collection of eight gene sets related to HCC either from PubMed or diverse high-throughput studies (Figure <figr fid="F2">2A</figr>). Since different labs obtained all gene sets independently at different times, the sets used a wide variety of gene IDs and in many cases different gene names, making gene ID comparisons difficult. Therefore, we established Gene Name Service <abbrgrp><abbr bid="B49">49</abbr></abbrgrp>, a comprehensive gene name cross-reference database, to unify all of the aliases automatically. Since the amount of biomedical literature available on the web is rapidly increasing, manual information extraction from search results is usually unable to identify articles of interest immediately. Therefore, for the PubMed category, we extracted 1,084 genes (with HUGO-approved gene names) from approximately 4,500 abstracts (Figure <figr fid="F1">1B</figr>) (details in the Methods section). Genes that either do not have HUGO-approved gene names or were missed by the PubMed search were placed under TableX when we read the articles. The HCC-related microarray studies in EHCO were further organized into five gene sets. 
Differentially expressed HCC-related gene sets were collected from four major studies, including Chen et al., <abbrgrp><abbr bid="B1">1</abbr></abbrgrp> (referred to as SMD1648), Neo et al., <abbrgrp><abbr bid="B2">2</abbr></abbrgrp> (referred to as GIS), Lee et al., <abbrgrp><abbr bid="B3">3</abbr></abbrgrp> (referred to as Lee_NIH), and Kim et al., <abbrgrp><abbr bid="B4">4</abbr></abbrgrp> (referred to as Kim_NIH), and ten additional reports <abbrgrp><abbr bid="B5">5</abbr><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr><abbr bid="B8">8</abbr><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>, which were manually keyed in and are referred to as TableX_mRNA. The differentially expressed genes collected from these sets ranged from 199 (GIS) to 1,161 (SMD1648). Similarly, eight proteomics reports <abbrgrp><abbr bid="B16">16</abbr><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr><abbr bid="B19">19</abbr><abbr bid="B20">20</abbr><abbr bid="B21">21</abbr><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr></abbrgrp>, which represented relatively small collections of 104 proteins, were also manually keyed in and are referred to as TableX_protein. In addition, the SAGE dataset <abbrgrp><abbr bid="B48">48</abbr></abbrgrp> was collected from the CGAP library by using a 2-fold difference in tumor <it>vs</it>. non-tumor sample as the criterion, resulting in a gene set of 391 genes.</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Statistical analysis of EHCO collections</p>
               </caption>
               <text>
                  <p><b>Statistical analysis of EHCO collections</b>. (A) Gene intersections among the 8 different gene sets collected in EHCO. The numbers in parentheses are the gene numbers in each dataset. The intersection between each pair of datasets is shown in the box, e.g. SMD1648 has 138 genes in common with PubMed. (B) Distribution of HCC-related genes in EHCO. (C, D) Top 5 pathways, as analyzed by KEGG and BioCarta pathway collectors, associated with HCC-related genes in EHCO. (E) Chromosomal distribution of HCC-related genes in EHCO.</p>
               </text>
               <graphic file="1471-2105-8-66-2"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Intersection genes</p>
            </st>
            <p>Among the eight gene set collections, there are 2,906 non-redundant genes in total. Figure <figr fid="F2">2A</figr> shows the intersection between each gene set. The biggest intersection, which contained 138 genes, appeared between SMD1648 and PubMed. Interestingly, 68.0% of the SMD1648 (790 out of 1,161) and 69.6% of the PubMed (754 out of 1,084) collections (referred to as distinct genes in Figure <figr fid="F2">2A</figr>) had not been reported in other gene sets. Similarly, a comparison revealed that 77% of genes were only counted once among each gene set (Figure <figr fid="F2">2B</figr>), suggesting that tremendous efforts will be needed to characterize the relationship between HCC and these genes generated from diverse measurements. A cross-dataset comparison of SAGE and five microarray datasets (mRNA-based measurements) revealed the top 23 intersection genes, which appeared at least 4 times in EHCO, among each gene set. 19 out of the 23 genes shared consistent gene expression patterns (tumor <it>vs</it>. normal; up- or down-regulated) (shown as Up &amp; Down) (Table <tblr tid="T1">1</tblr>). In contrast, 4 genes had discrepancies in HCC expression patterns (bold in Table <tblr tid="T1">1</tblr>). This might be the result of changes in gene expression associated with different pathophysiological states, the type of sample collected, etc. Fortunately, three out of four had PubMed records, including FABP1 (overexpression in HCC <abbrgrp><abbr bid="B24">24</abbr></abbrgrp>), IGFBP3 (down-regulation in HCC <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>), and SGK (overexpression in HCC <abbrgrp><abbr bid="B10">10</abbr></abbrgrp>). Since we did not have an unbiased method to assess the accuracy of the NNMT data, Q-RT-PCR was applied to evaluate the expression patterns by using 21 paired HCC patient specimens. Down-regulation was observed in 19 out of 21 paired samples analyzed (Figure <figr fid="F3">3A</figr>). 
In addition to NNMT, we chose PEG10 <abbrgrp><abbr bid="B26">26</abbr><abbr bid="B27">27</abbr></abbrgrp>, which also had contradictory expression patterns in our collections, and performed Q-RT-PCR to validate the gene expression signatures. Figure <figr fid="F3">3B</figr> showed that overexpression of PEG10 was observed in 17 out of 21 HCC patient samples. Altogether, the majority of EHCO collections (~77%) appeared only once and there were some discrepancies among gene sets, indicative of a need for an immediate further validation of these different measurements by using different HCC samples. Therefore, we especially welcome other investigators to contribute their validation data to our EHCO data warehouse.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Top 23 genes that appear most frequently in 6 mRNA-related gene sets in EHCO.</p>
               </caption>
               <tblbdy cols="8">
                  <r>
                     <c ca="left">
                        <p>Count</p>
                     </c>
                     <c ca="left">
                        <p>Symbol</p>
                     </c>
                     <c ca="left">
                        <p>SMD1648</p>
                     </c>
                     <c ca="left">
                        <p>SAGE</p>
                     </c>
                     <c ca="left">
                        <p>GIS</p>
                     </c>
                     <c ca="left">
                        <p>LEE</p>
                     </c>
                     <c ca="left">
                        <p>KIM</p>
                     </c>
                     <c ca="left">
                        <p>TableX_mRNA</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="8">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>5</p>
                     </c>
                     <c ca="left">
                        <p>SCP2</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c cspan="8">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>4</p>
                     </c>
                     <c ca="left">
                        <p>ADH1B</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>ALB</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>ARG1</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CAT</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CP</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CPB2</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CRHBP</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CYP2C9</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>CYP2E1</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>FGB</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>GHR</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>HPD</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>HSD17B6</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>MT1B</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>PCK1</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>PHYH</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>TF</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>TTR</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>FABP1</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Up</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>IGFBP3</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Up</b>
                        </p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>NNMT</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Up &amp; Down</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>SGK</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c>
                        <p/>
                     </c>
                     <c ca="left">
                        <p>Down</p>
                     </c>
                     <c ca="left">
                        <p>
                           <b>Up</b>
                        </p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Validation of the EHCO dataset by quantitative RT-PCR</p>
               </caption>
               <text>
                  <p><b>Validation of the EHCO dataset by quantitative RT-PCR</b>. (A) NNMT is down-regulated and (B) PEG10 is overexpressed in HCC. The mRNA expression levels of NNMT and PEG10 were determined by quantitative RT-PCR in 21 paired samples from HCC patients. Results were normalized against the mRNA expression level of ACTB (A) and GAPDH (B) in each sample.</p>
               </text>
               <graphic file="1471-2105-8-66-3"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Pathway and chromosome assignments</p>
            </st>
            <p>Pathway information is important for an understanding of the functionality of genes and proteins. EHCO integrated two well-known pathway databases, KEGG <abbrgrp><abbr bid="B28">28</abbr></abbrgrp> and BioCarta <abbrgrp><abbr bid="B51">51</abbr></abbrgrp>. Figure <figr fid="F2">2C</figr> and <figr fid="F2">2D</figr> show the top five KEGG and BioCarta pathways that have the most HCC-related genes associated with them in these two publicly accessible pathway collections. Out of EHCO's current collections, 2385 genes were identified as part of the associated pathways from KEGG. The most associated pathway is cytokine-cytokine receptor interaction, which has 102 genes associated with it. The most associated pathway in BioCarta is the MAPK signaling pathway, which has 38 genes associated with it. Interestingly, these two publicly accessible pathway analyzers did not have a similar dataset for their top 5 collections except for the MAPK signaling pathway. This is probably due to the fact that the primary areas of KEGG collections are metabolic pathways whereas the coverage areas of BioCarta are signaling pathways. Moreover, analysis of chromosome distribution indicates that the largest proportion belongs to chromosome 1 (343 genes), whereas Y chromosome contains the least genes, namely only 2 EHCO genes (Figure <figr fid="F2">2E</figr>).</p>
         </sec>
         <sec>
            <st>
               <p>Harvesting gene annotations through softbots and weblinks</p>
            </st>
            <p>The disease specific gene/protein expression pattern can provide important clues about gene function. Currently, ~1000 genes related to HCC are in our PubMed collections (or referred to as small scale studies). To date, EHCO has extended the HCC-related gene collections to 2,906 genes. The study of these genes can be accelerated by functional annotations.</p>
            <p>To organize diverse HCC-related datasets, the annotation handler stepped in to annotate the EHCO collected gene sets. We use Softbots <abbrgrp><abbr bid="B29">29</abbr></abbrgrp> to harvest gene annotations from various web resources. A softbot is an intelligent software robot that acts, on behalf of the user, to achieve certain goals. Given the resources, which can be online websites, databases, or documents, a softbot extracts the information that it has been targeted to. Since softbot can be fragile against changes to the online resources, we used a Java program, called Agent Toolbox <abbrgrp><abbr bid="B15">15</abbr></abbrgrp> to create and maintain these softbots. Agent Toolbox 'learns' the information extraction rules from users' labels on the web page with a machine-learning algorithm <abbrgrp><abbr bid="B36">36</abbr><abbr bid="B37">37</abbr></abbrgrp>. As a result, we can efficiently repair and maintain our softbots. In this study, individual softbots were used to mine different targets. These programs will periodically update the latest information from NCBI databases and download them into EHCO. Finally, we created a presentation engine integrating all the information into a single user-friendly page view as "Gene Info" (Figure <figr fid="F1">1A</figr>). As a result, each gene annotation webpage displays the most up-to-date information from various databases, e.g. EBI <abbrgrp><abbr bid="B52">52</abbr></abbrgrp>, and SMART <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>.</p>
            <p>To rapidly interrogate the expression patterns of our collection, two gene sets (SMD1648 and PubMed) were used as templates to annotate our EHCO collections. Firstly, the SMD-HCC dataset reports not only those differentially expressed genes in SMD1648, but also records the expression patterns of other genes (~16,000 genes), which are not statistically significant in the original data analysis. Inclusion of this on-line quantitative evaluation of gene expression provides a rapid visualization of global trends in gene expression. Secondly, we applied a natural language processing technique to extract information from the abstracts, followed by a summary table illustrating the gene and disease relationship. Toward this goal, we have developed a tool, as described in the Methods section, to annotate genes, diseases, and other HCC-related information (i.e. HBV, HCV) from PubMed abstracts to provide a brief summary of each EHCO gene.</p>
         </sec>
         <sec>
            <st>
               <p>Detection of inferred biological network of HCC</p>
            </st>
            <p>The availability of HCC-related gene catalogs now enables elucidation of molecular mechanisms governing hepatocarcinogenesis at the systems level. However, these gene catalogs give no direct clues, at least not immediately evident from the gene symbols, to the underlying carcinogenesis processes. Gene Ontology (GO) analysis <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>, which allows the identification of functional related clusters, indicates that protein binding from molecular function (Figure <figr fid="F4">4A</figr>) and cellular physiological process from biological process (Figure <figr fid="F4">4B</figr>) represent the largest proportion (approximately 25%), respectively, in either category. This analysis raises the possibility that these HCC-related genes might not act alone randomly, but have a propensity to associate physically or functionally to perturb the fundamental biological processes of the liver.</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Detection of inferred biological network of HCC</p>
               </caption>
               <text>
                  <p><b>Detection of inferred biological network of HCC</b>. (A, B) Gene Ontology (GO) analysis reveals top 5 categories in molecular function and biological process. Protein binding is the largest proportion in molecular function (A), whereas cellular physiological process ranks number one in the biological process (B). (C, D, E) Detection of protein-protein interaction (PPI) network from various gene sets. Only up-regulated genes (referred to as queries) in each gene set are subjected to search for their interaction partners. Many queries can form interaction networks among the queries themselves, despite the fact that many queries do not have PPI data. (D) Of 139 GIS queries, 45 of them can form 55 PPIs to constitute the interaction networks and 28 (red circle) of them serve as hubs (referred to as interacting with more than one query, including homo-dimer). (E) Of 149 Lee_NIH queries with available PPI data, 52 of them (34.9%) can interact with each other and form 62 PPIs.</p>
               </text>
               <graphic file="1471-2105-8-66-4"/>
            </fig>
            <p>To uncover the potential interaction networks or synergistic effects of these seemingly unrelated HCC-related genes, we employed each up-regulated gene set as queries and searched for their interaction partners by accessing our previously established protein-protein interaction (PPI) data <abbrgrp><abbr bid="B31">31</abbr><abbr bid="B32">32</abbr></abbrgrp>. Figure <figr fid="F4">4C</figr> summarizes the analysis results. We used the GIS dataset as the first example because all tissue samples in the study were HBV-associated HCC patients. GIS consists of 161 overexpressed genes, but 22 of them have no PPI information. Of the remaining 139 queries, 45 of them (32.4%) could interact with each other (Figure <figr fid="F4">4C</figr> and <figr fid="F4">4D</figr>) and form 55 PPIs. Moreover, 28 queries serve as "hubs" (Figure <figr fid="F4">4D</figr>, labeled with red circles), which are defined as queries that interact with more than one query in a given PPI network (see later). Next, we used Lee_NIH as the second example because this study used a mouse model to recapitulate HCC and may provide novel insight toward the hepatocarcinogenesis. Similarly, of 149 queries with available PPI data, 52 of them (34.9%) can interact with each other to constitute 62 PPIs and 29 hubs can be revealed (Figure <figr fid="F4">4E</figr>, labeled with red circles). Both queries in the inferred networks belong to primarily cellular physiological process and metabolism from biological process in GO. Together, transformation of these seemingly random genes into their corresponding PPI reveals the intrinsic dys-regulated biological network in HCC and highlights the potential new hubs for discovery of the potential HCC markers or therapeutic targets.</p>
         </sec>
         <sec>
            <st>
               <p>Evaluation of "hubs" in the conserved HCC network at the systems level</p>
            </st>
            <p>It has been illustrated using the yeast PPI network that this network is scale-free, in which some proteins have many more interactions than others <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>. A survey of the yeast protein interaction network has revealed that proteins with more interactions are more important than those with fewer interacting proteins <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>, suggesting that hub degree is an indicator for essentialness in a network. However, it is difficult to address the essentialness of these newly identified hubs (or potential new targets for hepatocarcinogenesis) from the inferred biological network as described in Figure <figr fid="F4">4D</figr> and <figr fid="F4">4E</figr>. Therefore, we employed the comparative genomics approach for the availability of knockout phenotype from various model organisms. To reduce the complexity, we focused on those genes only conserved in evolution, which may represent, at least in part, the dys-regulation of essential cellular physiological roles in hepatocarcinogenesis as concluded in GO analysis (Figure <figr fid="F4">4B</figr>). Based on the HomoloGene orthologues database (build50.1), 228 genes, consisting of 139 up- and 78 down-regulated genes, out of 2906 EHCO collections are conserved across species ranging from <it>H. sapiens</it>, <it>M. musculus</it>, <it>C. elegans</it>, <it>D. melanogaster</it>, to <it>S. cerevisiae </it>(Figure <figr fid="F5">5A</figr>). Of 139 queries, 120 queries with PPI data were individually subjected to search for their interaction partners. 47 out of 120 queries (~40%) were associated with each other and there were 18 hubs (Figure <figr fid="F5">5B</figr>, labeled in red) in this network. By using the available phenotypic information from WormBase <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>, FlyBase <abbrgrp><abbr bid="B45">45</abbr></abbrgrp>, and SGD <abbrgrp><abbr bid="B46">46</abbr></abbrgrp>, the percentage of nonviable phenotypes, e.g. embryonic or larval lethality, or sterility, in these 18 hubs was higher than in those 120 queries (Figure <figr fid="F5">5C</figr>). Moreover, of these 18 hubs with more interaction proteins, the ratio of nonviable phenotype in yeast seemed to be higher than those with fewer interaction proteins (Figure <figr fid="F5">5D</figr>). This analysis seems to be able to shed light on which research can be based to study potential new players in HCC research in the post-genomic era through the systems biology approach.</p>
            <fig id="F5">
               <title>
                  <p>Figure 5</p>
               </title>
               <caption>
                  <p>An architectural map of the conserved HCC network at the systems level</p>
               </caption>
               <text>
                  <p><b>An architectural map of the conserved HCC network at the systems level</b>. (A) 228 HCC-related genes in EHCO collections are evolutionarily conserved across various species. EHCO collects 2,906 HCC-related <it>H. sapiens </it>genes, of which 2,635 genes have <it>M. musculus </it>homologs (point by arrow) and 114 genes cannot find any homologs (point by node). Of 2,635 <it>M. musculus </it>genes, there are 774 genes with <it>C. elegans </it>homologs and 324 genes have <it>D. melanogaster </it>homologs, but do not have <it>C. elegans </it>homologs (branch and point by arrow). In short, 228 genes are conserved from <it>H. sapiens </it>to <it>S. cerevisiae</it>. (B) Conserved HCC network. Among 139 overexpressed and evolutionarily conserved HCC-related genes, 120 of them have at least one PPI record. 47 of them can interact with each other and constitute a network and 18 of them are query-also-hubs (red circle). (C) Evaluation of the essentialness of hubs. To evaluate the essentialness of hubs, 139 queries, 47 queries constituting the interaction network, and 18 query-also-hubs were subjected to search for the nonviable phenotype in their corresponding homologs in various model organisms, including <it>C. elegans</it>, <it>D. melanogaster</it>, and <it>S. cerevisiae</it>. (D) Evaluation of the feature of hub degree. The degree, which is referred to as number of interactions associated with a protein, is considered to be one of the features in determining the essentialness of hubs. 18 query-also-hubs, which have various degrees ranging from 2&#8211;7, were subjected to search for the nonviable phenotype in their corresponding homologs in <it>S. cerevisiae</it>. The relative ratio is shown.</p>
               </text>
               <graphic file="1471-2105-8-66-5"/>
            </fig>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>The collected genes or associated pathways in EHCO may represent only a small part of the whole genome or our understanding of hepatocarcinogenesis. This network construction supports the view that well coordinated interaction networks may be required for these aberrant HCC-related genes to modulate hepatocarcinogenesis. Targeting multiple hubs and/or disruption of the network formation could offer an excellent opportunity to reveal potential strategy for HCC treatment.</p>
      </sec>
      <sec>
         <st>
            <p>Methods</p>
         </st>
         <sec>
            <st>
               <p>Text-mining to acquire HCC-related gene sets</p>
            </st>
            <p>The fundamental part of EHCO is the collections of eight gene sets related to HCC. The text-mining method to acquire HCC-related literatures from PubMed used "hepatocellular carcinoma" as a keyword. This study used the latest approved human genome nomenclature from HUGO Gene Nomenclature Committee <abbrgrp><abbr bid="B47">47</abbr></abbrgrp>. A simple text-matching program was written to look for the presence of HUGO-approved gene names, symbols, and aliases, in the title and abstract part of HCC-related literatures, resulting in acquiring approximately 24,300 abstracts. Because one publication may contain several genes, we curated the results by removing redundancy and obtained 10,425 abstracts. These potential HCC-related abstracts and gene lists were further verified to remove unrelated abstracts by natural language processing, followed by manual read through by biology major graduate students, research associates and postdoctoral fellows. Finally, in the PubMed category, we had extracted 1,084 genes from 4,492 abstracts.</p>
         </sec>
         <sec>
            <st>
               <p>Information retrieval and extraction by natural language processing</p>
            </st>
            <p>To elucidate the relationship between the EHCO collected genes and HCC, we applied a natural language processing (NLP) technique to extract information from the literature. We performed biological information retrieval (BioIR) to obtain PubMed abstracts that may contain gene-HCC relationships, followed by information extraction to reveal gene-HCC relationships. BioIR started with searching for keyword combinations, i.e. (gene symbol or aliases) and (hepatocellular carcinoma). 10,425 PubMed abstracts were scanned to search for HCC candidate genes. We filtered out abstracts irrelevant to gene-HCC relationships by eliminating those abstracts in which HCC and genes did not co-occur in the same sentence, since most NLP techniques only handled information in the same line. 5,942 abstracts were found to contain gene-HCC relationships. Associates who majored in biology manually read through the abstracts, resulting in the validation of 4,492 abstracts. The accuracy (or precision in NLP terminology) is about 75%. Error analysis on the output and retrieved abstracts indicates that most of the errors were caused by lack of sufficient syntactic patterns. The retrieved relevant PubMed abstracts were then subjected to automated information extraction.</p>
            <p>To extract information related to a certain topic, the extraction tool needs to be capable of accomplishing two tasks: (1) Named Entity Recognition (NER): to recognize biomedical named entities (NEs) (e.g. afp, alb, mRNA and HCC); and (2) Named Entity Relation Recognition (NERR): to recognize relationships of interest between NEs (e.g. HCC and mRNA level). NER in the biomedical domain is a challenging problem in natural language processing for the following reasons. Most biomedical named entities have no nomenclature. They may include long compound words (e.g. hepatocellular carcinoma), or unknown words that include hyphen, digit, Greek letter, or Roman numeral (e.g. 4'-mycarosyl isovaleryl-CoA transferase). Abbreviations and acronyms may also appear as unknown words and frequently cause ambiguity in their meanings. Moreover, named entities may involve variations in spelling (e.g. N-acetylcysteine, N-acetyl-cysteine, NacetylCysteine) or in expressions (e.g. EGF receptor and EGFR). To handle this NER problem, we constructed a NE dictionary that contains the following NEs and NE classes along with their aliases: gene, protein, mRNA, serum, hepatitis B virus (HBV), hepatitis C virus (HCV), methylation, liver regeneration, HCC, cirrhosis, fibrosis, and necrosis. We used a dictionary-based method, i.e. a dictionary and a fast matching algorithm, to identify NEs of our interest occurring in the PubMed abstracts. Next, we proceeded to investigate the relationship between NEs by using a natural language parser to assign part-of-speech annotation, and co-occurrences of terms, and template-based methods. The overall automated procedure to extract gene-HCC relationships for our gene-HCC knowledge base system is shown in Figure <figr fid="F1">1B</figr>.</p>
         </sec>
         <sec>
            <st>
               <p>Patients and tumor samples</p>
            </st>
            <p>21 pairs of tumor and adjacent non-tumor liver tissues were collected from 1996 to 2000 at the Division of General Surgery of Kaohsiung Veterans General Hospital (VGH). No patients had previously received any treatment, e.g. chemotherapy, for HCC. The study protocol had the approval of the ethics committee at Kaohsiung-VGH. All patients gave informed consent. Study samples, including tumor and adjacent non-tumor liver tissues, were obtained during diagnostic biopsy, and non-tumor liver tissues were derived from a neighboring site outside of the tumor. Both tumor and adjacent non-tumor liver tissues for subsequent studies were confirmed by pathologists and were used for quantitative RT-PCR (or Q-RT-PCR). The detailed protocols for RNA isolation and SYBR Green I based quantitative RT-PCR were as described previously <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Availability and requirements</p>
         </st>
         <p>Project name: Functional genomics analysis elucidates the signaling networks of hepatocellular carcinoma.</p>
         <p>Project home page: <url>http://ehco.iis.sinica.edu.tw</url></p>
         <p>Operating system(s): Platform independent.</p>
         <p>Any restrictions to use by non-academics: no licence needed.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>CNH, JML, and CYFH designed the project, analyzed the results and wrote the paper. CHL, HHT, KTL, GCL, DTL and CWY designed the architecture of EHCO and implemented softbots to harvest gene annotation. TYS and WLH were responsible for the natural language processing part of the project. LJS and CHC did the Q-RT-PCR experiment. SAL and CYK did the network construction. HHY, YLS, and CHC were responsible for database collections and manual curation of the collected information. All authors read and approved the final manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>This project was supported in part by grants from the National Health Research Institutes and National Science Council (Taiwan) (NSC95-2320-B-400-009-MY3) to C. F. Huang, and by the National Research Program for Genomic Medicine (NRPGM), National Science Council (Taiwan) (NSC95-3112-B-011-013-Y, Advanced Bioinformatics Core) to C. Hsu. We are grateful to the National Center for High-performance Computing for providing support for this project. We thank members of the Huang laboratory for reading through the HCC-related PubMed abstracts and for their valuable comments, and Yi-Feng Lin, Tsung-Wei Hu and Jen-Jie Chiou for their assistance with the text-mining and migration of the web system. We thank Drs. Ann-Ping Tsou, Chen-Kung Chou, Wey-Jinq Lin, Yuan-Chii G. Lee, and Chang-Tze R. Yu for helpful discussions and suggestions.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Gene expression patterns in human liver cancers</p>
            </title>
            <aug>
               <au>
                  <snm>Chen</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Cheung</snm>
                  <fnm>ST</fnm>
               </au>
               <au>
                  <snm>So</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Fan</snm>
                  <fnm>ST</fnm>
               </au>
               <au>
                  <snm>Barry</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Higgins</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Lai</snm>
                  <fnm>KM</fnm>
               </au>
               <au>
                  <snm>Ji</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Dudoit</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ng</snm>
                  <fnm>IO</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Biol Cell</source>
            <pubdate>2002</pubdate>
            <volume>13</volume>
            <issue>6</issue>
            <fpage>1929</fpage>
            <lpage>1939</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">117615</pubid>
                  <pubid idtype="pmpid" link="fulltext">12058060</pubid>
                  <pubid idtype="doi">10.1091/mbc.02-02-0023.</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Identification of discriminators of hepatoma by gene expression profiling using a minimal dataset approach</p>
            </title>
            <aug>
               <au>
                  <snm>Neo</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>Leow</snm>
                  <fnm>CK</fnm>
               </au>
               <au>
                  <snm>Vega</snm>
                  <fnm>VB</fnm>
               </au>
               <au>
                  <snm>Long</snm>
                  <fnm>PM</fnm>
               </au>
               <au>
                  <snm>Islam</snm>
                  <fnm>AF</fnm>
               </au>
               <au>
                  <snm>Lai</snm>
                  <fnm>PB</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>ET</fnm>
               </au>
               <au>
                  <snm>Ren</snm>
                  <fnm>EC</fnm>
               </au>
            </aug>
            <source>Hepatology</source>
            <pubdate>2004</pubdate>
            <volume>39</volume>
            <issue>4</issue>
            <fpage>944</fpage>
            <lpage>953</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/hep.20105</pubid>
                  <pubid idtype="pmpid" link="fulltext">15057898</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Application of comparative functional genomics to identify best-fit mouse models to study human cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Lee</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Chu</snm>
                  <fnm>IS</fnm>
               </au>
               <au>
                  <snm>Mikaelyan</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Calvisi</snm>
                  <fnm>DF</fnm>
               </au>
               <au>
                  <snm>Heo</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Reddy</snm>
                  <fnm>JK</fnm>
               </au>
               <au>
                  <snm>Thorgeirsson</snm>
                  <fnm>SS</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2004</pubdate>
            <volume>36</volume>
            <issue>12</issue>
            <fpage>1306</fpage>
            <lpage>1311</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1481</pubid>
                  <pubid idtype="pmpid" link="fulltext">15565109</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Cancer-associated molecular signature in the tissue samples of patients with cirrhosis</p>
            </title>
            <aug>
               <au>
                  <snm>Kim</snm>
                  <fnm>JW</fnm>
               </au>
               <au>
                  <snm>Ye</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Forgues</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Budhu</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sime</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Hofseth</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Kaul</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>XW</fnm>
               </au>
            </aug>
            <source>Hepatology</source>
            <pubdate>2004</pubdate>
            <volume>39</volume>
            <issue>2</issue>
            <fpage>518</fpage>
            <lpage>527</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/hep.20053</pubid>
                  <pubid idtype="pmpid" link="fulltext">14768006</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Analysis of differentially expressed genes in hepatocellular carcinoma using cDNA arrays</p>
            </title>
            <aug>
               <au>
                  <snm>Goldenberg</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Ayesh</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Schneider</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Pappo</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Jurim</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Eid</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Fellig</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Dadon</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ariel</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>de Groot</snm>
                  <fnm>N</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Carcinog</source>
            <pubdate>2002</pubdate>
            <volume>33</volume>
            <issue>2</issue>
            <fpage>113</fpage>
            <lpage>124</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/mc.10027</pubid>
                  <pubid idtype="pmpid" link="fulltext">11813304</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Discovery of differentially expressed genes related to histological subtype of hepatocellular carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Lee</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Choi</snm>
                  <fnm>SW</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>IB</fnm>
               </au>
            </aug>
            <source>Biotechnol Prog</source>
            <pubdate>2003</pubdate>
            <volume>19</volume>
            <issue>3</issue>
            <fpage>1011</fpage>
            <lpage>1015</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1021/bp025746a</pubid>
                  <pubid idtype="pmpid" link="fulltext">12790669</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Establishment of a hepatocellular carcinoma cell line with unique metastatic characteristics through in vivo selection and screening for metastasis-related genes through cDNA microarray</p>
            </title>
            <aug>
               <au>
                  <snm>Li</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Tang</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Ye</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Xue</snm>
                  <fnm>Q</fnm>
               </au>
            </aug>
            <source>J Cancer Res Clin Oncol</source>
            <pubdate>2003</pubdate>
            <volume>129</volume>
            <issue>1</issue>
            <fpage>43</fpage>
            <lpage>51</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1007/s00432-003-0493-z</pubid>
                  <pubid idtype="pmpid" link="fulltext">12618900</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>Expression profiling suggested a regulatory role of liver-enriched transcription factors in human hepatocellular carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Xu</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Hui</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Gong</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Jin</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Ji</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Han</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Hu</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Cancer Res</source>
            <pubdate>2001</pubdate>
            <volume>61</volume>
            <issue>7</issue>
            <fpage>3176</fpage>
            <lpage>3181</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">11306505</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Functional and genomic implications of global gene expression profiles in cell lines from human hepatocellular cancer</p>
            </title>
            <aug>
               <au>
                  <snm>Lee</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Thorgeirsson</snm>
                  <fnm>SS</fnm>
               </au>
            </aug>
            <source>Hepatology</source>
            <pubdate>2002</pubdate>
            <volume>35</volume>
            <issue>5</issue>
            <fpage>1134</fpage>
            <lpage>1143</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1053/jhep.2002.33165</pubid>
                  <pubid idtype="pmpid" link="fulltext">11981763</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Gene expression profile analysis in human hepatocellular carcinoma by cDNA microarray</p>
            </title>
            <aug>
               <au>
                  <snm>Chung</snm>
                  <fnm>EJ</fnm>
               </au>
               <au>
                  <snm>Sung</snm>
                  <fnm>YK</fnm>
               </au>
               <au>
                  <snm>Farooq</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Im</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tak</snm>
                  <fnm>WY</fnm>
               </au>
               <au>
                  <snm>Hwang</snm>
                  <fnm>YJ</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Han</snm>
                  <fnm>HS</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>JC</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Cells</source>
            <pubdate>2002</pubdate>
            <volume>14</volume>
            <issue>3</issue>
            <fpage>382</fpage>
            <lpage>387</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">12521301</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>Gene expression profiling of preneoplastic liver disease and liver cancer: a new era for improved early detection and treatment of these deadly diseases?</p>
            </title>
            <aug>
               <au>
                  <snm>Kim</snm>
                  <fnm>JW</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>XW</fnm>
               </au>
            </aug>
            <source>Carcinogenesis</source>
            <pubdate>2003</pubdate>
            <volume>24</volume>
            <issue>3</issue>
            <fpage>363</fpage>
            <lpage>369</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/carcin/24.3.363</pubid>
                  <pubid idtype="pmpid" link="fulltext">12663493</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Identification of differentially expressed genes in hepatocellular carcinoma and metastatic liver tumors by oligonucleotide expression profiling</p>
            </title>
            <aug>
               <au>
                  <snm>Tackels-Horne</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Goodman</snm>
                  <fnm>MD</fnm>
               </au>
               <au>
                  <snm>Williams</snm>
                  <fnm>AJ</fnm>
               </au>
               <au>
                  <snm>Wilson</snm>
                  <fnm>DJ</fnm>
               </au>
               <au>
                  <snm>Eskandari</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Vogt</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Boland</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Scherf</snm>
                  <fnm>U</fnm>
               </au>
               <au>
                  <snm>Vockley</snm>
                  <fnm>JG</fnm>
               </au>
            </aug>
            <source>Cancer</source>
            <pubdate>2001</pubdate>
            <volume>92</volume>
            <issue>2</issue>
            <fpage>395</fpage>
            <lpage>405</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/1097-0142(20010715)92:2&lt;395::AID-CNCR1335>3.0.CO;2-U</pubid>
                  <pubid idtype="pmpid" link="fulltext">11466695</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Identification of differentially expressed genes in hepatocellular carcinoma with cDNA microarrays</p>
            </title>
            <aug>
               <au>
                  <snm>Shirota</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kaneko</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Honda</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Kawai</snm>
                  <fnm>HF</fnm>
               </au>
               <au>
                  <snm>Kobayashi</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Hepatology</source>
            <pubdate>2001</pubdate>
            <volume>33</volume>
            <issue>4</issue>
            <fpage>832</fpage>
            <lpage>840</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1053/jhep.2001.23003</pubid>
                  <pubid idtype="pmpid" link="fulltext">11283847</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Identification of genes associated with dedifferentiation of hepatocellular carcinoma with expression profiling analysis</p>
            </title>
            <aug>
               <au>
                  <snm>Midorikawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Tsutsumi</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Taniguchi</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Ishii</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Kobune</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kodama</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Makuuchi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Aburatani</snm>
                  <fnm>H</fnm>
               </au>
            </aug>
            <source>Jpn J Cancer Res</source>
            <pubdate>2002</pubdate>
            <volume>93</volume>
            <issue>6</issue>
            <fpage>636</fpage>
            <lpage>643</lpage>
            <xrefbib>
               <pubid idtype="pmpid">12079511</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Reconfigurable web wrapper agents for web information integration</p>
            </title>
            <aug>
               <au>
                  <snm>Hsu</snm>
                  <fnm>CN</fnm>
               </au>
               <au>
                  <snm>Chang</snm>
                  <fnm>CH</fnm>
               </au>
               <au>
                  <snm>Siek</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Lu</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Chiou</snm>
                  <fnm>JJ</fnm>
               </au>
            </aug>
            <source>Proceedings of IJCAI-2003 Workshop on Information Integration on the Web</source>
            <publisher>Menlo Park, CA</publisher>
            <pubdate>2003</pubdate>
         </bibl>
         <bibl id="B16">
            <title>
               <p>Identification of novel proteins associated with hepatocellular carcinomas using protein microarrays</p>
            </title>
            <aug>
               <au>
                  <snm>Tannapfel</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Anhalt</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Hausermann</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Sommerer</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Benicke</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Uhlmann</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Witzigmann</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Hauss</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Wittekind</snm>
                  <fnm>C</fnm>
               </au>
            </aug>
            <source>J Pathol</source>
            <pubdate>2003</pubdate>
            <volume>201</volume>
            <issue>2</issue>
            <fpage>238</fpage>
            <lpage>249</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/path.1420</pubid>
                  <pubid idtype="pmpid" link="fulltext">14517841</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Proteome analysis of hepatocellular carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Lim</snm>
                  <fnm>SO</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>SG</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>HJ</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>YI</fnm>
               </au>
               <au>
                  <snm>Sohn</snm>
                  <fnm>TS</fnm>
               </au>
               <au>
                  <snm>Noh</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Jung</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Biochem Biophys Res Commun</source>
            <pubdate>2002</pubdate>
            <volume>291</volume>
            <issue>4</issue>
            <fpage>1031</fpage>
            <lpage>1037</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1006/bbrc.2002.6547</pubid>
                  <pubid idtype="pmpid" link="fulltext">11866469</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Proteome analysis of human liver tumor tissue by two-dimensional gel electrophoresis and matrix assisted laser desorption/ionization-mass spectrometry for identification of disease-related proteins</p>
            </title>
            <aug>
               <au>
                  <snm>Kim</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>SH</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>SU</fnm>
               </au>
               <au>
                  <snm>Ha</snm>
                  <fnm>GH</fnm>
               </au>
               <au>
                  <snm>Kang</snm>
                  <fnm>DG</fnm>
               </au>
               <au>
                  <snm>Ha</snm>
                  <fnm>NY</fnm>
               </au>
               <au>
                  <snm>Ahn</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Cho</snm>
                  <fnm>HY</fnm>
               </au>
               <au>
                  <snm>Kang</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>YJ</fnm>
               </au>
               <etal/>
            </aug>
            <source>Electrophoresis</source>
            <pubdate>2002</pubdate>
            <volume>23</volume>
            <issue>24</issue>
            <fpage>4142</fpage>
            <lpage>4156</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/elps.200290032</pubid>
                  <pubid idtype="pmpid" link="fulltext">12481271</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <title>
               <p>From proteomic analysis to clinical significance: overexpression of cytokeratin 19 correlates with hepatocellular carcinoma metastasis</p>
            </title>
            <aug>
               <au>
                  <snm>Ding</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Tan</snm>
                  <fnm>YX</fnm>
               </au>
               <au>
                  <snm>Jiang</snm>
                  <fnm>MR</fnm>
               </au>
               <au>
                  <snm>Tian</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>YK</fnm>
               </au>
               <au>
                  <snm>Shao</snm>
                  <fnm>XX</fnm>
               </au>
               <au>
                  <snm>Ye</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>JR</fnm>
               </au>
               <au>
                  <snm>Zeng</snm>
                  <fnm>R</fnm>
               </au>
               <etal/>
            </aug>
            <source>Mol Cell Proteomics</source>
            <pubdate>2004</pubdate>
            <volume>3</volume>
            <issue>1</issue>
            <fpage>73</fpage>
            <lpage>81</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1074/mcp.M300094-MCP200</pubid>
                  <pubid idtype="pmpid" link="fulltext">14593079</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Proteomic analysis of hepatitis B virus-associated hepatocellular carcinoma: Identification of potential tumor markers</p>
            </title>
            <aug>
               <au>
                  <snm>Li</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Tan</snm>
                  <fnm>YX</fnm>
               </au>
               <au>
                  <snm>Zhou</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Ding</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>SJ</fnm>
               </au>
               <au>
                  <snm>Ma</snm>
                  <fnm>DJ</fnm>
               </au>
               <au>
                  <snm>Man</snm>
                  <fnm>XB</fnm>
               </au>
               <au>
                  <snm>Hong</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Zhang</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>L</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proteomics</source>
            <pubdate>2005</pubdate>
            <volume>5</volume>
            <issue>4</issue>
            <fpage>1125</fpage>
            <lpage>1139</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/pmic.200401141</pubid>
                  <pubid idtype="pmpid" link="fulltext">15759316</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>An integrated proteome database for two-dimensional electrophoresis data analysis and laboratory information management system</p>
            </title>
            <aug>
               <au>
                  <snm>Cho</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>Park</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Shim</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Kwon</snm>
                  <fnm>MS</fnm>
               </au>
               <au>
                  <snm>Joo</snm>
                  <fnm>KH</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>WS</fnm>
               </au>
               <au>
                  <snm>Chang</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Chung</snm>
                  <fnm>HC</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>HO</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proteomics</source>
            <pubdate>2002</pubdate>
            <volume>2</volume>
            <issue>9</issue>
            <fpage>1104</fpage>
            <lpage>1113</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/1615-9861(200209)2:9&lt;1104::AID-PROT1104&gt;3.0.CO;2-Q</pubid>
                  <pubid idtype="pmpid" link="fulltext">12362329</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Proteomic alterations of the variants of human aldehyde dehydrogenase isozymes correlate with hepatocellular carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Park</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Cho</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Paik</snm>
                  <fnm>YK</fnm>
               </au>
            </aug>
            <source>Int J Cancer</source>
            <pubdate>2002</pubdate>
            <volume>97</volume>
            <issue>2</issue>
            <fpage>261</fpage>
            <lpage>265</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/ijc.1585</pubid>
                  <pubid idtype="pmpid" link="fulltext">11774273</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>Proteomic analysis and molecular characterization of tissue ferritin light chain in hepatocellular carcinoma</p>
            </title>
            <aug>
               <au>
                  <snm>Park</snm>
                  <fnm>KS</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>NG</fnm>
               </au>
               <au>
                  <snm>Cho</snm>
                  <fnm>SY</fnm>
               </au>
               <au>
                  <snm>Choi</snm>
                  <fnm>KH</fnm>
               </au>
               <au>
                  <snm>Seong</snm>
                  <fnm>JK</fnm>
               </au>
               <au>
                  <snm>Paik</snm>
                  <fnm>YK</fnm>
               </au>
            </aug>
            <source>Hepatology</source>
            <pubdate>2002</pubdate>
            <volume>35</volume>
            <issue>6</issue>
            <fpage>1459</fpage>
            <lpage>1466</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1053/jhep.2002.33204</pubid>
                  <pubid idtype="pmpid" link="fulltext">12029631</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Immunohistochemical demonstration of liver fatty acid-binding protein in human hepatocellular malignancies</p>
            </title>
            <aug>
               <au>
                  <snm>Suzuki</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Watanabe</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Ono</snm>
                  <fnm>T</fnm>
               </au>
            </aug>
            <source>J Pathol</source>
            <pubdate>1990</pubdate>
            <volume>161</volume>
            <issue>1</issue>
            <fpage>79</fpage>
            <lpage>83</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/path.1711610113</pubid>
                  <pubid idtype="pmpid">2164578</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Molecular features of non-B, non-C hepatocellular carcinoma: a PCR-array gene expression profiling study</p>
            </title>
            <aug>
               <au>
                  <snm>Kurokawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Matoba</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Takemasa</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Nakamori</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Tsujie</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Nagano</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Dono</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Umeshita</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Sakon</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Ueno</snm>
                  <fnm>N</fnm>
               </au>
               <etal/>
            </aug>
            <source>J Hepatol</source>
            <pubdate>2003</pubdate>
            <volume>39</volume>
            <issue>6</issue>
            <fpage>1004</fpage>
            <lpage>1012</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0168-8278(03)00473-2</pubid>
                  <pubid idtype="pmpid" link="fulltext">14642619</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Overexpression of a novel imprinted gene, PEG10, in human hepatocellular carcinoma and in regenerating mouse livers</p>
            </title>
            <aug>
               <au>
                  <snm>Tsou</snm>
                  <fnm>AP</fnm>
               </au>
               <au>
                  <snm>Chuang</snm>
                  <fnm>YC</fnm>
               </au>
               <au>
                  <snm>Su</snm>
                  <fnm>JY</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>CW</fnm>
               </au>
               <au>
                  <snm>Liao</snm>
                  <fnm>YL</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>WK</fnm>
               </au>
               <au>
                  <snm>Chiu</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Chou</snm>
                  <fnm>CK</fnm>
               </au>
            </aug>
            <source>J Biomed Sci</source>
            <pubdate>2003</pubdate>
            <volume>10</volume>
            <issue>6 Pt 1</issue>
            <fpage>625</fpage>
            <lpage>635</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">14576465</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Involvement of PEG10 in human hepatocellular carcinogenesis through interaction with SIAH1</p>
            </title>
            <aug>
               <au>
                  <snm>Okabe</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Satoh</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Furukawa</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kato</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Hasegawa</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Nakajima</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Yamaoka</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Nakamura</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Cancer Res</source>
            <pubdate>2003</pubdate>
            <volume>63</volume>
            <issue>12</issue>
            <fpage>3043</fpage>
            <lpage>3048</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">12810624</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>The KEGG resource for deciphering the genome</p>
            </title>
            <aug>
               <au>
                  <snm>Kanehisa</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Goto</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Kawashima</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Okuno</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Hattori</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2004</pubdate>
            <issue>32 Database</issue>
            <fpage>D277</fpage>
            <lpage>280</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">308797</pubid>
                  <pubid idtype="pmpid" link="fulltext">14681412</pubid>
                  <pubid idtype="doi">10.1093/nar/gkh063</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p>FASTSNP: an always up-to-date and extendable service for SNP function analysis and prioritization</p>
            </title>
            <aug>
               <au>
                  <snm>Yuan</snm>
                  <fnm>HY</fnm>
               </au>
               <au>
                  <snm>Chiou</snm>
                  <fnm>JJ</fnm>
               </au>
               <au>
                  <snm>Tseng</snm>
                  <fnm>WH</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>CH</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>CK</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>YJ</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Yao</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>YT</fnm>
               </au>
               <au>
                  <snm>Hsu</snm>
                  <fnm>CN</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2006</pubdate>
            <issue>34 Web Server</issue>
            <fpage>W635</fpage>
            <lpage>641</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1538865</pubid>
                  <pubid idtype="pmpid" link="fulltext">16845089</pubid>
                  <pubid idtype="doi">10.1093/nar/gkl236</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>SMART 5: domains in the context of genomes and networks</p>
            </title>
            <aug>
               <au>
                  <snm>Letunic</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Copley</snm>
                  <fnm>RR</fnm>
               </au>
               <au>
                  <snm>Pils</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Pinkert</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Schultz</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bork</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Nucleic Acids Res</source>
            <pubdate>2006</pubdate>
            <issue>34 Database</issue>
            <fpage>D257</fpage>
            <lpage>260</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1347442</pubid>
                  <pubid idtype="pmpid" link="fulltext">16381859</pubid>
                  <pubid idtype="doi">10.1093/nar/gkj079</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B31">
            <title>
               <p>POINT: a database for the prediction of protein-protein interactions based on the orthologous interactome</p>
            </title>
            <aug>
               <au>
                  <snm>Huang</snm>
                  <fnm>TW</fnm>
               </au>
               <au>
                  <snm>Tien</snm>
                  <fnm>AC</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>WS</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>YC</fnm>
               </au>
               <au>
                  <snm>Peng</snm>
                  <fnm>CL</fnm>
               </au>
               <au>
                  <snm>Tseng</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Kao</snm>
                  <fnm>CY</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>CY</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <issue>17</issue>
            <fpage>3273</fpage>
            <lpage>3276</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bth366</pubid>
                  <pubid idtype="pmpid" link="fulltext">15217821</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B32">
            <title>
               <p>Identification of the substrates and interaction proteins of aurora kinases from a protein-protein interaction model</p>
            </title>
            <aug>
               <au>
                  <snm>Tien</snm>
                  <fnm>AC</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>MH</fnm>
               </au>
               <au>
                  <snm>Su</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Hong</snm>
                  <fnm>YR</fnm>
               </au>
               <au>
                  <snm>Cheng</snm>
                  <fnm>TS</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>YC</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>WJ</fnm>
               </au>
               <au>
                  <snm>Still</snm>
                  <fnm>IH</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>CY</fnm>
               </au>
            </aug>
            <source>Mol Cell Proteomics</source>
            <pubdate>2004</pubdate>
            <volume>3</volume>
            <issue>1</issue>
            <fpage>93</fpage>
            <lpage>104</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1074/mcp.M300072-MCP200</pubid>
                  <pubid idtype="pmpid" link="fulltext">14602875</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B33">
            <title>
               <p>Error and attack tolerance of complex networks</p>
            </title>
            <aug>
               <au>
                  <snm>Albert</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Jeong</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Barabasi</snm>
                  <fnm>AL</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2000</pubdate>
            <volume>406</volume>
            <issue>6794</issue>
            <fpage>378</fpage>
            <lpage>382</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/35019019</pubid>
                  <pubid idtype="pmpid" link="fulltext">10935628</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B34">
            <title>
               <p>Lethality and centrality in protein networks</p>
            </title>
            <aug>
               <au>
                  <snm>Jeong</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Mason</snm>
                  <fnm>SP</fnm>
               </au>
               <au>
                  <snm>Barabasi</snm>
                  <fnm>AL</fnm>
               </au>
               <au>
                  <snm>Oltvai</snm>
                  <fnm>ZN</fnm>
               </au>
            </aug>
            <source>Nature</source>
            <pubdate>2001</pubdate>
            <volume>411</volume>
            <issue>6833</issue>
            <fpage>41</fpage>
            <lpage>42</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/35075138</pubid>
                  <pubid idtype="pmpid" link="fulltext">11333967</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B35">
            <title>
               <p>Gene expression profiles of the aurora family kinases</p>
            </title>
            <aug>
               <au>
                  <snm>Lin</snm>
                  <fnm>YS</fnm>
               </au>
               <au>
                  <snm>Su</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Yu</snm>
                  <fnm>CT</fnm>
               </au>
               <au>
                  <snm>Wong</snm>
                  <fnm>FH</fnm>
               </au>
               <au>
                  <snm>Yeh</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Wu</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>WJ</fnm>
               </au>
               <au>
                  <snm>Shiue</snm>
                  <fnm>YL</fnm>
               </au>
               <au>
                  <snm>Liu</snm>
                  <fnm>HS</fnm>
               </au>
               <etal/>
            </aug>
            <source>Gene Expr</source>
            <pubdate>2006</pubdate>
            <volume>13</volume>
            <issue>1</issue>
            <fpage>15</fpage>
            <lpage>26</lpage>
            <xrefbib>
               <pubid idtype="pmpid">16572587</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B36">
            <title>
               <p>Finite-state transducers for semi-structured text mining</p>
            </title>
            <aug>
               <au>
                  <snm>Hsu</snm>
                  <fnm>CN</fnm>
               </au>
               <au>
                  <snm>Chang</snm>
                  <fnm>CH</fnm>
               </au>
            </aug>
            <source>Proceedings of IJCAI-99 Workshop on Text Mining: Foundations, Techniques and Applications</source>
            <publisher>Stockholm, Sweden</publisher>
            <pubdate>1999</pubdate>
            <fpage>38</fpage>
            <lpage>49</lpage>
         </bibl>
         <bibl id="B37">
            <title>
               <p>Generating finite-state transducers for semistructured data extraction from the web</p>
            </title>
            <aug>
               <au>
                  <snm>Hsu</snm>
                  <fnm>CN</fnm>
               </au>
               <au>
                  <snm>Dung</snm>
                  <fnm>MT</fnm>
               </au>
            </aug>
            <source>Inform Syst</source>
            <pubdate>1998</pubdate>
            <volume>23</volume>
            <fpage>521</fpage>
            <lpage>538</lpage>
         </bibl>
         <bibl id="B38">
            <title>
               <p>Global gene expression profiling of dimethylnitrosamine induced liver fibrosis: from pathological and biochemical data to microarray analysis</p>
            </title>
            <aug>
               <au>
                  <snm>Su</snm>
                  <fnm>LJ</fnm>
               </au>
               <au>
                  <snm>Hsu</snm>
                  <fnm>SL</fnm>
               </au>
               <au>
                  <snm>Yang</snm>
                  <fnm>JS</fnm>
               </au>
               <au>
                  <snm>Tseng</snm>
                  <fnm>HH</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>CY</fnm>
               </au>
            </aug>
            <source>Gene Expr</source>
            <pubdate>2006</pubdate>
            <volume>13</volume>
            <fpage>107</fpage>
            <lpage>132</lpage>
            <xrefbib>
               <pubid idtype="pmpid">17017125</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B39">
            <title>
               <p>Gene annotation Web Extractor</p>
            </title>
            <url>http://www.medinfopoli.polimi.it/GeneWebEx</url>
         </bibl>
         <bibl id="B40">
            <title>
               <p>GENA</p>
            </title>
            <url>http://gena.ontology.ims.u-tokyo.ac.jp/</url>
         </bibl>
         <bibl id="B41">
            <title>
               <p>Liver Fibrosis</p>
            </title>
            <url>http://ehco.iis.sinica.edu.tw/LF</url>
         </bibl>
         <bibl id="B42">
            <title>
               <p>Gene Ontology analysis</p>
            </title>
            <url>http://fatigo.bioinfo.cipf.es/</url>
         </bibl>
         <bibl id="B43">
            <title>
               <p>Prediction of protein-protein interaction</p>
            </title>
            <url>http://point.bioinformatics.tw</url>
         </bibl>
         <bibl id="B44">
            <title>
               <p>WormBase</p>
            </title>
            <url>http://www.wormbase.org/</url>
         </bibl>
         <bibl id="B45">
            <title>
               <p>FlyBase</p>
            </title>
            <url>http://flybase.bio.indiana.edu/</url>
         </bibl>
         <bibl id="B46">
            <title>
               <p>Saccharomyces Genome Database</p>
            </title>
            <url>http://www.yeastgenome.org/</url>
         </bibl>
         <bibl id="B47">
            <title>
               <p>HUGO Gene Nomenclature Committee</p>
            </title>
            <url>http://www.gene.ucl.ac.uk/nomenclature/</url>
         </bibl>
         <bibl id="B48">
            <title>
               <p>SAGE Genie dataset</p>
            </title>
            <url>http://cgap.nci.nih.gov/SAGE</url>
         </bibl>
         <bibl id="B49">
            <title>
               <p>Gene Name Service</p>
            </title>
            <url>http://bioagent.iis.sinica.edu.tw/GeneAlias/</url>
         </bibl>
         <bibl id="B50">
            <title>
               <p>Kyoto Encyclopedia of Genes and Genomes</p>
            </title>
            <url>http://www.genome.jp/kegg</url>
         </bibl>
         <bibl id="B51">
            <title>
               <p>BioCarta</p>
            </title>
            <url>http://www.biocarta.com/</url>
         </bibl>
         <bibl id="B52">
            <title>
               <p>EBI</p>
            </title>
            <url>http://www.ebi.ac.uk/ego/</url>
         </bibl>
      </refgrp>
   </bm>
</art>
