Supplementary material: CCO_Queries.txt
=======================================
cco1:
# NAME : get_specific_proteins
# PARAMETER: CCO_F0000031: nucleotide binding (function)
# PARAMETER: CCO_C0000252: nucleus (location)
# PARAMETER: CCO_P0000117: meiosis (process)
# FUNCTION : lists each protein (label and id) that has a function, a
# location and a process whose direct is_a parent is the
# corresponding parameter term
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT DISTINCT ?protein ?protein_id
WHERE {
  GRAPH {
    # everything hanging off the protein itself
    ?protein_id rdfs:label ?protein ;
                ssb:has_function ?fn ;
                ssb:located_in ?loc ;
                ssb:participates_in ?proc .
    # each linked term must be a direct child of the requested term
    ?fn ssb:is_a ssb:CCO_F0000031 .
    ?loc ssb:is_a ssb:CCO_C0000252 .
    ?proc ssb:is_a ssb:CCO_P0000117 .
  }
}
-------------------------------------------------------------------
cco2:
# NAME : get_protein_information
# PARAMETER: CCO_B0000007: the id of the protein (NOT2_YEAST); presumably
# supplied as the IRI of the term_id: prefix
# PARAMETER: g: the named graph that is searched
# NOTE(review): the original header carried a truncated line
# ("which you can find the protein") that presumably described
# this graph parameter - confirm
# FUNCTION : returns the labels of the functions, processes and locations
# linked to the given protein; every result row fills exactly one
# of the three columns
# NOTE : the IRIs that should follow BASE and PREFIX are absent in this
# copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
PREFIX term_id:
PREFIX g:
SELECT ?function ?participates_in ?located_in
WHERE {
  GRAPH g: {
    {
      term_id: ssb:has_function ?fn .
      ?fn rdfs:label ?function .
    }
    UNION
    {
      term_id: ssb:participates_in ?proc .
      ?proc rdfs:label ?participates_in .
    }
    UNION
    {
      term_id: ssb:located_in ?loc .
      ?loc rdfs:label ?located_in .
    }
  }
}
-------------------------------------------------------------------
cco3:
# NAME : get_breast_cancer_related_proteins
# PARAMETER: Breast cancer: the text to search (case-insensitive)
# FUNCTION : returns all the proteins that have 'Breast cancer' in
# their definition and, when known, the interactions they
# participate in together with the interaction's xref accession
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT DISTINCT ?protein_name ?definition ?interaction_name ?IntAct_id
WHERE {
  GRAPH {
    # 'a' is the built-in shorthand for rdf:type; the rdf: prefix is not
    # declared in this query, so rdf:type would not resolve
    ?protein_id a ssb:protein ;
                rdfs:label ?protein_name ;
                ssb:Definition ?def_node .
    ?def_node ssb:def ?definition .
    # str() strips any language tag so the regex never hits a type error
    FILTER regex(str(?definition), 'Breast cancer', 'i')
    # interaction data is optional: proteins without it are still listed
    OPTIONAL {
      ?protein_id ssb:participates_in ?interaction .
      ?interaction a ssb:interaction ;
                   rdfs:label ?interaction_name ;
                   ssb:xref ?xref .
      ?xref ssb:acc ?IntAct_id
    }
  }
}
-------------------------------------------------------------------
cco4:
# NAME : get_transformed_proteins
# PARAMETER: CCO_B0001575: the id of the given protein; presumably supplied
# as the IRI of the protein_id: prefix
# PARAMETER: CCO: the name of the CCO graph (used as the GRAPH target)
# FUNCTION : for every term the given protein transforms_into (e.g. by
# phosphorylation), returns its definition text, label and id
# NOTE : the IRIs that should follow BASE and PREFIX are absent in this
# copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
PREFIX protein_id:
PREFIX CCO:
SELECT ?description ?transformed_protein_name ?cco_id
WHERE {
  GRAPH CCO: {
    protein_id: ssb:transforms_into ?cco_id .
    ?cco_id rdfs:label ?transformed_protein_name ;
            ssb:Definition ?def_node .
    ?def_node ssb:def ?description .
  }
}
-------------------------------------------------------------------
cco5:
# NAME : get_core_cell_cycle_proteins_in_S_pombe_in_known_process
# FUNCTION : lists the label of every term that is_a CCO_B0000000 (core
# cell cycle protein) together with the label of each process
# it participates in (in S. pombe)
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?prot_label ?biological_process_label
WHERE {
  GRAPH {
    ?core_protein ssb:is_a ssb:CCO_B0000000 ;
                  rdfs:label ?prot_label ;
                  ssb:participates_in ?process .
    ?process rdfs:label ?biological_process_label .
  }
}
-------------------------------------------------------------------
cco6:
# NAME : get_interactions_in_At
# PARAMETER: CCO_Y0000028: direct interaction (IntAct)
# FUNCTION : returns all the direct interactions in A thaliana (xref
# accession and label) and the labels of their participating
# proteins
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?IntAct_ID ?interaction ?protein
WHERE {
  GRAPH {
    # 'a' is the built-in shorthand for rdf:type; the rdf: prefix is not
    # declared in this query, so rdf:type would not resolve
    ?interaction_id a ssb:interaction ;
                    ssb:is_a ssb:CCO_Y0000028 ;
                    rdfs:label ?interaction ;
                    ssb:xref ?xref ;
                    ssb:has_participant ?participant .
    ?xref ssb:acc ?IntAct_ID .
    ?participant rdfs:label ?protein .
  }
}
-------------------------------------------------------------------
cco7:
# NAME : get_terms_by_name
# FUNCTION : returns every labelled term (protein, process, etc.) whose
# label contains the string 'cell cycle'; the match is
# case-sensitive because no regex flag is given
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
SELECT ?term_id ?term_name
WHERE {
  GRAPH {
    ?term_id rdfs:label ?term_name .
    FILTER regex(str(?term_name), 'cell cycle')
  }
}
-------------------------------------------------------------------
cco8:
# NAME : get_Sp_protein_located_in_cell_wall
# PARAMETER: CCO_B0000000: restricts the subjects to core cell cycle proteins
# PARAMETER: CCO_C0000239: restricts the object to cell wall
# FUNCTION : returns the distinct labels of the core cell cycle proteins
# (in S pombe) located in a term that is_a cell wall
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT DISTINCT ?protein
WHERE {
  GRAPH {
    ?prot ssb:is_a ssb:CCO_B0000000 ;
          rdfs:label ?protein ;
          ssb:located_in ?loc .
    ?loc ssb:is_a ssb:CCO_C0000239 .
  }
}
-------------------------------------------------------------------
cco9:
# NAME : get_TAIR_references
# FUNCTION : for the core cell cycle proteins (in Arabidopsis thaliana),
# returns the protein label and the accession (AT code) of every
# xref of its encoding gene whose database name is 'TAIR'
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?uniprot_name ?AT_code
WHERE {
  GRAPH {
    ?core_protein ssb:is_a ssb:CCO_B0000000 ;
                  rdfs:label ?uniprot_name ;
                  ssb:encoded_by ?gene_id .
    ?gene_id ssb:xref ?gene_xref .
    ?gene_xref ssb:dbname ?db_name ;
               ssb:acc ?AT_code .
    # keep only cross-references that point into TAIR
    FILTER(?db_name = 'TAIR')
  }
}
-------------------------------------------------------------------
cco10:
# NAME : get_specific_core_cell_cycle_proteins_in_at
# PARAMETER: CCO_C0000324: cytoplasm (location)
# PARAMETER: hydrolysis : protein function (case-insensitive text search)
# FUNCTION : returns all the core cell cycle proteins in A thaliana that are
# located in the cytoplasm and that have a hydrolysis-related
# function; the text is searched in the definition of the direct
# is_a parent of each function attached to the protein
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT DISTINCT ?protein_name ?function
WHERE {
  GRAPH {
    # 'a' is the built-in shorthand for rdf:type; the rdf: prefix is not
    # declared in this query, so rdf:type would not resolve
    ?protein_id a ssb:protein ;
                ssb:is_a ssb:CCO_B0000000 ;
                rdfs:label ?protein_name ;
                ssb:has_function ?subfunction_id ;
                ssb:located_in ?location_id .
    ?location_id ssb:is_a ssb:CCO_C0000324 .
    ?subfunction_id ssb:is_a ?function_id .
    ?function_id ssb:Definition ?def_node .
    ?def_node ssb:def ?function .
    # str() strips any language tag so the regex never hits a type error
    FILTER regex(str(?function), 'hydrolysis', 'i')
  }
}
-------------------------------------------------------------------
cco11:
# NAME : get_a_term_CCO_id
# PARAMETER : 'CCO_P0000003' : a sample CCO id
# FUNCTION : looks up the rdfs:label attached to the given CCO id
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?term_label
WHERE {
  GRAPH {
    ssb:CCO_P0000003 rdfs:label ?term_label .
  }
}
-------------------------------------------------------------------
cco12:
# NAME : get_neighborhood
# PARAMETER: CCO_B0002337: sample protein (WEE1_ARATH); presumably supplied
# as the IRI of the term_id: prefix
# FUNCTION : returns the neighbor terms of a given term. Rows from the
# first branch describe outgoing links (term label, relation
# label, target label); rows from the second branch describe
# incoming links (source label, relation label, term label).
# Only relations and neighbors that carry a label are returned.
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX term_id:
SELECT ?term_as_child ?outward_arrow ?head_name ?tail_name ?inward_arrow ?term_as_parent
WHERE {
  GRAPH {
    {
      # outgoing: term --?out_pred--> ?head
      term_id: rdfs:label ?term_as_child ;
               ?out_pred ?head .
      ?out_pred rdfs:label ?outward_arrow .
      ?head rdfs:label ?head_name .
    }
    UNION
    {
      # incoming: ?tail --?in_pred--> term
      ?tail ?in_pred term_id: .
      ?tail rdfs:label ?tail_name .
      ?in_pred rdfs:label ?inward_arrow .
      term_id: rdfs:label ?term_as_parent .
    }
  }
}
-------------------------------------------------------------------
cco13:
# NAME : get_term_information
# PARAMETER: CCO_B0001733: sample term (cdc20); presumably supplied as the
# IRI of the term_id: prefix
# FUNCTION : returns the properties of a given term. Each UNION branch
# fills its own subset of the output columns: name, definition
# (plus its db xrefs), organism, comment, synonyms (plus scope
# and db xrefs), xrefs, rdf type and alternative ids.
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
PREFIX term_id:
SELECT DISTINCT ?name ?definition ?db ?nr ?organism ?comment ?synonym ?scope ?syn_db ?syn_nr ?xref_db ?xref_nr ?subnamespace ?alt_id
WHERE {
  GRAPH {
    # label
    { term_id: rdfs:label ?name }
    UNION
    {
      # definition text, or the database references of the definition
      term_id: ssb:Definition ?def_node .
      { ?def_node ssb:def ?definition . }
      UNION
      {
        OPTIONAL {
          ?def_node ssb:DbXref ?def_xref .
          ?def_xref ssb:dbname ?db ;
                    ssb:acc ?nr .
        }
      }
    }
    UNION
    {
      # NOTE(review): ?has_source is a variable, so any predicate that
      # links the term to a taxon matches; presumably ssb:has_source
      # was intended - confirm
      term_id: ?has_source ?organism_id .
      ?organism_id a ssb:taxon ;
                   rdfs:label ?organism .
    }
    UNION
    { term_id: rdfs:comment ?comment }
    UNION
    {
      # synonyms with optional scope and optional database references
      term_id: ssb:synonym ?syn_node .
      ?syn_node ssb:syn ?synonym .
      OPTIONAL { ?syn_node ssb:scope ?scope . }
      OPTIONAL {
        ?syn_node ssb:DbXref ?syn_xref .
        ?syn_xref ssb:dbname ?syn_db ;
                  ssb:acc ?syn_nr .
      }
    }
    UNION
    {
      # cross-references of the term itself
      term_id: ssb:xref ?xref_node .
      ?xref_node ssb:dbname ?xref_db ;
                 ssb:acc ?xref_nr .
    }
    UNION
    {
      # rdf type(s) of the term
      term_id: a ?subnamespace
    }
    UNION
    {
      term_id: ssb:hasAlternativeId ?alt_id .
    }
  }
}
-------------------------------------------------------------------
cco14:
# NAME : get_relation_types
# FUNCTION : returns all the relation types used in CCO (label and id),
# sorted by label
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?relation_type ?relation_type_id
WHERE {
  GRAPH {
    # 'a' is the built-in shorthand for rdf:type; the rdf: prefix is not
    # declared in this query, so rdf:type would not resolve
    ?relation_type_id a ssb:rel_type ;
                      rdfs:label ?relation_type .
  }
}
ORDER BY ?relation_type
-------------------------------------------------------------------
cco15:
# NAME : get_children
# PARAMETER: CCO_P0000160: sample term (cell cycle phase) to look for its children
# FUNCTION : returns label and id of every term that is a direct is_a
# child of the given term
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?name ?CCO_id
WHERE {
  GRAPH {
    ?CCO_id ssb:is_a ssb:CCO_P0000160 ;
            rdfs:label ?name .
  }
}
-------------------------------------------------------------------
cco16:
# NAME : get_parents
# PARAMETER: CCO_F0001848: sample term (Gram-negative bacterial binding) to
# look for its parents
# FUNCTION : returns label and id of every direct is_a parent of the
# given term
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?name_of_parent ?CCO_id
WHERE {
  GRAPH {
    ssb:CCO_F0001848 ssb:is_a ?CCO_id .
    ?CCO_id rdfs:label ?name_of_parent .
  }
}
-------------------------------------------------------------------
cco17:
# NAME : count_terms
# FUNCTION : returns the number of core cell cycle proteins in A thaliana,
# i.e. the number of distinct terms that are a direct is_a child
# of CCO_B0000000, in the column ?term_count
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX ssb:
# SPARQL 1.1 requires an aggregate in the SELECT clause to be written as
# (expression AS ?var); DISTINCT belongs inside COUNT so that it applies to
# the counted terms rather than to the single result row.
SELECT (COUNT(DISTINCT ?term_id) AS ?term_count)
WHERE {
  GRAPH {
    ?term_id ssb:is_a ssb:CCO_B0000000 .
  }
}
__________________________________________________________________
cco18:
# NAME : search_terms_on_properties
# PARAMETER: cell: the first search-string
# PARAMETER: cycle: the second search-string
# FUNCTION : returns every labelled term for which one property value -
# the name, definition, synonym or comment - contains both
# 'cell' and 'cycle' (case-insensitive). ?found_in is the
# matching text and ?type_of_found_text the predicate that links
# the term to that text (or to the node holding it).
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?name ?found_in ?type_of_found_text
WHERE {
  GRAPH {
    ?term_id rdfs:label ?name .
    {
      # text found in the label itself
      ?term_id ?type_of_found_text ?found_in ;
               rdfs:label ?found_in .
    }
    UNION
    {
      # text found in the definition
      ?term_id ?type_of_found_text ?def_node ;
               ssb:Definition ?def_node .
      ?def_node ssb:def ?found_in .
    }
    UNION
    {
      # text found in a synonym
      ?term_id ?type_of_found_text ?syn_node ;
               ssb:synonym ?syn_node .
      ?syn_node ssb:syn ?found_in .
    }
    UNION
    {
      # text found in a comment
      ?term_id ?type_of_found_text ?found_in ;
               rdfs:comment ?found_in .
    }
    # both search strings must occur in the same text; filters apply to the
    # whole group regardless of where they are written
    FILTER regex(str(?found_in), 'cell', 'i')
    FILTER regex(str(?found_in), 'cycle', 'i')
  }
}
ORDER BY ?term_id
-------------------------------------------------------------------
cco19:
# NAME : count_cell_cycle_proteins_at
# FUNCTION : returns the number of cell cycle proteins in A thaliana: the
# distinct terms that are a direct is_a child of CCO_B0000000,
# CCO_U0000011 or CCO_U0000007 and whose has_source is
# CCO_T0000034, in the column ?term_count
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX ssb:
# SPARQL 1.1 requires an aggregate in the SELECT clause to be written as
# (expression AS ?var). DISTINCT is placed inside COUNT so that a term
# matched by more than one UNION branch is counted once; in the old form
# DISTINCT only applied to the single result row.
SELECT (COUNT(DISTINCT ?term_id) AS ?term_count)
WHERE {
  {
    GRAPH {
      ?term_id ssb:is_a ssb:CCO_B0000000 .
    }
  }
  UNION
  {
    GRAPH {
      ?term_id ssb:is_a ssb:CCO_U0000011 .
    }
  }
  UNION
  {
    GRAPH {
      ?term_id ssb:is_a ssb:CCO_U0000007 .
    }
  }
  # NOTE(review): this pattern sits outside every GRAPH block, so it is
  # matched against the default graph; confirm the endpoint's default graph
  # contains the has_source triples
  ?term_id ssb:has_source ssb:CCO_T0000034 .
}
-------------------------------------------------------------------
cco20:
# NAME : count_cell_cycle_genes_sp
# FUNCTION : returns the number of cell cycle proteins in S pombe, i.e. the
# number of distinct terms that are a direct is_a child of
# CCO_U0000008, in the column ?term_count
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX ssb:
# SPARQL 1.1 requires an aggregate in the SELECT clause to be written as
# (expression AS ?var); DISTINCT belongs inside COUNT so that it applies to
# the counted terms rather than to the single result row.
SELECT (COUNT(DISTINCT ?term_id) AS ?term_count)
WHERE {
  GRAPH {
    ?term_id ssb:is_a ssb:CCO_U0000008 .
  }
}
-------------------------------------------------------------------
cco21:
# NAME : count_cell_cycle_genes_sp_cco
# FUNCTION : returns the number of cell cycle proteins in S pombe
# from the composite ontology CCO: the distinct terms that are a
# direct is_a child of CCO_U0000008 and whose has_source is
# CCO_T0000017, in the column ?term_count
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX ssb:
# SPARQL 1.1 requires an aggregate in the SELECT clause to be written as
# (expression AS ?var); DISTINCT belongs inside COUNT so that it applies to
# the counted terms rather than to the single result row.
SELECT (COUNT(DISTINCT ?term_id) AS ?term_count)
WHERE {
  GRAPH {
    ?term_id ssb:is_a ssb:CCO_U0000008 .
  }
  # NOTE(review): this pattern sits outside the GRAPH block, so it is
  # matched against the default graph; confirm the endpoint's default graph
  # contains the has_source triples
  ?term_id ssb:has_source ssb:CCO_T0000017 .
}
-------------------------------------------------------------------
cco22:
# NAME : protein_protein_interactions_at
# FUNCTION : returns all the protein protein interactions in A thaliana:
# for every term whose has_source is CCO_T0000034, its label and
# the label of each interaction it participates in
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
PREFIX ssb:
SELECT ?protein ?participates_in_interaction
WHERE {
  GRAPH {
    ?term_id ssb:has_source ssb:CCO_T0000034 ;
             rdfs:label ?protein ;
             ssb:participates_in ?interaction .
    # 'a' is the built-in shorthand for rdf:type; the rdf: prefix is not
    # declared in this query, so rdf:type would not resolve
    ?interaction a ssb:interaction ;
                 rdfs:label ?participates_in_interaction .
  }
}
-------------------------------------------------------------------
cco23:
# NAME : get CCO id by term label
# FUNCTION : returns the id of the term whose label is exactly the
# English-tagged literal 'WEE1_ARATH'
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
SELECT ?unique_id
WHERE {
  GRAPH {
    ?unique_id rdfs:label 'WEE1_ARATH'@en .
  }
}
-------------------------------------------------------------------
cco24:
# NAME : get CCO id by term label
# FUNCTION : returns id and label of terms whose label matches the regular
# expression 'CDK2' (case-insensitive); at most 10 rows are
# returned
# NOTE : the IRIs that should follow BASE, PREFIX and GRAPH are absent in
# this copy; they must be restored before the query can run
BASE
PREFIX rdfs:
SELECT ?unique_id ?label
WHERE {
  GRAPH {
    ?unique_id rdfs:label ?label .
    FILTER regex(str(?label), 'CDK2', 'i')
  }
}
LIMIT 10