# Source code for pyhgnc.manager.models

"""This file contains the relational database models used by PyHGNC."""

import datetime

from sqlalchemy import Column, ForeignKey, Integer, String, Text, Boolean, Date, Table, Unicode
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.orm import relationship

from .defaults import TABLE_PREFIX

Base = declarative_base()


def foreign_key_to(table_name):
    """Build an integer column that references the ``id`` column of another PyHGNC table.

    :param str table_name: name of the referenced table without TABLE_PREFIX
    :return: integer column with a foreign key to ``<TABLE_PREFIX><table_name>.id``
    :rtype: sqlalchemy.Column
    """
    target = ''.join((TABLE_PREFIX, table_name, '.id'))
    reference = ForeignKey(target)
    return Column(Integer, reference)


def get_many2many_table(table1, table2):
    """Build the association table that links ``table1`` and ``table2`` many-to-many.

    The table is named ``<TABLE_PREFIX><table1>__<table2>``, is registered on ``Base.metadata`` and holds one
    integer foreign-key column per side (``<table1>_id`` and ``<table2>_id``), each pointing at the ``id``
    column of the corresponding prefixed table.

    :param str table1: Tablename of left hand table without TABLE_PREFIX.
    :param str table2: Tablename of right hand table without TABLE_PREFIX.
    :return: the association table
    :rtype: sqlalchemy.Table
    """
    def _link_column(name):
        # One side of the link: "<name>_id" -> "<TABLE_PREFIX><name>.id"
        target = '{}{}.id'.format(TABLE_PREFIX, name)
        return Column('{}_id'.format(name), Integer, ForeignKey(target))

    association_name = '{}{}__{}'.format(TABLE_PREFIX, table1, table2)
    return Table(association_name, Base.metadata, _link_column(table1), _link_column(table2))


class MasterModel(object):
    """Parent class (mixin) of all models in PyHGNC.

    Provides the table name (TABLE_PREFIX followed by the lower-cased class name), an integer primary key
    ``id`` and helpers that turn a model instance into a plain dictionary.
    """

    @declared_attr
    def __tablename__(cls):
        # ``declared_attr`` is evaluated with the mapped class, not with an instance.
        return TABLE_PREFIX + cls.__name__.lower()

    __mapper_args__ = {'always_refresh': True}

    id = Column(Integer, primary_key=True)

    def _to_dict(self):
        """Return a copy of the instance ``__dict__`` without ORM bookkeeping and without the primary key.

        Only attributes currently present in ``__dict__`` are included; nothing is loaded from the database
        here. ``datetime.date`` values (including ``datetime.datetime``) are rendered as ``'YYYY-MM-DD'``
        strings.

        :return: attribute name -> value
        :rtype: dict
        """
        # Filtering instead of ``del`` avoids a KeyError when 'id' (or the instance state) is not present in
        # ``__dict__``, e.g. for an instance whose attributes are unloaded or expired.
        skipped = ('_sa_instance_state', 'id')
        return {
            key: value.strftime('%Y-%m-%d') if isinstance(value, datetime.date) else value
            for key, value in self.__dict__.items()
            if key not in skipped
        }

    def to_dict(self):
        """Return the dictionary representation of this instance (see :meth:`_to_dict`).

        :rtype: dict
        """
        return self._to_dict()

    def to_dict_with_hgnc(self):
        """Return :meth:`_to_dict` with the ``hgnc_id`` foreign key replaced by HGNC identifier and symbol.

        Intended for models that expose ``hgnc_id`` and a ``hgnc`` relationship. The keys ``hgnc_identifier``
        and ``hgnc_symbol`` are always set; both are ``None`` when no HGNC entry is linked.

        :rtype: dict
        """
        ret_dict = self._to_dict()
        ret_dict.pop('hgnc_id', None)
        # Once the relationship has been loaded it lives in ``__dict__`` as a model object; drop it so the
        # result does not depend on whether ``self.hgnc`` was accessed before.
        ret_dict.pop('hgnc', None)
        hgnc = self.hgnc
        ret_dict['hgnc_identifier'] = None if hgnc is None else hgnc.identifier
        ret_dict['hgnc_symbol'] = None if hgnc is None else hgnc.symbol
        return ret_dict

    def to_dict_with_hgncs(self):
        """Return :meth:`_to_dict` extended by ``hgnc_symbols``, the symbols of all linked HGNC entries.

        Intended for models that expose a ``hgncs`` relationship.

        :rtype: dict
        """
        ret_dict = self._to_dict()
        # Same reasoning as in ``to_dict_with_hgnc``: never leak the loaded model objects themselves.
        ret_dict.pop('hgncs', None)
        ret_dict['hgnc_symbols'] = [x.symbol for x in self.hgncs]
        return ret_dict

# Association tables for the many-to-many links between the ``hgnc`` table and the other model tables.
# Each table is registered on ``Base.metadata`` by ``get_many2many_table`` and is meant to be passed as
# ``secondary=`` to the relationships on both sides of the link.
# The second argument is the table name without TABLE_PREFIX, i.e. the lower-cased model class name.
hgnc_enzyme = get_many2many_table('hgnc', 'enzyme')

hgnc_gene_family = get_many2many_table('hgnc', 'genefamily')

hgnc_refseq = get_many2many_table('hgnc', 'refseq')

hgnc_mgd = get_many2many_table('hgnc', 'mgd')

hgnc_pubmed = get_many2many_table('hgnc', 'pubmed')

hgnc_ena = get_many2many_table('hgnc', 'ena')

hgnc_uniprot = get_many2many_table('hgnc', 'uniprot')

hgnc_rgd = get_many2many_table('hgnc', 'rgd')


class HGNC(Base, MasterModel):
    """Root class (table, model) for all other classes (tables, models) in PyHGNC. Basic information with 1:1
    relationship to identifier are stored here

    .. warning::

        - homeodb (Homeobox Database ID)
        - horde_id (Symbol used within HORDE for the gene)

        described in `README <ftp://ftp.ebi.ac.uk/pub/databases/genenames/README.txt>`_, but not found in
        `HGNC JSON file <ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json>`_

    .. hint::

        To link to IUPHAR/BPS Guide to PHARMACOLOGY database only use the number
        (only use 1 from the result objectId:1)

    :cvar str name: HGNC approved name for the gene. Equates to the "APPROVED NAME" field within the gene symbol
        report
    :cvar str symbol: The HGNC approved gene symbol. Equates to the "APPROVED SYMBOL" field within the gene
        symbol report
    :cvar int orphanet: Orphanet ID
    :cvar int identifier: Unique ID created by the HGNC for every approved symbol (HGNC ID)
    :cvar str status: Status of the symbol report, which can be either "Approved" or "Entry Withdrawn"
    :cvar str uuid: universally unique identifier
    :cvar str locus_group: Group name for a set of related locus types as defined by the HGNC
        (e.g. non-coding RNA)
    :cvar str locus_type: Locus type as defined by the HGNC (e.g. RNA, transfer)
    :cvar date date_name_changed: date the gene name was last changed
    :cvar date date_modified: date the entry was last modified
    :cvar date date_symbol_changed: date the gene symbol was last changed
    :cvar date date_approved_reserved: date the entry was first approved
    :cvar str ensembl_gene: Ensembl gene ID. Found within the "GENE RESOURCES" section of the gene symbol report
    :cvar str horde: symbol used within HORDE for the gene (not available in JSON)
    :cvar str vega: Vega gene ID. Found within the "GENE RESOURCES" section of the gene symbol report
    :cvar str lncrnadb: Long Noncoding RNA Database identifier
    :cvar str entrez: Entrez gene ID. Found within the "GENE RESOURCES" section of the gene symbol report
    :cvar str mirbase: miRBase ID
    :cvar str iuphar: The objectId used to link to the IUPHAR/BPS Guide to PHARMACOLOGY database
    :cvar str ucsc: UCSC gene ID. Found within the "GENE RESOURCES" section of the gene symbol report
    :cvar str snornabase: snoRNABase ID
    :cvar str imgt: Symbol used within international ImMunoGeneTics information system
    :cvar str pseudogeneorg: Pseudogene.org ID
    :cvar str bioparadigmsslc: Symbol used to link to the SLC tables database at bioparadigms.org for the gene
    :cvar str locationsortable: locations sortable
    :cvar str merops: ID used to link to the MEROPS peptidase database
    :cvar str location: Cytogenetic location of the gene (e.g. 2q34).
    :cvar str cosmic: Symbol used within the Catalogue of somatic mutations in cancer for the gene
    :cvar list alias_symbols: relationship to AliasSymbol
    :cvar list alias_names: relationship to AliasName
    :cvar list rgds: relationship to RGD
    :cvar list omims: relationship to OMIM
    :cvar list ccdss: relationship to CCDS
    :cvar list lsdbs: relationship to LSDB
    :cvar list orthology_predictions: relationship to OrthologyPrediction
    :cvar list enzymes: relationship to Enzyme
    :cvar list gene_families: relationship to GeneFamily
    :cvar list refseqs: relationship to RefSeq
    :cvar list mgds: relationship to MGD
    :cvar list uniprots: relationship to UniProt
    :cvar list pubmeds: relationship to PubMed
    :cvar list enas: relationship to ENA
    """

    name = Column(String(255), nullable=True)
    symbol = Column(Unicode(255), index=True)
    identifier = Column(Integer, unique=True)
    status = Column(String(255))
    uuid = Column(String(255))
    orphanet = Column(Integer, nullable=True)

    locus_group = Column(String(255))
    locus_type = Column(String(255))

    # Date information
    date_name_changed = Column(Date, nullable=True)
    date_modified = Column(Date, nullable=True)
    date_symbol_changed = Column(Date, nullable=True)
    date_approved_reserved = Column(Date, nullable=True)

    # Identifiers in external resources
    ensembl_gene = Column(String(255), nullable=True)
    horde = Column(String(255), nullable=True)
    vega = Column(String(255), nullable=True)
    lncrnadb = Column(String(255), nullable=True)
    entrez = Column(String(255), nullable=True)
    mirbase = Column(String(255), nullable=True)
    iuphar = Column(String(255), nullable=True)
    ucsc = Column(String(255), nullable=True)
    snornabase = Column(String(255), nullable=True)
    pseudogeneorg = Column(String(255), nullable=True)
    bioparadigmsslc = Column(String(255), nullable=True)
    locationsortable = Column(String(255), nullable=True)
    merops = Column(String(255), nullable=True)
    location = Column(String(255), nullable=True)
    cosmic = Column(String(255), nullable=True)
    imgt = Column(String(255), nullable=True)

    # One-to-many relationships. Each child model declares ``hgnc = relationship('HGNC', back_populates=...)``;
    # naming the reverse side here as well keeps both directions synchronized in Python, matching the
    # many-to-many relationships below.
    alias_symbols = relationship('AliasSymbol', back_populates='hgnc')
    alias_names = relationship('AliasName', back_populates='hgnc')
    omims = relationship('OMIM', back_populates='hgnc')
    ccdss = relationship('CCDS', back_populates='hgnc')
    lsdbs = relationship('LSDB', back_populates='hgnc')
    orthology_predictions = relationship('OrthologyPrediction', back_populates='hgnc')

    # Many-to-many relationships via the association tables
    enzymes = relationship(
        "Enzyme",
        secondary=hgnc_enzyme,
        back_populates="hgncs"
    )

    gene_families = relationship(
        'GeneFamily',
        secondary=hgnc_gene_family,
        back_populates="hgncs"
    )

    refseqs = relationship(
        'RefSeq',
        secondary=hgnc_refseq,
        back_populates="hgncs"
    )

    mgds = relationship(
        'MGD',
        secondary=hgnc_mgd,
        back_populates="hgncs"
    )

    pubmeds = relationship(
        'PubMed',
        secondary=hgnc_pubmed,
        back_populates="hgncs"
    )

    enas = relationship(
        'ENA',
        secondary=hgnc_ena,
        back_populates="hgncs"
    )

    uniprots = relationship(
        'UniProt',
        secondary=hgnc_uniprot,
        back_populates="hgncs"
    )

    rgds = relationship(
        'RGD',
        secondary=hgnc_rgd,
        back_populates="hgncs"
    )

    def __repr__(self):
        # __repr__ must return a string; ``symbol`` is None on an instance that has not been populated yet.
        return str(self.symbol)
class AliasSymbol(Base, MasterModel):
    """Other symbols used to refer to this gene as seen in the "SYNONYMS" field in the symbol report.

    .. attention::

        Symbols previously approved by the HGNC for this gene are tagged with `is_previous_symbol==True`.
        Equates to the "PREVIOUS SYMBOLS & NAMES" field within the gene symbol report.

    :cvar str alias_symbol: other symbol
    :cvar bool is_previous_symbol: previously approved
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    alias_symbol = Column(Unicode(255))
    is_previous_symbol = Column(Boolean, default=False)

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship('HGNC', back_populates='alias_symbols')

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        # __repr__ must return a string; ``alias_symbol`` is None until it has been set.
        return str(self.alias_symbol)
class AliasName(Base, MasterModel):
    """Other names used to refer to this gene as seen in the "SYNONYMS" field in the gene symbol report.

    .. attention::

        Gene names previously approved by the HGNC for this gene are tagged with `is_previous_name==True`.
        Equates to the "PREVIOUS SYMBOLS & NAMES" field within the gene symbol report.

    :cvar str alias_name: other name
    :cvar bool is_previous_name: previously approved
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    alias_name = Column(String(255))
    is_previous_name = Column(Boolean, default=False)

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship("HGNC", back_populates="alias_names")

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        template = '{}; is_previous:{}'
        return template.format(self.alias_name, self.is_previous_name)
class GeneFamily(Base, MasterModel):
    """Name and identifier given to a gene family or group the gene has been assigned to. Equates to the
    "GENE FAMILY" field within the gene symbol report.

    :cvar int family_identifier: family identifier (unique)
    :cvar str family_name: family name
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    family_identifier = Column(Integer, unique=True)
    family_name = Column(String(255))

    hgncs = relationship(
        "HGNC",
        secondary=hgnc_gene_family,
        back_populates="gene_families"
    )

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        # __repr__ must return a string; ``family_name`` is None until it has been set.
        return str(self.family_name)
class RefSeq(Base, MasterModel):
    """RefSeq nucleotide accession(s). Found within the "NUCLEOTIDE SEQUENCES" section of the gene symbol report.

    See also `RefSeq database <https://www.ncbi.nlm.nih.gov/refseq/>`_ for more information.

    :cvar str accession: RefSeq accession number
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    accession = Column(String(255))

    hgncs = relationship(
        "HGNC",
        secondary=hgnc_refseq,
        back_populates="refseqs"
    )

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        # __repr__ must return a string; ``accession`` is None until it has been set.
        return str(self.accession)
class RGD(Base, MasterModel):
    """Rat genome database gene ID. Found within the "HOMOLOGS" section of the gene symbol report

    :cvar int rgdid: Rat genome database gene ID
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    rgdid = Column(Integer)

    hgncs = relationship('HGNC', secondary=hgnc_rgd, back_populates='rgds')

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        rgd_identifier = self.rgdid
        return str(rgd_identifier)
class OMIM(Base, MasterModel):
    """Online Mendelian Inheritance in Man (OMIM) ID

    :cvar int omimid: OMIM ID
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    omimid = Column(Integer)

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship("HGNC", back_populates="omims")

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        omim_identifier = self.omimid
        return str(omim_identifier)
class MGD(Base, MasterModel):
    """Mouse genome informatics database ID. Found within the "HOMOLOGS" section of the gene symbol report

    :cvar int mgdid: Mouse genome informatics database ID
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    mgdid = Column(Integer)

    hgncs = relationship('HGNC', secondary=hgnc_mgd, back_populates='mgds')

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        mgd_identifier = self.mgdid
        return str(mgd_identifier)
class UniProt(Base, MasterModel):
    """Universal Protein Resource (UniProt) protein accession. Found within the "PROTEIN RESOURCES" section of
    the gene symbol report.

    See also `UniProt webpage <http://www.uniprot.org>`_ for more information.

    :cvar str uniprotid: UniProt identifier
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    uniprotid = Column(String(255))

    hgncs = relationship(
        "HGNC",
        secondary=hgnc_uniprot,
        back_populates="uniprots"
    )

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        # __repr__ must return a string; ``uniprotid`` is None until it has been set.
        return str(self.uniprotid)
class CCDS(Base, MasterModel):
    """Consensus CDS ID. Found within the "NUCLEOTIDE SEQUENCES" section of the gene symbol report.

    See also `CCDS <https://www.ncbi.nlm.nih.gov/projects/CCDS>`_ for more information.

    :cvar str ccdsid: CCDS identifier
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    ccdsid = Column(String(255))

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship('HGNC', back_populates='ccdss')

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        # __repr__ must return a string; ``ccdsid`` is None until it has been set.
        return str(self.ccdsid)
class PubMed(Base, MasterModel):
    """PubMed and Europe PubMed Central PMID

    :cvar int pubmedid: Pubmed identifier
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    pubmedid = Column(Integer)

    hgncs = relationship('HGNC', secondary=hgnc_pubmed, back_populates='pubmeds')

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        pubmed_identifier = self.pubmedid
        return str(pubmed_identifier)
class ENA(Base, MasterModel):
    """International Nucleotide Sequence Database Collaboration (GenBank, ENA and DDBJ) accession number(s).
    Found within the "NUCLEOTIDE SEQUENCES" section of the gene symbol report.

    :cvar str enaid: European Nucleotide Archive (ENA) identifier
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    enaid = Column(String(255))

    hgncs = relationship(
        "HGNC",
        secondary=hgnc_ena,
        back_populates="enas"
    )

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        # __repr__ must return a string; ``enaid`` is None until it has been set.
        return str(self.enaid)
class Enzyme(Base, MasterModel):
    """Enzyme Commission number (EC number)

    :cvar str ec_number: EC number
    :cvar list hgncs: back populates to :class:`.HGNC`
    """

    ec_number = Column(String(255))

    hgncs = relationship(
        "HGNC",
        secondary=hgnc_enzyme,
        back_populates="enzymes"
    )

    def to_dict(self):
        """Return the attributes as dict, extended by the symbols of all linked HGNC entries."""
        return self.to_dict_with_hgncs()

    def __repr__(self):
        # __repr__ must return a string; ``ec_number`` is None until it has been set.
        return str(self.ec_number)
class LSDB(Base, MasterModel):
    """The name of the Locus Specific Mutation Database and URL

    :cvar str lsdb: name of the Locus Specific Mutation Database
    :cvar str url: URL to database
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    lsdb = Column(String(255))
    url = Column(Text)

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship('HGNC', back_populates='lsdbs')

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        # __repr__ must return a string; ``lsdb`` is None until it has been set.
        return str(self.lsdb)
class OrthologyPrediction(Base, MasterModel):
    """Orthology Predictions

    .. warning::

        OrthologyPrediction is still not correctly normalized and documented.

    :cvar int ortholog_species: NCBI taxonomy identifier
    :cvar int human_entrez_gene: Human Entrez gene identifier
    :cvar str human_ensembl_gene: Human Ensembl gene identifier
    :cvar str human_name: Human gene name
    :cvar str human_symbol: Human gene symbol
    :cvar str human_chr: Human gene chromosome location
    :cvar str human_assert_ids:
    :cvar int ortholog_species_entrez_gene: Ortholog species Entrez gene identifier
    :cvar str ortholog_species_ensembl_gene: Ortholog species Ensembl gene identifier
    :cvar str ortholog_species_db_id: Ortholog species database identifier
    :cvar str ortholog_species_name: Ortholog species gene name
    :cvar str ortholog_species_symbol: Ortholog species gene symbol
    :cvar str ortholog_species_chr: Ortholog species gene chromosome location
    :cvar str ortholog_species_assert_ids:
    :cvar str support:
    :cvar hgnc: back populates to :class:`.HGNC`
    """

    ortholog_species = Column(Integer)

    # Human side of the prediction
    human_entrez_gene = Column(Integer)
    human_ensembl_gene = Column(String(255))
    human_name = Column(String(255))
    human_symbol = Column(Unicode(255))
    human_chr = Column(String(255))
    human_assert_ids = Column(String(255))

    # Ortholog species side of the prediction
    ortholog_species_entrez_gene = Column(Integer)
    ortholog_species_ensembl_gene = Column(String(255))
    ortholog_species_db_id = Column(String(255))
    ortholog_species_name = Column(Text)
    ortholog_species_symbol = Column(Unicode(255), index=True)
    ortholog_species_chr = Column(String(255))
    ortholog_species_assert_ids = Column(String(255))

    support = Column(String(255))

    hgnc_id = foreign_key_to('hgnc')
    hgnc = relationship("HGNC", back_populates="orthology_predictions")

    def to_dict(self):
        """Return the attributes as dict, with the HGNC foreign key replaced by HGNC identifier and symbol."""
        return self.to_dict_with_hgnc()

    def __repr__(self):
        template = '{}: {}: {}'
        return template.format(
            self.ortholog_species,
            self.ortholog_species_name,
            self.ortholog_species_symbol,
        )
class AppUser(Base, MasterModel):
    """User account record.

    :cvar str name: name of the user
    :cvar str email: e-mail address (unique)
    :cvar str username: login name (unique)
    :cvar str password: password value
    """

    name = Column(String(255))
    email = Column(String(255), unique=True)
    username = Column(String(255), unique=True)
    # NOTE(review): presumably holds a password hash rather than plain text - confirm against the code that
    # writes this column.
    password = Column(String(255))

    def __repr__(self):
        # __repr__ must return a string; ``username`` is None until it has been set.
        return str(self.username)