# Copyright (C) 2012 Jerry Fowler (rgfowler@mdanderson.org)
# Distributed under GPL. see
#
# Please refer to http://varianttools.sourceforge.net/Annotation/New
# for a description of the format of this file.
#

# Fields used to link this annotation to variants, one entry per reference
# genome build. hg18 uses its own position column (hg18_pos).
[linked fields]
hg19=chr, pos, ref, alt
hg18=chr, hg18_pos, ref, alt

# Where the data come from. direct_url carries the path of the prebuilt
# database followed by a 32-hex-digit checksum (presumably MD5 - confirm).
[data sources]
description=dbNSFP version 2.4, maintained by Xiaoming Liu from UTSPH. Please cite "Liu X, Jian X, and Boerwinkle E. 2011. dbNSFP: a lightweight database of human non-synonymous SNPs and their functional predictions. Human Mutation. 32:894-899" and "Liu X, Jian X, and Boerwinkle E. 2013. dbNSFP v2.0: A Database of Human Nonsynonymous SNVs and Their Functional Predictions and Annotations. Human Mutation. 34:E2393-E2402." if you find this database useful.
version=hg18_hg19_2_4
anno_type=variant
source_url=http://dbnsfp.houstonbioinformatics.org/dbNSFPzip/dbNSFPv2.4.zip
direct_url=annoDB/dbNSFP-hg18_hg19_2_4.DB.gz f0c9733887dc1984162913cb5181ac78
source_type=txt
source_pattern=dbNSFP2.4_variant.chr

# Field sections. Each one names an output field; "index" is the 1-based
# column of the source file it is read from.

# Columns 1-4: the hg19 variant key.
[chr]
index=1
type=VARCHAR(20) NOT NULL
comment=Chromosome number

[pos]
index=2
type=INTEGER NOT NULL
comment=physical position on the chromosome as to hg19 (1-based coordinate)

[ref]
index=3
type=CHAR(1) NOT NULL
comment=Reference nucleotide allele (as on the + strand)

[alt]
index=4
type=CHAR(1) NOT NULL
comment=Alternative nucleotide allele (as on the + strand)

[aaref]
index=5
type=CHAR(1) NULL
comment=reference amino acid

[aaalt]
index=6
type=CHAR(1) NULL
comment=alternative amino acid

# Position used by the hg18 entry of [linked fields]; the description
# previously said hg19, duplicating the text of [pos].
[hg18_pos]
index=7
type=INTEGER NULL
adj=Nullify('.')
comment=physical position on the chromosome as to hg18 (1-based coordinate)

[genename]
index=8
type=VARCHAR(48) NULL
adj=Nullify('.')
comment=common gene name, if the NS can be assigned to multiple genes, gene names are separated by ";"

[Uniprot_acc]
index=9
type=VARCHAR(128) NULL
adj=Nullify('.')
comment=Uniprot accession number. Multiple entries separated by ";".
# Protein, codon and transcript annotations (source columns 10-23).
# adj=Nullify('.') is applied to optional columns; presumably it stores the
# '.' placeholder as NULL - confirm against the variant tools documentation.

[Uniprot_id]
index=10
type=VARCHAR(64) NULL
adj=Nullify('.')
comment=Uniprot ID number. Multiple entries separated by ";".

[Uniprot_aapos]
index=11
type=VARCHAR(128) NULL
adj=Nullify('.')
comment=amino acid position as to Uniprot. Multiple entries separated by ";".

[Interpro_domain]
index=12
type=CHAR(48) NULL
adj=Nullify('.')
comment=Interpro_domain: domain or conserved site on which the variant locates. Domain annotations come from Interpro database. The number in the brackets following a specific domain is the count of times Interpro assigns the variant position to that domain, typically coming from different predicting databases. Multiple entries separated by ";".

[cds_strand]
index=13
type=CHAR(1) NULL
adj=Nullify('.')
comment=coding sequence (CDS) strand (+ or -)

# A codon is three bases, so the former char(1) declaration was too narrow.
[refcodon]
index=14
type=VARCHAR(20) NULL
adj=Nullify('.')
comment=reference codon

[SLR_test_statistic]
index=15
type=FLOAT NULL
adj=Nullify('.')
comment=SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence.

[codonpos]
index=16
type=VARCHAR(128) NULL
adj=Nullify('.')
comment=position on the codon (1, 2 or 3), multiple entries are separated by ;

[fold_degenerate]
index=17
type=VARCHAR(20) NULL
adj=Nullify('.')
comment=A list of degenerate type (0, 2 or 3), separated by ;

[Ancestral_allele]
index=18
type=CHAR(1) NULL
adj=Nullify('.')
comment=Ancestral allele (based on 1000 genomes reference data). The following comes from its original README file: ACTG - high-confidence call, ancestral state supported by the other two sequences; actg - low-confidence call, ancestral state supported by one sequence only; N - failure, the ancestral state is not supported by any other sequence; - - the extant species contains an insertion at this position; . - no coverage in the alignment

[Ensembl_geneid]
index=19
type=VARCHAR(48) NULL
adj=Nullify('.')
comment=Ensembl gene id

[Ensembl_transcriptid]
index=20
type=VARCHAR(48) NULL
adj=Nullify('.')
comment=Ensembl transcript ids (separated by ";")

[aapos]
index=21
type=VARCHAR(48) NULL
adj=Nullify('.')
comment=amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron), Multiple values separated by ;

[aapos_SIFT]
index=22
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";"

[aapos_FATHMM]
index=23
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=aapos_FATHMM: ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";"

# SIFT_score and SIFT_score_all both read source column 24: the first reduces
# the ";"-separated list to one number, the second keeps the raw text.
# A smaller SIFT score is the more damaging one (scores below 0.05 are "D"),
# so the summary takes min(), matching its description. It used to take max().
[SIFT_score]
index=24
type=FLOAT NULL
adj=lambda x: None if x.strip() == '.' else min([float(z) for z in x.split(';')])
comment=The same as SIFT_score_all but is of type FLOAT and is the minimal score when multiple scores are available.

[SIFT_score_all]
index=24
type=VARCHAR(24) NULL
adj=Nullify('.')
comment=SIFT scores. If a score is smaller than 0.05 the corresponding NS is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple scores are separated by ;

[SIFT_score_rankscore]
index=25
type=FLOAT NULL
adj=Nullify('.')
comment=Original scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932.

# Collapses the prediction list in source column 26 to a single letter:
# 'D' wins over 'T'; a value containing neither is stored as '.'.
[SIFT_pred]
index=26
type=VARCHAR(1) NULL
adj=lambda x: None if x.strip() == '.' else ('D' if 'D' in x else ('T' if 'T' in x else '.'))
comment="D" if "D" appears in SIFT_pred_all, "T" otherwise.
# Raw SIFT prediction list (source column 26) followed by the Polyphen2
# HumDiv / HumVar fields (source columns 27-32). Where two sections share an
# index, one is a single-value summary and the "_all" one keeps the raw list.

[SIFT_pred_all]
index=26
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=If SIFTori is smaller than 0.05 (SIFTnew>0.95) the corresponding NS is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)".

[Polyphen2_HDIV_score]
index=27
type=FLOAT NULL
adj=lambda x: None if x.strip() == '.' else max([float(z) for z in x.split(';')])
comment=The maximum (most damaging) value of Polyphen2 score based on HumDiv, i.e. hdiv_prob. Use Polyphen2_HDIV_score_all to get a list of all scores.

[Polyphen2_HDIV_score_all]
index=27
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1, and the corresponding prediction is "probably damaging" if it is in [0.957,1]; "possibly damaging" if it is in [0.453,0.956]; "benign" if it is in [0,0.452]. Score cutoff for binary classification is 0.5, i.e. the prediction is "neutral" if the score is smaller than 0.5 and "deleterious" if the score is larger than 0.5. Multiple entries separated by ";".

# Only one rankscore is presented per variant, so this is a number; it is
# declared FLOAT like the other rankscore fields so that numeric comparisons
# behave as expected (it was VARCHAR(255)).
[Polyphen2_HDIV_rankscore]
index=28
type=FLOAT NULL
adj=Nullify('.')
comment=Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917.

# Single-letter summary with precedence D > P > B; a value containing none of
# the three letters is stored as '.'.
[Polyphen2_HDIV_pred]
index=29
type=VARCHAR(1) NULL
adj=lambda x: None if x.strip() == '.' else ('D' if 'D' in x else ('P' if 'P' in x else ('B' if 'B' in x else '.')))
comment="D" if any of the predictions in Polyphen2_HDIV_pred_all is "D". Otherwise it can be "P" if any prediction is "P", or "B" if any prediction is "B".

[Polyphen2_HDIV_pred_all]
index=29
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=Polyphen2 prediction based on HumDiv, "D" ("probably damaging"), "P" ("possibly damaging") and "B" ("benign"). Multiple entries separated by ";". Because of the availability of multiple values, use field Polyphen2_HDIV_pred = 'D' to locate probably damaging variants.

[Polyphen2_HVAR_score]
index=30
type=FLOAT NULL
adj=lambda x: None if x.strip() == '.' else max([float(z) for z in x.split(';')])
comment=The maximum (most damaging) value of all Polyphen2 score based on HumVar, i.e. hvar_prob. Use Polyphen2_HVAR_score_all to get a list of all scores.

[Polyphen2_HVAR_score_all]
index=30
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=Polyphen2 scores based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1, and the corresponding prediction is "probably damaging" if it is in [0.909,1]; "possibly damaging" if it is in [0.447,0.908]; "benign" if it is in [0,0.446]. Score cutoff for binary classification is 0.5, i.e. the prediction is "neutral" if the score is smaller than 0.5 and "deleterious" if the score is larger than 0.5. Multiple entries separated by ";".

# Declared FLOAT for the same reason as Polyphen2_HDIV_rankscore.
[Polyphen2_HVAR_rankscore]
index=31
type=FLOAT NULL
adj=Nullify('.')
comment=Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711.

[Polyphen2_HVAR_pred]
index=32
type=VARCHAR(1) NULL
adj=lambda x: None if x.strip() == '.' else ('D' if 'D' in x else ('P' if 'P' in x else ('B' if 'B' in x else '.')))
comment="D" if any of the predictions in Polyphen2_HVAR_pred_all is "D". Otherwise it can be "P" if any prediction is "P", or "B" if any prediction is "B".
# Raw Polyphen2 HumVar prediction list (source column 32), then the LRT and
# MutationTaster scores (source columns 33-37).

[Polyphen2_HVAR_pred_all]
index=32
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=Polyphen2 prediction based on HumVar, "D" ("probably damaging", HVAR score in [0.909,1] or rankscore in [0.62955,0.9711]), "P" ("possibly damaging", HVAR in [0.447,0.908] or rankscore in [0.44359,0.62885]) and "B" ("benign", HVAR score in [0,0.446] or rankscore in [0.01281,0.44315]). Score cutoff for binary classification is 0.5 for HVAR score or 0.45998 for rankscore, i.e. the prediction is "neutral" if the HVAR score is smaller than 0.5 (rankscore is smaller than 0.45998), and "deleterious" if the HVAR score is larger than 0.5 (rankscore is larger than 0.45998). Multiple entries are separated by ";".

[LRT_score]
index=33
type=FLOAT NULL
adj=Nullify('.')
comment=The original LRT two-sided p-value (LRTori).

[LRT_converted_rankscore]
index=34
type=FLOAT NULL
adj=Nullify('.')
comment=LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682.

[LRT_pred]
index=35
type=CHAR(1) NULL
adj=Nullify('.')
comment=LRT prediction, D(eleterious), N(eutral) or U(nknown)

[MutationTaster_score]
index=36
type=FLOAT NULL
adj=Nullify('.')
comment=MutationTaster score

[MutationTaster_converted_rankscore]
index=37
type=FLOAT NULL
adj=Nullify('.')
comment=The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722.
# MutationTaster prediction, MutationAssessor, FATHMM and the RadialSVM
# ensemble score (source columns 38-47).

[MutationTaster_pred]
index=38
type=CHAR(1) NULL
adj=Nullify('.')
comment=MutationTaster prediction, "A" ("disease_causing_automatic"), "D" ("disease_causing"), "N" ("polymorphism") or "P" ("polymorphism_automatic"). The score cutoff between "D" and "N" is 0.5 for MTori and 0.328 for the rankscore.

[MutationAssessor_score]
index=39
type=FLOAT NULL
adj=Nullify('.')
comment=MutationAssessor functional impact combined score (MAori). The score ranges from -5.545 to 5.975 in dbNSFP. Please refer to Reva et al. (2011) Nucl. Acids Res. 39(17):e118 for details.

[MutationAssessor_rankscore]
index=40
type=FLOAT NULL
adj=Nullify('.')
comment=MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1.

[MutationAssessor_pred]
index=41
type=CHAR(16) NULL
adj=Nullify('.')
comment=MutationAssessor's functional impact of a variant : predicted functional, i.e. high ("H") or medium ("M"), or predicted non-functional, i.e. low ("L") or neutral ("N"). The MAori score cutoffs between "H" and "M", "M" and "L", and "L" and "N", are 3.5, 1.9 and 0.8, respectively. The rankscore cutoffs between "H" and "M", "M" and "L", and "L" and "N", are 0.9416, 0.61387 and 0.26162, respectively.

# FATHMM_score and FATHMM_score_all both read source column 42; the summary
# keeps the smallest value, which for FATHMM is the more damaging one.
[FATHMM_score]
index=42
type=FLOAT NULL
adj=lambda x: None if x.strip() == '.' else min([float(z) for z in x.split(';')])
comment=Minimal (more damaging) score of the multiple scores reported by FATHMM_score_all

[FATHMM_score_all]
index=42
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=FATHMM default score (weighted for human inherited-disease mutations with Disease Ontology) (FATHMMori). Scores range from -18.09 to 11.0. Multiple scores separated by ";". Please refer to Shihab et al. (2013) Human Mutation 34(1):57-65 for details.

[FATHMM_rankscore]
index=43
type=FLOAT NULL
adj=Nullify('.')
comment=FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1.

# Single-letter summary of source column 44: 'D' wins over 'T'; a value
# containing neither is stored as '.'.
[FATHMM_pred]
index=44
type=CHAR(16) NULL
adj=lambda x: None if x.strip() == '.' else ('D' if 'D' in x else ('T' if 'T' in x else '.'))
comment="D" if any of the predictions in FATHMM_pred_all is "D"

# Holds a ";"-separated list, so it is declared as wide as the other
# "_pred_all" list fields in this file (it was CHAR(16)).
[FATHMM_pred_all]
index=44
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=If a FATHMMori score is <=-1.5 (or rankscore <=0.81415) the corresponding NS is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"

[RadialSVM_score]
index=45
type=FLOAT NULL
adj=Nullify('.')
comment=Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. The threshold separating "T(olerated)" and "D(amaging)" is 0.

[RadialSVM_rankscore]
index=46
type=FLOAT NULL
adj=Nullify('.')
comment=RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1.

[RadialSVM_pred]
index=47
type=VARCHAR(20) NULL
adj=Nullify('.')
comment=Prediction of our SVM based ensemble prediction score,"T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357.
# Logistic-regression ensemble score, its reliability index, and the CADD raw
# score (source columns 48-53).

[LR_score]
index=48
type=FLOAT NULL
adj=Nullify('.')
comment=Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. The threshold separating "T(olerated)" and "D(amaging)" is 0.5.

[LR_rankscore]
index=49
type=FLOAT NULL
adj=Nullify('.')
comment=LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1.

[LR_pred]
index=50
type=VARCHAR(20) NULL
adj=Nullify('.')
comment=Prediction of our LR based ensemble prediction score,"T(olerated)" or "D(amaging)".

# A count of component scores (1 to 10), hence INTEGER like the allele-count
# fields elsewhere in this file (it was FLOAT).
[Reliability_index]
index=51
type=INTEGER NULL
adj=Nullify('.')
comment=Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the less missing component scores, the higher the reliability of the scores and predictions.

[CADD_raw]
index=52
type=FLOAT NULL
adj=Nullify('.')
comment=CADD raw score for functional prediction of a SNP. Please refer to Kircher et al. (2014) Nature Genetics 46(3):310-5 for details. The larger the score the more likely the SNP has damaging effect.

[CADD_raw_rankscore]
index=53
type=FLOAT NULL
adj=Nullify('.')
comment=CADD raw scores were ranked among all CADD raw scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of CADD raw scores in dbNSFP.
# CADD phred score, GERP++ and phyloP conservation scores
# (source columns 54-62). Every field here is an optional FLOAT.

[CADD_phred]
index=54
type=FLOAT NULL
adj=Nullify('.')
comment=CADD phred-scaled score

[GERP_NR]
index=55
type=FLOAT NULL
adj=Nullify('.')
comment=GERP++ neutral rate

[GERP_RS]
index=56
type=FLOAT NULL
adj=Nullify('.')
comment=GERP++ RS score, the larger the score, the more conserved the site.

[GERP_RS_rankscore]
index=57
type=FLOAT NULL
adj=Nullify('.')
comment=GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP.

[phyloP46way_primate]
index=58
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site.

[phyloP46way_primate_rankscore]
index=59
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP.

[phyloP46way_placental]
index=60
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site.

[phyloP46way_placental_rankscore]
index=61
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP.

[phyloP100way_vertebrate]
index=62
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site.
# Rankscore of the phyloP 100-way score, then the phastCons conservation
# scores with their rankscores (source columns 63-69). All optional FLOATs.

[phyloP100way_vertebrate_rankscore]
index=63
type=FLOAT NULL
adj=Nullify('.')
comment=phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP.

[phastCons46way_primate]
index=64
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site.

[phastCons46way_primate_rankscore]
index=65
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP.

[phastCons46way_placental]
index=66
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site.

[phastCons46way_placental_rankscore]
index=67
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP.

[phastCons100way_vertebrate]
index=68
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site.

[phastCons100way_vertebrate_rankscore]
index=69
type=FLOAT NULL
adj=Nullify('.')
comment=phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP.
# SiPhy scores, LRT omega, UniSNP ids and 1000 Genomes phase 1 allele
# counts / frequencies (source columns 70-82).

# adj=Nullify('.') added: this was the only nullable field from hg18_pos
# onward without it, so a '.' placeholder would have been stored as a literal
# string.
# NOTE(review): assumes the source marks a missing value here with '.', as
# for the other columns - confirm against the dbNSFP README.
[SiPhy_29way_pi]
index=70
type=VARCHAR(255) NULL
adj=Nullify('.')
comment=The estimated stationary distribution of A, C, G and T at the site, using SiPhy algorithm based on 29 mammals genomes.

[SiPhy_29way_logOdds]
index=71
type=FLOAT NULL
adj=Nullify('.')
comment=SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site.

[SiPhy_29way_logOdds_rankscore]
index=72
type=FLOAT NULL
adj=Nullify('.')
comment=SiPhy_29way_logOdds scores were ranked among all SiPhy_29way_logOdds scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of SiPhy_29way_logOdds scores in dbNSFP.

[LRT_Omega]
index=73
type=FLOAT NULL
adj=Nullify('.')
comment=estimated nonsynonymous-to-synonymous-rate ratio (reported by LRT)

[UniSNP_ids]
index=74
type=CHAR(64) NULL
adj=Nullify('.')
comment="rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;..."

# 1000 Genomes phase 1: *_AC fields are integer counts, *_AF fields are
# frequencies.
[KGp1_AC]
index=75
type=INTEGER NULL
adj=Nullify('.')
comment=Alternative allele count in the whole 1000Gp1 data.

[KGp1_AF]
index=76
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the whole 1000Gp1 data.

[KGp1_AFR_AC]
index=77
type=INTEGER NULL
adj=Nullify('.')
comment=Alternative allele counts in the 1000Gp1 African descendent samples.

[KGp1_AFR_AF]
index=78
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the 1000Gp1 African descendent samples.

[KGp1_EUR_AC]
index=79
type=INTEGER NULL
adj=Nullify('.')
comment=Alternative allele counts in the 1000Gp1 European descendent samples.

[KGp1_EUR_AF]
index=80
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the 1000Gp1 European descendent samples.

[KGp1_AMR_AC]
index=81
type=INTEGER NULL
adj=Nullify('.')
comment=Alternative allele counts in the 1000Gp1 American descendent samples.

[KGp1_AMR_AF]
index=82
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the 1000Gp1 American descendent samples.
# 1000 Genomes phase 1 Asian-sample count / frequency and the ESP6500 allele
# frequencies (source columns 83-86).

[KGp1_ASN_AC]
index=83
type=INTEGER NULL
adj=Nullify('.')
comment=Alternative allele counts in the 1000Gp1 Asian descendent samples.

[KGp1_ASN_AF]
index=84
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the 1000Gp1 Asian descendent samples.

[ESP6500_AA_AF]
index=85
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set).

[ESP6500_EA_AF]
index=86
type=FLOAT NULL
adj=Nullify('.')
comment=Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set).