We are currently using refseq to do location analysis but we wish to switch to gencode.
Old file: Human_hg19_refseq.txt
New file: gencode.v15.annotated.gtf
Is this the correct correspondence?
refseq column1=gencode column1 genelines only
refseq column2=gencode column7 genelines only
refseq column3=gencode column4 genelines only
refseq column4=gencode column5 genelines only
refseq column5=gencode column13 (gene name) genelines only
refseq column6=gencode exon start columns
refseq column7=gencode exon end columns
I also assume both lists are in the same order, right? Except that one is more current?
Old file: Human_hg19_refseq.txt
Code:
chr1 - 14362 29370 WASH5P 14362,14969,15795,16606,16857,17232,17605,17914,18267,24737,29320, 14829,15038,15947,16765,17055,17368,17742,18061,18366,24891,29370, chr1 - 34611 36081 FAM138A 34611,35276,35720, 35174,35481,36081, chr1 + 69090 70008 OR4F5 69090, 70008, chr1 + 367658 368595 OR4F16 367658, 368595, chr1 - 621097 622034 OR4F16 621097, 622034, chr1 - 761586 762902 NCRNA00115 761586, 762902, chr1 + 763063 789740 LOC643837 763063,764382,783033,787306,788050,788770,788956, 763155,764484,783186,787490,788146,788902,789740, chr1 - 803452 812182 FAM41C 803452,809491,812125, 804055,810535,812182, chr1 - 852952 854817 FLJ39609 852952,853401,854204,854714, 853100,853555,854295,854817, chr1 + 861120 879961 SAMD11 861120,861301,865534,866418,871151,874419,874654,876523,877515,877789,877938,878632,879077,879287, 861180,861393,865716,866469,871276,874509,874840,876686,877631,877868,878438,878757,879188,879961, chr1 - 879583 894679 NOC2L 879583,880436,880897,881552,881781,883510,883869,886506,887379,887791,888554,889161,889383,891302,891474,892273,892478,894308,894594, 880180,880526,881033,881666,881925,883612,883983,886618,887519,887980,888668,889272,889462,891393,891595,892405,892653,894461,894679, chr1 + 895966 901095 KLHL17 895966,896672,897008,897205,897734,898083,898488,898716,899299,899486,899728,900342, 896180,896932,897130,897427,897851,898297,898633,898884,899388,899560,899910,901095, chr1 + 901876 910482 PLEKHN1 901876,902083,905656,905900,906065,906258,906456,906703,907454,907667,908240,908879,909212,909695,909821, 901994,902183,905803,905981,906138,906386,906588,906784,907530,907804,908390,909020,909431,909744,910482, chr1 - 910578 917473 C1orf170 910578,911878,914260,916516,917444, 911649,912004,916037,916553,917473, chr1 - 934341 935552 HES4 934341,934905,935071,935245, 934812,934993,935167,935552, chr1 - 934344 935552 HES4 934344,934905,935071, 934812,934993,935552, chr1 + 948846 949915 ISG15
New file: gencode.v15.annotated.gtf
Code:
##description: evidence-based annotation of the human genome (GRCh37), version 15 (Ensembl 70) ##provider: GENCODE ##contact: [email protected] ##format: gtf ##date: 2013-01-21 chr1 HAVANA gene 11869 14412 . + . gene_id "ENSG00000223972.4"; transcript_id "ENSG00000223972.4"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1"; level 2; havana_gene "OTTHUMG00000000961.2"; chr1 HAVANA transcript 11869 14409 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000456328.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "DDX11L1-002"; level 2; tag "basic"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000362751.1"; chr1 HAVANA exon 11869 12227 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000456328.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "DDX11L1-002"; exon_number 1; level 2; tag "basic"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000362751.1"; chr1 HAVANA exon 12613 12721 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000456328.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "DDX11L1-002"; exon_number 2; level 2; tag "basic"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000362751.1"; chr1 HAVANA exon 13221 14409 . + . 
gene_id "ENSG00000223972.4"; transcript_id "ENST00000456328.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "processed_transcript"; transcript_status "KNOWN"; transcript_name "DDX11L1-002"; exon_number 3; level 2; tag "basic"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000362751.1"; chr1 ENSEMBL transcript 11872 14412 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000515242.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-201"; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 11872 12227 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000515242.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-201"; exon_number 1; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 12613 12721 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000515242.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-201"; exon_number 2; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 13225 14412 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000515242.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-201"; exon_number 3; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL transcript 11874 14409 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000518655.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-202"; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 11874 12227 . + . 
gene_id "ENSG00000223972.4"; transcript_id "ENST00000518655.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-202"; exon_number 1; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 12595 12721 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000518655.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-202"; exon_number 2; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 13403 13655 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000518655.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-202"; exon_number 3; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 ENSEMBL exon 13661 14409 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000518655.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-202"; exon_number 4; level 3; havana_gene "OTTHUMG00000000961.2"; chr1 HAVANA transcript 12010 13670 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 12010 12057 . + . 
gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 1; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 12179 12227 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 2; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 12613 12697 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 3; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 12975 13052 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 4; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 13221 13374 . + . 
gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 5; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA exon 13453 13670 . + . gene_id "ENSG00000223972.4"; transcript_id "ENST00000450305.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "DDX11L1"; transcript_type "transcribed_unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "DDX11L1-001"; exon_number 6; level 2; ont "PGO:0000005"; ont "PGO:0000019"; havana_gene "OTTHUMG00000000961.2"; havana_transcript "OTTHUMT00000002844.2"; chr1 HAVANA gene 14363 29806 . - . gene_id "ENSG00000227232.3"; transcript_id "ENSG00000227232.3"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P"; level 2; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL transcript 14363 29370 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 29321 29370 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 1; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 24738 24891 . - . 
gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 2; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 18268 18379 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 3; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 17915 18061 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 4; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 17602 17742 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 5; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 17233 17364 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 6; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 16854 17055 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 7; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 16607 16765 . - . 
gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 8; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 15904 15947 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 9; level 3; havana_gene "OTTHUMG00000000958.1"; chr1 ENSEMBL exon 15796 15901 . - . gene_id "ENSG00000227232.3"; transcript_id "ENST00000438504.2"; gene_type "pseudogene"; gene_status "KNOWN"; gene_name "WASH7P"; transcript_type "unprocessed_pseudogene"; transcript_status "KNOWN"; transcript_name "WASH7P-203"; exon_number 10; level 3; havana_gene "OTTHUMG00000000958.1";
refseq column1=gencode column1 genelines only
refseq column2=gencode column7 genelines only
refseq column3=gencode column4 genelines only
refseq column4=gencode column5 genelines only
refseq column5=gencode column13 (gene name) genelines only
refseq column6=gencode exon start columns
refseq column7=gencode exon end columns
I also assume both lists are in the same order, right? Except that one is more current?
Comment