diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index d9ed9f8f9..94379e603 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,15 +2,8 @@
{
"name": "poly-devcontainer",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
- "image": "mcr.microsoft.com/devcontainers/base:bullseye",
+ "image": "mcr.microsoft.com/devcontainers/go:1.23-bookworm",
- "features": {
- "ghcr.io/devcontainers/features/go:1": {
- "version": "latest",
- "golangciLintVersion": "latest"
- },
- },
-
"extensions": ["golang.go"],
// Use 'forwardPorts' to make a list of ports inside the container available locally.
diff --git a/.golangci.yml b/.golangci.yml
index 53d8d62f8..fb6921ba2 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -20,7 +20,6 @@ linters:
- stylecheck
- unconvert
- unparam
- - whitespace
linters-settings:
stylecheck:
# https://staticcheck.io/docs/options#checks
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e1f8cc006..3bc15306e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+### Fixed
+- Made it possible to simulate primers shorter than the design minimum.
+
+## [0.31.1] - 2024-01-31
+
+### Added
+- Fixed package level doc strings for search and bwt packages.
+
+[0.31.1]: https://github.com/TimothyStiles/poly/releases/tag/v0.31.1
+
+## [0.31.0] - 2024-01-31
+
+### Added
+- Basic BWT for sub-sequence count and offset for sequence alignment. Only supports exact matches for now.
+- Moved `BWT`, `align`, and `mash` packages to new `search` sub-directory.
+- Implemented Run-Length Burrows Wheeler Transform.
+
+[0.31.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.31.0
+
## [0.30.0] - 2023-12-18
Oops, we weren't keeping a changelog before this tag!
diff --git a/LICENSE b/LICENSE
index de8c767fa..b457f4c39 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2023 Timothy Stiles
+Copyright (c) 2024 Timothy Stiles
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 4d2c24370..d0f1e73b2 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# (Poly)merase
+# (Poly)merase
[](https://pkg.go.dev/github.com/bebop/poly)
[](https://github.com/bebop/poly/blob/main/LICENSE)
@@ -47,4 +47,4 @@ Poly is a Go package for engineering organisms.
* [MIT](LICENSE)
-* Copyright (c) 2023 Timothy Stiles
+* Copyright (c) 2024 Timothy Stiles
diff --git a/fold/fold_test.go b/fold/fold_test.go
index 8be9275f8..786d0a9a9 100644
--- a/fold/fold_test.go
+++ b/fold/fold_test.go
@@ -1,6 +1,7 @@
package fold
import (
+ "fmt"
"math"
"strings"
"testing"
@@ -201,3 +202,13 @@ func TestFold(t *testing.T) {
assert.InDelta(t, struc.energy, -4.2, 0.2)
})
}
+// TestZuker_ErrorCreatingFoldingContext checks the error message Zuker returns for a sequence with non-nucleotide characters.
+func TestZuker_ErrorCreatingFoldingContext(t *testing.T) {
+	invalidSeq := "ATGGATTTAGATAGATADFQ#(RSDOFIA)"
+	temperature := 4000.0
+	wantErr := fmt.Errorf("error creating folding context: the sequence ATGGATTTAGATAGATADFQ#(RSDOFIA) is not RNA or DNA")
+
+	_, gotErr := Zuker(invalidSeq, temperature)
+	require.Error(t, gotErr) // stop here on nil so gotErr.Error() below cannot panic
+	assert.Equal(t, wantErr.Error(), gotErr.Error())
+}
diff --git a/fold/seqfold_test.go b/fold/seqfold_test.go
new file mode 100644
index 000000000..e14a20a10
--- /dev/null
+++ b/fold/seqfold_test.go
@@ -0,0 +1,43 @@
+package fold
+
+import (
+ "fmt"
+ "math"
+ "testing"
+)
+
+// TestResult_MinimumFreeEnergy_LengthZero expects MinimumFreeEnergy to report
+// positive infinity when called on an empty Result.
+func TestResult_MinimumFreeEnergy_LengthZero(t *testing.T) {
+	emptyResult := Result{} // no fields populated
+	want := math.Inf(1)
+
+	if got := emptyResult.MinimumFreeEnergy(); got != want {
+		t.Errorf("expected energy to be %f, but got %f", want, got)
+	}
+}
+
+// TestResult_DotBracket_LengthZero expects DotBracket to return an empty string for an empty Result.
+func TestResult_DotBracket_LengthZero(t *testing.T) {
+	emptyResult := Result{} // no fields populated
+	want := ""
+
+	got := emptyResult.DotBracket()
+	if got != want {
+		t.Errorf("expected dot bracket to be %s, but got %s", want, got)
+	}
+}
+
+// TestNewFoldingContext_InvalidSequence checks that newFoldingContext rejects "XYZ" with the expected error message.
+func TestNewFoldingContext_InvalidSequence(t *testing.T) {
+	seq := "XYZ"
+	temp := 37.0
+	_, err := newFoldingContext(seq, temp)
+	if err == nil {
+		t.Fatal("expected error, but got nil") // Fatal, not Errorf: err.Error() below would panic on a nil error
+	}
+	want := fmt.Sprintf("the sequence %s is not RNA or DNA", seq)
+	if err.Error() != want {
+		t.Errorf("expected error message to be %q, but got %q", want, err.Error())
+	}
+}
diff --git a/io/genbank/data/NC_001141.2.gb b/io/genbank/data/NC_001141.2.gb
new file mode 100644
index 000000000..cf0bd4c7a
--- /dev/null
+++ b/io/genbank/data/NC_001141.2.gb
@@ -0,0 +1,10301 @@
+LOCUS NC_001141 439888 bp DNA linear CON 28-JUN-2024
+DEFINITION Saccharomyces cerevisiae S288C chromosome IX, complete sequence.
+ACCESSION NC_001141
+VERSION NC_001141.2
+DBLINK BioProject: PRJNA128
+ Assembly: GCF_000146045.2
+KEYWORDS RefSeq.
+SOURCE Saccharomyces cerevisiae S288C
+ ORGANISM Saccharomyces cerevisiae S288C
+ Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;
+ Saccharomycetes; Saccharomycetales; Saccharomycetaceae;
+ Saccharomyces.
+REFERENCE 1 (bases 1 to 439888)
+ AUTHORS Engel,S.R., Wong,E.D., Nash,R.S., Aleksander,S., Alexander,M.,
+ Douglass,E., Karra,K., Miyasato,S.R., Simison,M., Skrzypek,M.S.,
+ Weng,S. and Cherry,J.M.
+ TITLE New data and collaborations at the Saccharomyces Genome Database:
+ updated reference genome, alleles, and the Alliance of Genome
+ Resources
+ JOURNAL Genetics 220 (4) (2022)
+ PUBMED 34897464
+REFERENCE 2 (bases 1 to 439888)
+ AUTHORS Churcher,C., Bowman,S., Badcock,K., Bankier,A., Brown,D.,
+ Chillingworth,T., Connor,R., Devlin,K., Gentles,S., Hamlin,N.,
+ Harris,D., Horsnell,T., Hunt,S., Jagels,K., Jones,M., Lye,G.,
+ Moule,S., Odell,C., Pearson,D., Rajandream,M., Rice,P., Rowley,N.,
+ Skelton,J., Smith,V., Barrell,B. et al.
+ TITLE The nucleotide sequence of Saccharomyces cerevisiae chromosome IX
+ JOURNAL Nature 387 (6632 SUPPL), 84-87 (1997)
+ PUBMED 9169870
+REFERENCE 3 (bases 1 to 439888)
+ AUTHORS Goffeau,A., Barrell,B.G., Bussey,H., Davis,R.W., Dujon,B.,
+ Feldmann,H., Galibert,F., Hoheisel,J.D., Jacq,C., Johnston,M.,
+ Louis,E.J., Mewes,H.W., Murakami,Y., Philippsen,P., Tettelin,H. and
+ Oliver,S.G.
+ TITLE Life with 6000 genes
+ JOURNAL Science 274 (5287), 546 (1996)
+ PUBMED 8849441
+REFERENCE 4 (bases 1 to 439888)
+ CONSRTM NCBI Genome Project
+ TITLE Direct Submission
+ JOURNAL Submitted (28-JUN-2024) National Center for Biotechnology
+ Information, NIH, Bethesda, MD 20894, USA
+REFERENCE 5 (bases 1 to 439888)
+ CONSRTM Saccharomyces Genome Database
+ TITLE Direct Submission
+ JOURNAL Submitted (04-MAY-2012) Department of Genetics, Stanford
+ University, Stanford, CA 94305-5120, USA
+ REMARK Protein update by submitter
+REFERENCE 6 (bases 1 to 439888)
+ CONSRTM Saccharomyces Genome Database
+ TITLE Direct Submission
+ JOURNAL Submitted (31-MAR-2011) Department of Genetics, Stanford
+ University, Stanford, CA 94305-5120, USA
+ REMARK Sequence update by submitter
+REFERENCE 7 (bases 1 to 439888)
+ CONSRTM Saccharomyces Genome Database
+ TITLE Direct Submission
+ JOURNAL Submitted (14-DEC-2009) Department of Genetics, Stanford
+ University, Stanford, CA 94305-5120, USA
+COMMENT REVIEWED REFSEQ: This record has been curated by SGD. The reference
+ sequence is identical to BK006942.
+
+ On Apr 26, 2011 this sequence version replaced NC_001141.1.
+
+ ##Genome-Annotation-Data-START##
+ Annotation Provider :: SGD
+ Annotation Status :: Full Annotation
+ Annotation Version :: R64-5-1
+ URL :: http://www.yeastgenome.org/
+ ##Genome-Annotation-Data-END##
+ COMPLETENESS: full length.
+FEATURES Location/Qualifiers
+ source 1..439888
+ /organism="Saccharomyces cerevisiae S288C"
+ /mol_type="genomic DNA"
+ /strain="S288C"
+ /db_xref="taxon:559292"
+ /chromosome="IX"
+ telomere complement(1..7784)
+ /note="TEL09L; Telomeric region on the left arm of
+ Chromosome IX; composed of an X element core sequence, X
+ element combinatorial repeats, a long Y' element, and a
+ short terminal stretch of telomeric repeats"
+ /db_xref="SGD:S000028896"
+ gene complement(<483..>6147)
+ /locus_tag="YIL177C"
+ /db_xref="GeneID:854630"
+ mRNA complement(join(<483..4598,4987..>6147))
+ /locus_tag="YIL177C"
+ /product="Y' element ATP-dependent helicase"
+ /transcript_id="NM_001179522.1"
+ /db_xref="GeneID:854630"
+ CDS complement(join(483..4598,4987..6147))
+ /locus_tag="YIL177C"
+ /EC_number="3.6.4.12"
+ /note="Putative Y' element ATP-dependent helicase"
+ /codon_start=1
+ /product="Y' element ATP-dependent helicase"
+ /protein_id="NP_012092.1"
+ /db_xref="GeneID:854630"
+ /db_xref="SGD:S000001439"
+ /translation="MKVSDRRKFEKANFDEFESALNNKNDLVHCPSITLFESIPTEVR
+ SFYEDEKSGLIKVVKFRTGAMDRKRSFEKVVISVMVGKNVKKFLTFVEDEPDFQGGPI
+ PSKYLIPKKINLMVYTLFQVHTLKFNRKDYDTLSLFYLNRGYYNELSFRVLERCHEIA
+ SARPNDSSTMRTFTDFVSGAPIVRSLQKSTIRKYGYNLAPYMFLLLHVDELSIFSAYQ
+ ASLPGEKKVDTERLKRDLCPRKPIEIKYFSQICNDMMNKKDRLGDILHIILRACALNF
+ GAGPRGGAGDEEDRSITNEEPIIPSVDEHGLKVCKLRSPNTPRRLRKTLDAVKALLVS
+ SCACTARDLDIFDDNNGVAMWKWIKILYHEVAQETTLKDSYRITLVPSSDGISLLAFA
+ GPQRNVYVDDTTRRIQLYTDYNKNGSSEPRLKTLDGLTSDYVFYFVTVLRQMQICALG
+ NSYDAFNHDPWMDVVGFEDPNQVTNRDISRIVLYSYMFLNTAKGCLVEYATFRQYMRE
+ LPKNAPQKLNFREMRQGLIALGRHCVGSRFETDLYESATSELMANHSVQTGRNIYGVD
+ SFSLTSVSGTTATLLQERASERWIQWLGLESDYHCSFSSTRNAEDVVAGEAASSNHHQ
+ KISRVTRKRPREPKSTNDILVAGQKLFGSSFEFRDLHQLRLCYEIYMADTPSVAVQAP
+ PGYGKTELFHLPLIALASKGDVEYVSFLFVPYTVLLANCMIRLGRCGCLNVAPVRNFI
+ EEGYDGVTDLYVGIYDDLASTNFTDRIAAWENIVECTFRTNNVKLGYLIVDEFHNFET
+ EVYRQSQFGGITNLDFDAFEKAIFLSGTAPEAVADAALQRIGLTGLAKKSMDINELKR
+ SEDLSRGLSSYPTRMFNLIKEKSEVPLGHVHKIRKKVESQPEEALKLLLALFESEPES
+ KAIVVASTTNEVEELACSWRKYFRVVWIHGKLGAAEKVSRTKEFVTDGSMQVLIGTKL
+ VTEGIDIKQLMMVIMLDNRLNIIELIQGVGRLRDGGLCYLLSRKNSWAARNRKGELPP
+ IKEGCITEQVREFYGLESKKGKKGQHVGCCGSRTDLSADTVELIERMDRLAEKQATAS
+ MSIVALPSSFQESNSSDRYRKYCSSDEDSNTCIHGSANASTNASTNAITTASTNVRTN
+ ATTNASTNATTNASTNASTNATTNASTNATTNSSTNATTTASTNVRTSATTTASINVR
+ TSATTTESTNSSTNATTTESTNSSTNATTTESTNSNTSATTTASINVRTSATTTESTN
+ SSTSATTTASINVRTSATTTKSINSSTNATTTESTNSNTNATTTESTNSSTNATTTES
+ TNSSTNATTTESTNSNTSAATTESTNSNTSATTTESTNASAKEDANKDGNAEDNRFHP
+ VTDINKESYKRKGSQMVLLERKKLKAQFPNTSENMNVLQFLGFRSDEIKHLFLYGIDI
+ YFCPEGVFTQYGLCKGCQKMFELCVCWAGQKVSYRRIAWEALAVERMLRNDEEYKEYL
+ EDIEPYHGDPVGYLKYFSVKRREIYSQIQRNYAWYLAITRRRETISVLDSTRGKQGSQ
+ VFRMSGRQIKELYFKVWSNLRESKTEVLQYFLNWDEKKCQEEWEAKDDTVVVEALEKG
+ GVFQRLRSMTSAGLQGPQYVKLQFSRHHRQLRSRYELSLGMHLRDQIALGVTPSKVPH
+ WTAFLSMLIGLFYNKTFRQKLEYLLEQISEVWLLPHWLDLANVEVLAADDTRVPLYML
+ MVAVHKELDSDDVPDGRFDILLCRDSSREVGE"
+ rep_origin 7470..8793
+ /note="ARS902; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130156"
+ gene complement(<8793..>9155)
+ /gene="PAU14"
+ /locus_tag="YIL176C"
+ /db_xref="GeneID:854631"
+ mRNA complement(<8793..>9155)
+ /gene="PAU14"
+ /locus_tag="YIL176C"
+ /product="seripauperin PAU14"
+ /transcript_id="NM_001179521.1"
+ /db_xref="GeneID:854631"
+ CDS complement(8793..9155)
+ /gene="PAU14"
+ /locus_tag="YIL176C"
+ /note="hypothetical protein; member of the seripauperin
+ multigene family encoded mainly in subtelomeric regions;
+ identical to Pau1p"
+ /codon_start=1
+ /product="seripauperin PAU14"
+ /protein_id="NP_012093.1"
+ /db_xref="GeneID:854631"
+ /db_xref="SGD:S000001438"
+ /translation="MVKLTSIAAGVAAIAATASATTTLAQSDERVNLVELGVYVSDIR
+ AHLAQYYMFQAAHPTETYPVEVAEAVFNYGDFTTMLTGISPDQVTRMITGVPWYSSRL
+ KPAISSALSKDGIYTIAN"
+ gene 9183..9500
+ /locus_tag="YIL175W"
+ /note="hypothetical protein; classified as pseudogene
+ because there is no ATG at the beginning of this ORF"
+ /pseudo
+ /db_xref="GeneID:854632"
+ /db_xref="SGD:S000001437"
+ mRNA 9183..9500
+ /locus_tag="YIL175W"
+ /pseudo
+ /db_xref="GeneID:854632"
+ /db_xref="SGD:S000001437"
+ gene 9469..9696
+ /locus_tag="YIL174W"
+ /note="hypothetical protein"
+ /pseudo
+ /db_xref="GeneID:854633"
+ /db_xref="SGD:S000001436"
+ mRNA 9469..9696
+ /locus_tag="YIL174W"
+ /pseudo
+ /db_xref="GeneID:854633"
+ /db_xref="SGD:S000001436"
+ gene <11492..>16141
+ /gene="VTH1"
+ /locus_tag="YIL173W"
+ /db_xref="GeneID:854634"
+ mRNA <11492..>16141
+ /gene="VTH1"
+ /locus_tag="YIL173W"
+ /product="signal sequence-binding protein"
+ /transcript_id="NM_001179519.1"
+ /db_xref="GeneID:854634"
+ CDS 11492..16141
+ /gene="VTH1"
+ /locus_tag="YIL173W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005768 endosome
+ [PMID:14562095]"
+ /experiment="EXISTENCE:genetic interaction:GO:0005048
+ signal sequence binding [PMID:8662642]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006896
+ Golgi to vacuole transport [PMID:8662642]"
+ /note="Putative membrane glycoprotein; has strong
+ similarity to Vth2p and Pep1p/Vps10p; may be involved in
+ vacuolar protein sorting"
+ /codon_start=1
+ /product="signal sequence-binding protein"
+ /protein_id="NP_012095.1"
+ /db_xref="GeneID:854634"
+ /db_xref="SGD:S000001435"
+ /translation="MALFRALYIIWVFLLIPLSNAEEFTPKVTRTLSRYVFDIVNFDD
+ SNTLIRAEEDSVEISFDAGENWKTIDEIEEPIESFVVDPFRGHDRAFAFVKTAPKFYV
+ TDDQGKSWRPLTIPISEKASNYFCDVTTHPIKKKHLIIRCDLLTIKNSGLMYVGREIY
+ TTNDGVSFSQVKPSFGKIDGHISTARCDFIKSSEDSDLGGNDASILCLFRNTEYIEST
+ GSTIDKSELILSADGGETFKELVQFKDKVVSRYEILKHHVIVLTQDDMYNEMSSTNIW
+ ISNDVSTFQVARTPTKIRHVNMGQIHEDSIGRIVLPVSRERDDEDSNQPGAAEVLISD
+ SEGLKFLPINWIPNNQFGYINVAYPGFLKGTFFGSFHPFIEYSDRKRKYSRQKVREET
+ KVSVDNGLTWTNLKVVDRENVDLFGCDVTKPERCSLQTHFYDLRNLNPSAGIMMISGI
+ VGDGSAYNWKEEKTFISRDSGLTWRLVHNSTGLYTTGDLGNIIMYIPYRSNENGDVPS
+ KFYYSLDQGKTWGEYDLIMPIYPYRLVSTISDGSGSKFILTGTSITEDPIFITYSIDF
+ SAVFDYKSCEEGDFEDWNLADGKCVNGAKYKYRRRKQDAQCLVKKAFKDLSLDETPCN
+ SCTGSDYECSFEFVRDAKGDCIPDYNLIALSDICDKSKGKSVLVKPLQLIKGDKCKTP
+ MKIESVDIPCDEIPKEGSSDKEIVTTENKFDFEIKFYQYFDTVADESLVMLNSIGDAY
+ ISHDGGQTIKRFDTDGEKIVEIVFNPYFNSSAYLFGSKGNIFLTHDRGYSFMIAKLPE
+ ARQLGMPLDFSAKAQDTFIYYGGKNCESILSPECHAVAYLTKDGGETFTEMLDNAIHC
+ EFAGTLFKYPSNDDMVMCQVKEKFSQTRSLVSSTDFFQDDRKTVFENIIGYLSTGGYI
+ IVAVPHEDNELRAYVTNDGAEFTEAKFPYDEDIGKQDAFTILGSEEGSIFLHLATNLE
+ SGHDFGNLLKSNSNGTSFVTLEHAVNRNTFGYVDFEKVQGLEGIIITNIVSNSEKVGE
+ NKEDEQLKTKITFNDGSDWNFLKPPKKDSEGKKFPCDSVSLDKCSLHLHGYTERKDIR
+ DTYSSGSALGMMFGVGNVGDRLLPYEECSTFLTTDGGETWTEVKKGPHQWEYGDHGGV
+ LVLVPENAETDSISYSTDFGKTWKDYKFCGDKVLVKDIITVPRDSALRFLLFGEAKNM
+ GSGSFRTYTIDFRNIFERQCEFDITGRKRADFKYSPLGSRTGCLFGHKTEFLRKTDEK
+ CFIGNIPLSEFSRNVKNCPCTRQDFECDYNFYKASDGTCKLVKGLSSANGADICKKEP
+ DLIEYYDSSGYRKIPLSTCKGGLKLDAHLAPHPCPGKEKAFREKYSINTGAYALVFVT
+ ILLVIFFVAWFVYDRGIRRNGGFSRFEEIRLGDDGLIENNRTDRVVNIIVRLGLCISL
+ ITKSAFQRAKAGTAQLSSKFRARFGNKKGATYSSLLHDQLSDEPDGFHEDSNDLSSFR
+ GQGSNSEIEQEDVDTSQQEHTLRTDLLGASNIPDALPARSASHESDLAAARSEDK"
+ rep_origin 16141..16784
+ /note="ARS904; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130157"
+ gene complement(<16784..>18553)
+ /gene="IMA3"
+ /locus_tag="YIL172C"
+ /db_xref="GeneID:854635"
+ mRNA complement(<16784..>18553)
+ /gene="IMA3"
+ /locus_tag="YIL172C"
+ /product="oligo-1,6-glucosidase IMA3"
+ /transcript_id="NM_001179518.1"
+ /db_xref="GeneID:854635"
+ CDS complement(16784..18553)
+ /gene="IMA3"
+ /locus_tag="YIL172C"
+ /EC_number="3.2.1.10"
+ /experiment="EXISTENCE:direct assay:GO:0004574
+ oligo-1,6-glucosidase activity [PMID:20471265]"
+ /experiment="EXISTENCE:direct assay:GO:0004575 sucrose
+ alpha-glucosidase activity [PMID:24649402]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:genetic interaction:GO:0046352
+ disaccharide catabolic process
+ [PMID:20562106|PMID:20471265]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004574
+ oligo-1,6-glucosidase activity [PMID:20562106]"
+ /note="Alpha-glucosidase; weak, but broad substrate
+ specificity for alpha-1,4- and alpha-1,6-glucosides;
+ member of IMA isomaltase family; not required for
+ isomaltose utilization, but Ima3p overexpression allows
+ the ima1 null mutant to grow on isomaltose; lower
+ activitiy and thermostability in vitro than Ima2p despite
+ sequence difference of only 3 amino acids; cleaves
+ alpha-1,3 linkage of nigerose and turanose, but not
+ alpha-1,5 of leucrose; identical to IMA4"
+ /codon_start=1
+ /product="oligo-1,6-glucosidase IMA3"
+ /protein_id="NP_012096.1"
+ /db_xref="GeneID:854635"
+ /db_xref="SGD:S000001434"
+ /translation="MTISSAHPETEPKWWKEATIYQIYPASFKDSNNDGWGDMKGIAS
+ KLEYIKELGTDAIWISPFYDSPQDDMGYDIANYEKVWPTYGTNEDCFALIEKTHKLGM
+ KFITDLVINHCSSEHEWFKESRSSKTNPKRDWFFWRPPKGYDAEGKPIPPNNWRSYFG
+ GSAWTFDEKTQEFYLRLFCSTQPDLNWENEDCRKAIYESAVGYWLDHGVDGFRIDVGS
+ LYSKVAGLPDAPVIDENSKWQLSDPFTMNGPRIHEFHQEMNKFIRNRVKDGREIMTVG
+ EMRHATDETKRLYTSASRHELSELFNFSHTDVGTSPKFRQNLIPYELKDWKVALAELF
+ RYVNGTDCWSTIYLENHDQPRSITRFGDDSPKNRVISGKLLSVLLVSLSGTLYVYQGQ
+ ELGEINFKNWPIEKYEDVEVRNNYDAIKEEHGENSKEMKRFLEAIALISRDHARTPMQ
+ WSREEPNAGFSGPNAKPWFYLNESFREGINAEDESKDPNSVLNFWKEALRFRKAHKDI
+ TVYGYDFEFIDLDNKKLFSFTKKYDNKTLFAALNFSSDSIDFTIPNNSSSFKLEFGNY
+ PRSEVDASSRTLKPWEGRIYISE"
+ gene 19515..19844
+ /locus_tag="YIL171W"
+ /gene_synonym="HXT12"
+ /note="Possible pseudogene in strain S288C; YIL171W and
+ the adjacent ORF, YIL170W/HXT12, together encode a
+ non-functional member of the hexose transporter family"
+ /pseudo
+ /db_xref="GeneID:2827704"
+ /db_xref="SGD:S000001433"
+ mRNA 19515..19844
+ /locus_tag="YIL171W"
+ /gene_synonym="HXT12"
+ /pseudo
+ /db_xref="GeneID:2827704"
+ /db_xref="SGD:S000001433"
+ gene 19847..21220
+ /gene="HXT12"
+ /locus_tag="YIL170W"
+ /note="Possible pseudogene in strain S288C; YIL170W/HXT12
+ and the adjacent ORF, YIL171W, together encode a
+ non-functional member of the hexose transporter family"
+ /pseudo
+ /db_xref="GeneID:854636"
+ /db_xref="SGD:S000001432"
+ mRNA 19847..21220
+ /gene="HXT12"
+ /locus_tag="YIL170W"
+ /pseudo
+ /db_xref="GeneID:854636"
+ /db_xref="SGD:S000001432"
+ gene complement(<23119..>26106)
+ /gene="CSS1"
+ /locus_tag="YIL169C"
+ /db_xref="GeneID:854637"
+ mRNA complement(<23119..>26106)
+ /gene="CSS1"
+ /locus_tag="YIL169C"
+ /product="Css1p"
+ /transcript_id="NM_001179517.1"
+ /db_xref="GeneID:854637"
+ CDS complement(23119..26106)
+ /gene="CSS1"
+ /locus_tag="YIL169C"
+ /experiment="EXISTENCE:direct assay:GO:0005576
+ extracellular region [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /note="hypothetical protein, secreted when constitutively
+ expressed; SWAT-GFP, seamless-GFP and mCherry fusion
+ proteins localize to the cell periphery, SWAT-GFP fusion
+ also localizes to the extracellular region, and mCherry
+ fusion also localizes to the vacuole; S/T rich and highly
+ similar to YOL155C, a putative glucan
+ alpha-1,4-glucosidase; transcript is induced in both high
+ and low pH environments; non-essential gene"
+ /codon_start=1
+ /product="Css1p"
+ /protein_id="NP_012097.1"
+ /db_xref="GeneID:854637"
+ /db_xref="SGD:S000001431"
+ /translation="MFNRLNKFQAALALALYSQSALGQYYSNSTSISSNSSSTSVVSS
+ SSGSVSISSSIAETSSSATDILSSITQSASSTSGVSSSVGPSSSSVVSSSVSQSSSSV
+ SDVSSSVSQSSSSASDVSSSVSQSASSTSDVSSSVSQSSSSASDVSSSVSQSSSSASD
+ VSSSVSQSASSASDVSSSVSQSASSTSDVSSSVSQSSSSASDVSSSVSQSSSSASDVS
+ SSVSQSASSTSDVSSSVSQSASSTSGVSSSGSQSVSSASGSSSSFPQSTSSASTASGS
+ ATSNSLSSITSSASSASATASNSLSSSDGTIYLPTTTISGDLTLTGKVIATEGVVVAA
+ GAKLTLLDGDKYSFSADLKVYGDLLVKKSKETYPGTEFDISGENFDVTGNFNAEESAA
+ TSASIYSFTPSSFDNSGDISLSLSKSKKGEVTFSPYSNSGAFSFSNAILNGGSVSGLQ
+ RRDDTEGSVNNGEINLDNGSTYVIVEPVSGKGTVNIISGNLYLHYPDTFTGQTVVFKG
+ EGVLAVDPTETNATPIPVVGYTGKNQIAITADITALSYDGTTGVLTATQGNRQFSFAI
+ GTGFSSSDFSVSEGIFAGAYAYYLNYNGVVATSAASSSTASGASASVTGSTSFGASVT
+ GSTASTSFGASVTGSTASTSFGASVTGSTSVYTTTLDYVNATSTVVVSCSETTDSNGN
+ VYTITTTVPCSSTTATITSCDETGCHVSTSTGAVVTETVSSKSYTTATVTHCDDNGCN
+ TKTVTSECSKETSATTASPKSYTTVTVTHCDDNGCNTKTVTSEAPEATTTTTVSSQSY
+ TTATVTHCDDNGCKTKTVTSEAPEATTTTVSPKTYTTATVTQCDDNGCSTKTVTSECP
+ EETSATTTSPKSYTTVTVTHCDDNGCNTKTVTSEAPEATTTTVSPKTYTTATVTQCDD
+ NGCSTKTVTSEAPKETSETSETSAAPKDIHYCHWLLNGDDNGCNVKIITSKIPEATST
+ VTQLVLLQSHTLLSLLRVLKQPH"
+ gene 29032..30048
+ /gene="SDL1"
+ /locus_tag="YIL167W"
+ /gene_synonym="YIL168W"
+ /pseudo
+ /db_xref="GeneID:854639"
+ CDS 29032..30048
+ /gene="SDL1"
+ /locus_tag="YIL167W"
+ /gene_synonym="YIL168W"
+ /note="Blocked reading frame otherwise encoding L-serine
+ dehydratase"
+ /pseudo
+ /codon_start=1
+ /db_xref="GeneID:854639"
+ /db_xref="SGD:S000001429"
+ gene complement(<30938..>32566)
+ /gene="SOA1"
+ /locus_tag="YIL166C"
+ /db_xref="GeneID:854640"
+ mRNA complement(<30938..>32566)
+ /gene="SOA1"
+ /locus_tag="YIL166C"
+ /product="Soa1p"
+ /transcript_id="NM_001179514.1"
+ /db_xref="GeneID:854640"
+ CDS complement(30938..32566)
+ /gene="SOA1"
+ /locus_tag="YIL166C"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000316
+ sulfite transport [PMID:28165463]"
+ /experiment="EXISTENCE:genetic interaction:GO:0008272
+ sulfate transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005342 organic
+ acid transmembrane transporter activity [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008514 organic
+ anion transmembrane transporter activity [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015116 sulfate
+ transmembrane transporter activity [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015711 organic
+ anion transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015849 organic
+ acid transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071705
+ nitrogen compound transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0072337
+ modified amino acid transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0072348 sulfur
+ compound transport [PMID:28165463]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1901682 sulfur
+ compound transmembrane transporter activity
+ [PMID:28165463]"
+ /note="Sulfonate and inorganic sulfur transporter; low
+ affinity sulfate, high affinity sulfite, thiosulfite and
+ major sulfonate transporter; functions as a an H+
+ symporter; similar to the allantoate permease (Dal5p)
+ subfamily of the major facilitator superfamily; mRNA
+ expression is elevated by sulfur limitation; non-essential
+ gene"
+ /codon_start=1
+ /product="Soa1p"
+ /protein_id="NP_012100.1"
+ /db_xref="GeneID:854640"
+ /db_xref="SGD:S000001428"
+ /translation="MSVQKEEYDIVEKAQLSVSAESLTSDSESISHNPFDDFHKAERW
+ RKVYESSGYEGLSKFDPEFTWTKDEEKKLVRKMDLKIFLWVFIMFAFLDLIRKNIARA
+ VSDNFIVDLKMNTNDYNLGQTVYLVIFLASELPGNLLSKRFGPERVIPVQIVLWSVIC
+ ITQAGLKNRGQFIATRCLLGMVQGGFIPDNILYLSYYYTGAELTFRLSFFWCAIPLFQ
+ ILGSLLASGIIEMRGIHNLAGWQYLFIIEGFLSLSVGVASFYLMRRGPTQTGESAFHK
+ GKSLFTEYEEKIMVNRILRDDPSKGDMSNRQPVTFKEILYTLTEFDLWPLFIQGITAF
+ ISLQTVGSYLSLILKSLNYSTFLSNILAIPGQALLLINLPLAALLSRKLKEKSLCVGI
+ ANVWVLPFIVSLVALPTDTNPWIKYILLTGILGLPYTHSILAGWVSEISNSVRSRTVG
+ TALYNMSAQVGAIIASNMYRNDDKPYYTRGNKILLGFTCFNICMAVATKFYYISRNKY
+ KDRKWNSMTKEEQINYLDTTKDKGMKRLDYRFIH"
+ gene complement(<33718..>34077)
+ /locus_tag="YIL165C"
+ /db_xref="GeneID:854641"
+ mRNA complement(<33718..>34077)
+ /locus_tag="YIL165C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179513.1"
+ /db_xref="GeneID:854641"
+ CDS complement(33718..34077)
+ /locus_tag="YIL165C"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000422
+ autophagy of mitochondrion [PMID:19793921]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016236
+ macroautophagy [PMID:19793921]"
+ /note="hypothetical protein; mutant exhibits mitophagy
+ defects; in closely related species and other S.
+ cerevisiae strain backgrounds YIL165C and adjacent ORF,
+ YIL164C, likely constitute a single ORF encoding a
+ nitrilase gene"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012101.1"
+ /db_xref="GeneID:854641"
+ /db_xref="SGD:S000001427"
+ /translation="MKNIAYEGRLFLISAVQFMPDATAMGFGEIIDQATGKRKLPGWP
+ SADDNCINGGSVIIDPYGEIIAGPLLGQEGLLTAEINTDLIAEARFDLDPVGHYARGD
+ VFQLTVNERSHDVKFTK"
+ gene complement(<34087..>34686)
+ /gene="NIT1"
+ /locus_tag="YIL164C"
+ /db_xref="GeneID:854642"
+ mRNA complement(<34087..>34686)
+ /gene="NIT1"
+ /locus_tag="YIL164C"
+ /product="Nit1p"
+ /transcript_id="NM_001179512.1"
+ /db_xref="GeneID:854642"
+ CDS complement(34087..34686)
+ /gene="NIT1"
+ /locus_tag="YIL164C"
+ /note="Nitrilase; member of the nitrilase branch of the
+ nitrilase superfamily; in closely related species and
+ other S. cerevisiae strain backgrounds YIL164C and
+ adjacent ORF, YIL165C, likely constitute a single ORF
+ encoding a nitrilase gene"
+ /codon_start=1
+ /product="Nit1p"
+ /protein_id="NP_012102.1"
+ /db_xref="GeneID:854642"
+ /db_xref="SGD:S000001426"
+ /translation="MAKHIVAALQIGSCPGSTKDTLKKILSYEKEIKESGAKLVVIPE
+ ATLGGYPKGSNFGVYLGYRLQEGREEYAKYLAEAIEIGNGEKYPEISQLCALSKATDA
+ SLCVGCIERDGTTLYCTMVYIDPKDGYVGKHRKLMPTAGERLIWGQGDGSTLPVVDTA
+ AGKIGGAICWENMMPLLRYAMYKKGVEIWCAPTVDARPI"
+ gene complement(<36899..>37252)
+ /locus_tag="YIL163C"
+ /db_xref="GeneID:854643"
+ mRNA complement(<36899..>37252)
+ /locus_tag="YIL163C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001270749.1"
+ /db_xref="GeneID:854643"
+ CDS complement(36899..37252)
+ /locus_tag="YIL163C"
+ /note="hypothetical protein; mRNA identified as translated
+ by ribosome profiling data"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001257678.1"
+ /db_xref="GeneID:854643"
+ /db_xref="SGD:S000001425"
+ /translation="MFLFRRKDYRIEIQKKKVVKSFFQMVYYRALRQHFCQTKSFKHS
+ SKRNVSMMVIGKHRAYLKSLRHHIKGFIITFLVSFSRNLHGKTLDVGSINATRISSPP
+ DNFLNWVFSFYSCSE"
+ gene <37385..>38983
+ /gene="SUC2"
+ /locus_tag="YIL162W"
+ /db_xref="GeneID:854644"
+ mRNA <37385..>38983
+ /gene="SUC2"
+ /locus_tag="YIL162W"
+ /product="beta-fructofuranosidase SUC2"
+ /transcript_id="NM_001179510.1"
+ /db_xref="GeneID:854644"
+ CDS 37385..38983
+ /gene="SUC2"
+ /locus_tag="YIL162W"
+ /EC_number="3.2.1.26"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0004564
+ beta-fructofuranosidase activity
+ [PMID:363706|PMID:3902817]"
+ /experiment="EXISTENCE:direct assay:GO:0005576
+ extracellular region [PMID:6341817]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:6341817]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0005987 sucrose
+ catabolic process [PMID:3902817|PMID:363706]"
+ /experiment="EXISTENCE:direct assay:GO:0051670 inulinase
+ activity [PMID:23104410]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:1902927 inulin
+ catabolic process [PMID:23104410]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004564
+ beta-fructofuranosidase activity [PMID:2834091]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005987 sucrose
+ catabolic process [PMID:2834091]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0010147 fructan
+ catabolic process [PMID:23339519]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034484
+ raffinose catabolic process [PMID:23339519]"
+ /note="Invertase; sucrose hydrolyzing enzyme; a secreted,
+ glycosylated form is regulated by glucose repression, and
+ an intracellular, nonglycosylated enzyme is produced
+ constitutively"
+ /codon_start=1
+ /product="beta-fructofuranosidase SUC2"
+ /protein_id="NP_012104.1"
+ /db_xref="GeneID:854644"
+ /db_xref="SGD:S000001424"
+ /translation="MLLQAFLFLLAGFAAKISASMTNETSDRPLVHFTPNKGWMNDPN
+ GLWYDEKDAKWHLYFQYNPNDTVWGTPLFWGHATSDDLTNWEDQPIAIAPKRNDSGAF
+ SGSMVVDYNNTSGFFNDTIDPRQRCVAIWTYNTPESEEQYISYSLDGGYTFTEYQKNP
+ VLAANSTQFRDPKVFWYEPSQKWIMTAAKSQDYKIEIYSSDDLKSWKLESAFANEGFL
+ GYQYECPGLIEVPTEQDPSKSYWVMFISINPGAPAGGSFNQYFVGSFNGTHFEAFDNQ
+ SRVVDFGKDYYALQTFFNTDPTYGSALGIAWASNWEYSAFVPTNPWRSSMSLVRKFSL
+ NTEYQANPETELINLKAEPILNISNAGPWSRFATNTTLTKANSYNVDLSNSTGTLEFE
+ LVYAVNTTQTISKSVFADLSLWFKGLEDPEEYLRMGFEVSASSFFLDRGNSKVKFVKE
+ NPYFTNRMSVNNQPFKSENDLSYYKVYGLLDQNILELYFNDGDVVSTNTYFMTTGNAL
+ GSVNMTTGVDNLFYIDKFQVREVK"
+ gene <39433..>40140
+ /gene="SMU2"
+ /locus_tag="YIL161W"
+ /db_xref="GeneID:854645"
+ mRNA <39433..>40140
+ /gene="SMU2"
+ /locus_tag="YIL161W"
+ /product="Smu2p"
+ /transcript_id="NM_001179509.1"
+ /db_xref="GeneID:854645"
+ CDS 39433..40140
+ /gene="SMU2"
+ /locus_tag="YIL161W"
+ /experiment="EXISTENCE:direct assay:GO:0005515 protein
+ binding [PMID:36113412]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:36113412]"
+ /experiment="EXISTENCE:direct assay:GO:0070651
+ nonfunctional rRNA decay [PMID:36113412]"
+ /note="Protein involved in removal of stalled ribosomes;
+ acts as cofactor for Fap1p and Fpr1p in ubiquitination of
+ stalled monosomes and subsequent degradation; green
+ fluorescent protein (GFP)-fusion protein localizes to the
+ cytoplasm; mRNA is enriched in Scp160p-associated mRNPs;
+ YIL161W is a non-essential gene"
+ /codon_start=1
+ /product="Smu2p"
+ /protein_id="NP_012105.1"
+ /db_xref="GeneID:854645"
+ /db_xref="SGD:S000001423"
+ /translation="MDTKLSVTGAKKSQGKASGLGNEGTPIGNEESTNKAKNGNKKRN
+ KNRNRNKKTETKEQNEPKPVTGGEEVRVEKSQAKNRRRKNNNGANKKNTLHYSKEINV
+ EERKQIAKRQEEIEQCIHTLSDFKLFKKGKHVTSYGYRISPMTDSGKISLKILFNIPL
+ DYPKAPIKLTMKSNEEVSSYMDTVIANFNWKARQLVKEDWRILSQINYLVSELEILKM
+ ENYKQIDKLRNSFYKTI"
+ gene complement(<40191..>41444)
+ /gene="POT1"
+ /locus_tag="YIL160C"
+ /gene_synonym="FOX3; POX3"
+ /db_xref="GeneID:854646"
+ mRNA complement(<40191..>41444)
+ /gene="POT1"
+ /locus_tag="YIL160C"
+ /gene_synonym="FOX3; POX3"
+ /product="acetyl-CoA C-acyltransferase"
+ /transcript_id="NM_001179508.1"
+ /db_xref="GeneID:854646"
+ CDS complement(40191..41444)
+ /gene="POT1"
+ /locus_tag="YIL160C"
+ /gene_synonym="FOX3; POX3"
+ /EC_number="2.3.1.16"
+ /experiment="EXISTENCE:direct assay:GO:0003729 mRNA
+ binding [PMID:21124907]"
+ /experiment="EXISTENCE:direct assay:GO:0003988 acetyl-CoA
+ C-acyltransferase activity [PMID:7754706]"
+ /experiment="EXISTENCE:direct assay:GO:0005758
+ mitochondrial intermembrane space [PMID:22984289]"
+ /experiment="EXISTENCE:direct assay:GO:0005777 peroxisome
+ [PMID:19470242]"
+ /experiment="EXISTENCE:direct assay:GO:0005782 peroxisomal
+ matrix [PMID:7754706|PMID:8125978]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005782
+ peroxisomal matrix [PMID:8125978]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006635 fatty
+ acid beta-oxidation [PMID:12819196]"
+ /note="3-ketoacyl-CoA thiolase with broad chain length
+ specificity; cleaves 3-ketoacyl-CoA into acyl-CoA and
+ acetyl-CoA during beta-oxidation of fatty acids"
+ /codon_start=1
+ /product="acetyl-CoA C-acyltransferase"
+ /protein_id="NP_012106.1"
+ /db_xref="GeneID:854646"
+ /db_xref="SGD:S000001422"
+ /translation="MSQRLQSIKDHLVESAMGKGESKRKNSLLEKRPEDVVIVAANRS
+ AIGKGFKGAFKDVNTDYLLYNFLNEFIGRFPEPLRADLNLIEEVACGNVLNVGAGATE
+ HRAACLASGIPYSTPFVALNRQCSSGLTAVNDIANKIKVGQIDIGLALGVESMTNNYK
+ NVNPLGMISSEELQKNREAKKCLIPMGITNENVAANFKISRKDQDEFAANSYQKAYKA
+ KNEGLFEDEILPIKLPDGSICQSDEGPRPNVTAESLSSIRPAFIKDRGTTTAGNASQV
+ SDGVAGVLLARRSVANQLNLPVLGRYIDFQTVGVPPEIMGVGPAYAIPKVLEATGLQV
+ QDIDIFEINEAFAAQALYCIHKLGIDLNKVNPRGGAIALGHPLGCTGARQVATILREL
+ KKDQIGVVSMCIGTGMGAAAIFIKE"
+ gene <41825..>45952
+ /gene="BNR1"
+ /locus_tag="YIL159W"
+ /db_xref="GeneID:854647"
+ mRNA <41825..>45952
+ /gene="BNR1"
+ /locus_tag="YIL159W"
+ /product="formin BNR1"
+ /transcript_id="NM_001179507.1"
+ /db_xref="GeneID:854647"
+ CDS 41825..45952
+ /gene="BNR1"
+ /locus_tag="YIL159W"
+ /experiment="EXISTENCE:direct assay:GO:0005522 profilin
+ binding [PMID:15923184]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:38656798]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:38656798|PMID:9774458]"
+ /experiment="EXISTENCE:direct assay:GO:0032153 cell
+ division site [PMID:26771880]"
+ /experiment="EXISTENCE:direct assay:GO:0045010 actin
+ nucleation [PMID:30076201]"
+ /experiment="EXISTENCE:direct assay:GO:0051016 barbed-end
+ actin filament capping [PMID:15923184]"
+ /experiment="EXISTENCE:direct assay:GO:0051017 actin
+ filament bundle assembly [PMID:15923184]"
+ /experiment="EXISTENCE:direct assay:GO:0070649
+ formin-nucleated actin cable assembly [PMID:15923184]"
+ /experiment="EXISTENCE:genetic interaction:GO:0032956
+ regulation of actin cytoskeleton organization
+ [PMID:17237521]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051017
+ actin filament bundle assembly [PMID:23671312]"
+ /experiment="EXISTENCE:genetic interaction:GO:0070649
+ formin-nucleated actin cable assembly [PMID:11740491]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071474
+ cellular hyperosmotic response [PMID:17237521]"
+ /experiment="EXISTENCE:genetic interaction:GO:1903475
+ mitotic actomyosin contractile ring assembly
+ [PMID:12419188]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005522
+ profilin binding [PMID:15923184]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051017 actin
+ filament bundle assembly [PMID:23671312]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0070649
+ formin-nucleated actin cable assembly [PMID:15371545]"
+ /note="Formin; nucleates the formation of linear actin
+ filaments; involved in processes such as budding and
+ mitotic spindle orientation which require the formation of
+ polarized actin cables; activity is regulated by Hof1p and
+ by the Bud14p-Kel1p-Kel2p complex; dephosphorylated and
+ delocalized from the division site in a
+ Glc7p/Ref2p-dependent manner; functionally redundant with
+ BNI1"
+ /codon_start=1
+ /product="formin BNR1"
+ /protein_id="NP_012107.1"
+ /db_xref="GeneID:854647"
+ /db_xref="SGD:S000001421"
+ /translation="MDSSPNKKTYRYPRRSLSLHARDRVSEARKLEELNLNDGLVAAG
+ LQLVGVALEKQGTGSHIYMKQKNFSANDVSSSPMVSEEVNGSEMDFNPKCMPQDASLV
+ ERMFDELLKDGTFFWGAAYKNLQNISLRRKWLLICKIRSSNHWGKKKVTSSTTYSTHL
+ ATNELAENAHFLDGLVRNLSTGGMKLSKALYKLEKFLRKQSFLQLFLKDEIYLTTLIE
+ KTLPLISKELQFVYLRCFKILMNNPLARIRALHSEPLIRWFTELLTDQNSNLKCQLLS
+ MELLLLLTYVEGSTGCELIWDQLSILFTDWLEWFDKILADDIAIHSSLYLNWNQLKID
+ YSTTFLLLINSILQGFNNKTALEILNFLKKNNIHNTITFLELAYKDDPNSVVIMEQIK
+ QFKSKESAIFDSMIKTTNDTNSLHPTKDIARIESEPLCLENCLLLKAKDSPVEAPINE
+ IIQSLWKILDSQKPYSESIKLLKLINSLLFYLIDSFQVSTNPSFDETLESAENVDYVF
+ QDSVNKLLDSLQSDEIARRAVTEIDDLNAKISHLNEKLNLVENHDKDHLIAKLDESES
+ LISLKTKEIENLKLQLKATKKRLDQITTHQRLYDQPPSLASSNLSIAGSIIKNNSHGN
+ IIFQNLAKKQQQQQKISLPKRSTSLLKSKRVTSLSSYLTDANNENESQNESEDKSKDS
+ LFQRSTSTINFNIPSMKNITNMQNVSLNSILSELEFSNSLGTQPNYQSSPVLSSVSSS
+ PKLFPRLSSDSLDNGIQLVPEVVKLPQLPPPPPPPPPPPLPQSLLTEAEAKPDGVSCI
+ AAPAPPPLPDLFKTKTCGAVPPPPPPPPLPESLSMNKGPSNHDLVTPPAPPLPNGLLS
+ SSSVSINPTTTDLKPPPTEKRLKQIHWDKVEDIKDTLWEDTFQRQETIKELQTDGIFS
+ QIEDIFKMKSPTKIANKRNAESSIALSSNNGKSSNELKKISFLSRDLAQQFGINLHMF
+ SQLSDMEFVMKVLNCDNDIVQNVNILKFFCKEELVNIPKSMLNKYEPYSQGKDGKAVS
+ DLQRADRIFLELCINLRFYWNARSKSLLTLSTYERDYYDLIFKLQKIDDAISHLNRSP
+ KFKSLMFIITEIGNHMNKRIVKGIKLKSLTKLAFVRSSIDQNVSFLHFIEKVIRIKYP
+ DIYGFVDDLKNIEDLGKISLEHVESECHEFHKKIEDLVTQFQVGKLSKEENLDPRDQI
+ IKKVKFKINRAKTKSELLIGQCKLTLIDLNKLMKYYGEDPKDKESKNEFFQPFIEFLA
+ MFKKCAKENIEKEEMERVYEQRKSLLDMRTSSNKKSNGSDENDGEKVNRDAVDLLISK
+ LREVKKDPEPLRRRKSTKLNEIAINVHEGDVKTRKDEDHVLLERTHAMLNDIQNI"
+ gene <46201..>46815
+ /gene="AIM20"
+ /locus_tag="YIL158W"
+ /db_xref="GeneID:854648"
+ mRNA <46201..>46815
+ /gene="AIM20"
+ /locus_tag="YIL158W"
+ /product="Aim20p"
+ /transcript_id="NM_001179506.3"
+ /db_xref="GeneID:854648"
+ CDS 46201..46815
+ /gene="AIM20"
+ /locus_tag="YIL158W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922]"
+ /note="hypothetical protein; overexpression causes cell
+ cycle delay or arrest; green fluorescent protein
+ (GFP)-fusion protein localizes to vacuole; null mutant
+ displays elevated frequency of mitochondrial genome loss;
+ relocalizes from nucleus to cytoplasm upon DNA replication
+ stress; AIM20 has a paralog, SKG1, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="Aim20p"
+ /protein_id="NP_012108.3"
+ /db_xref="GeneID:854648"
+ /db_xref="SGD:S000001420"
+ /translation="MGNVSVAVGTAVGIPIAVGVIIALIFWCKLQRRYKKEEIRDADL
+ EKMVMEEVAVSVYDGFKAEINSSSEASTINEKEANQDLKPCQEKTAKAGYTPAYRRQL
+ NASMGTLRPKKQSTAYTNVPVIFSGEKVNYGMVRDPSYSFMYPLTLSRKETSSLRSAS
+ TSNLSSSTENTALHEEIKLDDPYENDFTNYTVNKREFIDSLRPH"
+ gene complement(<46949..>47542)
+ /gene="COA1"
+ /locus_tag="YIL157C"
+ /db_xref="GeneID:854649"
+ mRNA complement(<46949..>47542)
+ /gene="COA1"
+ /locus_tag="YIL157C"
+ /product="Coa1p"
+ /transcript_id="NM_001179505.1"
+ /db_xref="GeneID:854649"
+ CDS complement(46949..47542)
+ /gene="COA1"
+ /locus_tag="YIL157C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:14562095|PMID:14576278|PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005743
+ mitochondrial inner membrane [PMID:17882259]"
+ /experiment="EXISTENCE:direct assay:GO:0031966
+ mitochondrial membrane [PMID:17882260]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0033617
+ mitochondrial cytochrome c oxidase assembly
+ [PMID:17882260|PMID:17882259]"
+ /experiment="EXISTENCE:physical interaction:GO:0033617
+ mitochondrial cytochrome c oxidase assembly
+ [PMID:17882259]"
+ /note="Mitochondrial inner membrane protein; required for
+ assembly of the cytochrome c oxidase complex (complex IV);
+ interacts with complex IV assembly factor Shy1p during the
+ early stages of assembly"
+ /codon_start=1
+ /product="Coa1p"
+ /protein_id="NP_012109.1"
+ /db_xref="GeneID:854649"
+ /db_xref="SGD:S000001419"
+ /translation="MMLRLVTKGLPKVTPSAAKAVLVRGSLLHSFSTSARFNNSVAED
+ EAKIVLKDKNRPLRIDRELPDPTTERRKRIAGFLLFSVAIGSALSLIFNYEKTESPII
+ SNTLYYIRRSPATKNILGESIEFDGIIPWVYGELNSVKGRINITFYIKGDKNVTGTVR
+ LVADRNTHDEEFLIHEWSVTAAGQKIDLLAENTKTPI"
+ gene <47690..>47973
+ /gene="ATG44"
+ /locus_tag="YIL156W-B"
+ /gene_synonym="MCO8; MDI1"
+ /db_xref="GeneID:3628034"
+ mRNA join(<47690..47698,47761..>47973)
+ /gene="ATG44"
+ /locus_tag="YIL156W-B"
+ /gene_synonym="MCO8; MDI1"
+ /product="mitofissin"
+ /transcript_id="NM_001184521.1"
+ /db_xref="GeneID:3628034"
+ CDS join(47690..47698,47761..47973)
+ /gene="ATG44"
+ /locus_tag="YIL156W-B"
+ /gene_synonym="MCO8; MDI1"
+ /experiment="EXISTENCE:direct assay:GO:0005758
+ mitochondrial intermembrane space [PMID:37192628]"
+ /experiment="EXISTENCE:direct assay:GO:0008289 lipid
+ binding [PMID:37192628]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000266
+ mitochondrial fission [PMID:37192628]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000423
+ mitophagy [PMID:37192628]"
+ /note="Mitochondrial fission factor (mitofissin) essential
+ for mitophagy; directly binds to lipid membranes and
+ brings about lipid membrane fragility to facilitate
+ membrane fission; S. pombe homolog complements null
+ mutant"
+ /codon_start=1
+ /product="mitofissin"
+ /protein_id="NP_001027534.1"
+ /db_xref="GeneID:3628034"
+ /db_xref="SGD:S000028511"
+ /translation="MTLVGKLVHISIDLVLVSTCLAGIKRNTGLTPKLETLDNQTMRN
+ YMKRYLNLGESVYDYSVATCGSSTYFARK"
+ gene <48091..>51306
+ /gene="UBP7"
+ /locus_tag="YIL156W"
+ /db_xref="GeneID:854650"
+ mRNA <48091..>51306
+ /gene="UBP7"
+ /locus_tag="YIL156W"
+ /product="ubiquitin-specific protease UBP7"
+ /transcript_id="NM_001179504.1"
+ /db_xref="GeneID:854650"
+ CDS 48091..51306
+ /gene="UBP7"
+ /locus_tag="YIL156W"
+ /EC_number="3.4.19.12"
+ /experiment="EXISTENCE:curator inference:GO:0005737
+ cytoplasm [PMID:8982460]"
+ /experiment="EXISTENCE:direct assay:GO:0004843
+ cysteine-type deubiquitinase activity [PMID:24746795]"
+ /experiment="EXISTENCE:direct assay:GO:0016579 protein
+ deubiquitination [PMID:24746795]"
+ /note="Ubiquitin-specific protease that cleaves
+ ubiquitin-protein fusions; involved in cell cycle
+ progression through S phase; UBP7 has a paralog, UBP11,
+ that arose from the whole genome duplication"
+ /codon_start=1
+ /product="ubiquitin-specific protease UBP7"
+ /protein_id="NP_012110.1"
+ /db_xref="GeneID:854650"
+ /db_xref="SGD:S000001418"
+ /translation="MLDDDKGTAMHPHITPFTPEYSNELLRRVQDLYHEDIKHYYPQL
+ KLEKLLDLLEHTEYLFELYLDSIHHDRPNDALTAFIIGCYYVFLIIPQSLQFQTRNKS
+ YSIYTDLKKMYENEMNMTNVVLMVKKEIGVVLDESVKHGAGIEHRITKKRAFSVPADD
+ LSGQVASLSLDTAAPQDHGLKGTFTEDDAEQSSPVWTAPNLEPNDQLKLALLPEVIPT
+ PAFREPERKTSVPVRPSVLLEDVPSIYHEDDTSFASLNPPFREITADRSVTHRKDSYH
+ SVYMVDSGNLKEDNDDLFNVENDGFIQSLDILQKQSIITAPELFSILSNRVEREKVLL
+ IDLRIPQRSAINHIVAPNLVNVDPNLLWDKQTNTPIYKDDILEHLLKENENFINRNKF
+ DYIVYYTDVKTFMTINFDYAFIFFYLMLTSQKTPLTTVPTTLLGGYEKWKKTLHSYAQ
+ EYHISIEDYLYRPYSQKARLQQEQQQQQQQPDSQDSFSAKESSTKVPEPPSWKPPDLP
+ IRLRKRPPPPPPVSMPTTPEIPPPLPPKIMVHSQVSSISRKPPIPAKQHVKKEQLNSN
+ EIIQRKRQHQHQHYDQQILQPQRAYNIPTIERSPNVYVSLSITGLRNLGNTCYINSMI
+ QCLFAAKTFRTLFISSKYKSYLQPIRSNGSHYSPKLSNSLSMLFNKMYLNGGCSVVPT
+ GFLKVINQLRPDLKIPDDQQDTQEFLMILLDRLHDELSDQQHVANDYPNLLLYNADAL
+ KVSNNEYKHWFDKNVIGNGISPIDDIFQGQMENSLQCKRCGYTTFNYSTFYVLSLAIP
+ RRSMKLSKLGRSTEKRVKLEDCINMFTSDEVLSGENAWDCPRCGPTASVSTSVSALEN
+ EPSIVKSKKKKSRFFTLHTGTKRRHLDFFGDGITEGHNSNNNNTTIFERERSRSPFRM
+ LGGSGKRSSSSTPFSTGGNDSNNSSDYKNKKLTTVKTINFVTLPKILVIHLSRFYYDL
+ TKKNNTVVTYPLILNIILKNNDTMKYKLFGVVNHTGTLISGHYTSLVNKDLEHNVNIG
+ RSKWYYFDDEVVKADRKHGSDKNLKISSSDVYVLFYERVYD"
+ gene complement(<51759..>53708)
+ /gene="GUT2"
+ /locus_tag="YIL155C"
+ /db_xref="GeneID:854651"
+ mRNA complement(<51759..>53708)
+ /gene="GUT2"
+ /locus_tag="YIL155C"
+ /product="glycerol-3-phosphate dehydrogenase"
+ /transcript_id="NM_001179503.1"
+ /db_xref="GeneID:854651"
+ CDS complement(51759..53708)
+ /gene="GUT2"
+ /locus_tag="YIL155C"
+ /EC_number="1.1.5.3"
+ /experiment="EXISTENCE:direct assay:GO:0004368
+ glycerol-3-phosphate dehydrogenase (quinone) activity
+ [PMID:12032156]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:24769239|PMID:14576278|PMID:16823961|PMID:11502169]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane [PMID:16689936]"
+ /experiment="EXISTENCE:direct assay:GO:0006116 NADH
+ oxidation [PMID:12032156]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004368
+ glycerol-3-phosphate dehydrogenase (quinone) activity
+ [PMID:8256521]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005739
+ mitochondrion [PMID:8256521]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006071
+ glycerol metabolic process [PMID:8256521]"
+ /note="Mitochondrial glycerol-3-phosphate dehydrogenase;
+ expression is repressed by both glucose and cAMP and
+ derepressed by non-fermentable carbon sources in a Snf1p,
+ Rsf1p, Hap2/3/4/5 complex dependent manner"
+ /codon_start=1
+ /product="glycerol-3-phosphate dehydrogenase"
+ /protein_id="NP_012111.1"
+ /db_xref="GeneID:854651"
+ /db_xref="SGD:S000001417"
+ /translation="MFSVTRRRAAGAAAAMATATGTLYWMTSQGDRPLVHNDPSYMVQ
+ FPTAAPPQVSRRDLLDRLAKTHQFDVLIIGGGATGTGCALDAATRGLNVALVEKGDFA
+ SGTSSKSTKMIHGGVRYLEKAFWEFSKAQLDLVIEALNERKHLINTAPHLCTVLPILI
+ PIYSTWQVPYIYMGCKFYDFFAGSQNLKKSYLLSKSATVEKAPMLTTDNLKASLVYHD
+ GSFNDSRLNATLAITAVENGATVLNYVEVQKLIKDPTSGKVIGAEARDVETNELVRIN
+ AKCVVNATGPYSDAILQMDRNPSGLPDSPLNDNSKIKSTFNQIAVMDPKMVIPSIGVH
+ IVLPSFYCPKDMGLLDVRTSDGRVMFFLPWQGKVLAGTTDIPLKQVPENPMPTEADIQ
+ DILKELQHYIEFPVKREDVLSAWAGVRPLVRDPRTIPADGKKGSATQGVVRSHFLFTS
+ DNGLITIAGGKWTTYRQMAEETVDKVVEVGGFHNLKPCHTRDIKLAGAEEWTQNYVAL
+ LAQNYHLSSKMSNYLVQNYGTRSSIICEFFKESMENKLPLSLADKENNVIYSSEENNL
+ VNFDTFRYPFTIGELKYSMQYEYCRTPLDFLLRRTRFAFLDAKEALNAVHATVKVMGD
+ EFNWSEKKRQWELEKTVNFIKTFGV"
+ gene complement(<53981..>55021)
+ /gene="IMP21"
+ /locus_tag="YIL154C"
+ /gene_synonym="IMP2"
+ /db_xref="GeneID:854652"
+ mRNA complement(<53981..>55021)
+ /gene="IMP21"
+ /locus_tag="YIL154C"
+ /gene_synonym="IMP2"
+ /product="Imp21p"
+ /transcript_id="NM_001179502.3"
+ /db_xref="GeneID:854652"
+ CDS complement(53981..55021)
+ /gene="IMP21"
+ /locus_tag="YIL154C"
+ /gene_synonym="IMP2"
+ /experiment="EXISTENCE:direct assay:GO:0003713
+ transcription coactivator activity [PMID:8628275]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000429 carbon
+ catabolite regulation of transcription from RNA polymerase
+ II promoter [PMID:7496532]"
+ /note="Transcriptional activator involved in maintenance
+ of ion homeostasis; also involved in protection against
+ DNA damage caused by bleomycin and other oxidants;
+ contains a C-terminal leucine-rich repeat"
+ /codon_start=1
+ /product="Imp21p"
+ /protein_id="NP_012112.3"
+ /db_xref="GeneID:854652"
+ /db_xref="SGD:S000001416"
+ /translation="MQKSILLTKPDGTQSNLHSIKTETPTTVEFDSEQMERGHRERGR
+ SKKKRGERDSNVSSLSRSRSRASSRSRVREEEFLKWTVLRQDPSMRLRVVDVDSEEEG
+ EGNDEDDDDGDGDDMDEEESDEEQVSDIENDLEIDEEFHYDLGMKVLPNFCTSINEVL
+ DSSKPWIAKYEISIRGHENEDVSLEQLDGGYVRAMQLLTKGAGAEAGNQRSFILYTDL
+ SSESTYALTYLMGAAVNQGDTVYIVHWEPSKPTDDSQMFANVARIRKHVMHLFDCVAG
+ VLDDLDVVVLSLTHPYPKHLLNEMIHGLKPVALCCSLSVILSTLQNFVCSVPILAVRK
+ KLKRAKRKGISE"
+ gene <55198..>56379
+ /gene="RRD1"
+ /locus_tag="YIL153W"
+ /gene_synonym="YPA1"
+ /db_xref="GeneID:854653"
+ mRNA <55198..>56379
+ /gene="RRD1"
+ /locus_tag="YIL153W"
+ /gene_synonym="YPA1"
+ /product="peptidylprolyl isomerase RRD1"
+ /transcript_id="NM_001179501.1"
+ /db_xref="GeneID:854653"
+ CDS 55198..56379
+ /gene="RRD1"
+ /locus_tag="YIL153W"
+ /gene_synonym="YPA1"
+ /EC_number="5.2.1.8"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:21129186]"
+ /experiment="EXISTENCE:direct assay:GO:0003755
+ peptidyl-prolyl cis-trans isomerase activity
+ [PMID:16380387]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:15150670]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:15150670]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006281 DNA
+ repair [PMID:9705277|PMID:15150670]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:21129186]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006914
+ autophagy [PMID:31962153]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007052 mitotic
+ spindle organization [PMID:11262194]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0019888 protein
+ phosphatase regulator activity [PMID:12952889]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0033554
+ cellular response to stress [PMID:21129186]"
+ /experiment="EXISTENCE:physical interaction:GO:0000082
+ G1/S transition of mitotic cell cycle [PMID:11134337]"
+ /experiment="EXISTENCE:physical interaction:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:21129186]"
+ /experiment="EXISTENCE:physical interaction:GO:0033554
+ cellular response to stress [PMID:21129186]"
+ /note="Peptidyl-prolyl cis/trans-isomerase; activator of
+ the phosphotyrosyl phosphatase activity of PP2A; involved
+ in G1 phase progression, microtubule dynamics, bud
+ morphogenesis and DNA repair; required for rapid reduction
+ of Sgs1p levels in response to rapamycin; subunit of the
+ Tap42p-Sit4p-Rrd1p complex; protein increases in abundance
+ and relative distribution to the nucleus increases upon
+ DNA replication stress"
+ /codon_start=1
+ /product="peptidylprolyl isomerase RRD1"
+ /protein_id="NP_012113.1"
+ /db_xref="GeneID:854653"
+ /db_xref="SGD:S000001415"
+ /translation="MSLDRVDWPHATFSTPVKRIFDTQTTLDFQSSLAIHRIKYHLHK
+ YTTLISHCSDPDPHATASSIAMVNGLMGVLDKLAHLIDETPPLPGPRRYGNLACREWH
+ HKLDERLPQWLQEMLPSEYHEVVPELQYYLGNSFGSSTRLDYGTGHELSFMATVAALD
+ MLGMFPHMRGADVFLLFNKYYTIMRRLILTYTLEPAGSHGVWGLDDHFHLVYILGSSQ
+ WQLLDAQAPLQPREILDKSLVREYKDTNFYCQGINFINEVKMGPFEEHSPILYDIAVT
+ VPRWSKVCKGLLKMYSVEVLKKFPVVQHFWFGTGFFPWVNIQNGTDLPVFEEKEEESI
+ EQANAGSPGREQTSTRFPTSTSMPPPGVPPSGNNINYLLSHQNQSHRNQTSFSRDRLR
+ R"
+ gene <56545..>57252
+ /gene="VPR1"
+ /locus_tag="YIL152W"
+ /db_xref="GeneID:854654"
+ mRNA <56545..>57252
+ /gene="VPR1"
+ /locus_tag="YIL152W"
+ /product="Vpr1p"
+ /transcript_id="NM_001179500.1"
+ /db_xref="GeneID:854654"
+ CDS 56545..57252
+ /gene="VPR1"
+ /locus_tag="YIL152W"
+ /note="hypothetical protein"
+ /codon_start=1
+ /product="Vpr1p"
+ /protein_id="NP_012114.1"
+ /db_xref="GeneID:854654"
+ /db_xref="SGD:S000001414"
+ /translation="MSHKRRGLVIYQDQKQQQQHPPGQSLSSISWSPTRRPHHPLKQQ
+ STNSFSEILSKSSVQPNVQHDGNHMPISLLVLKQEHHKQQQQQQQRQNIRSQNSTPPL
+ RQLVQESQWTSSASNSSLKKQEKQPQTFYNTDSKLVSQLHSSVKDLDAIIQTHKPKFD
+ TIIRDFSQATILSSNELLIKLPKDQTIILHSRAPKINAEWLQNKVNDPSASLVIDSRS
+ FLTLCNNIKWYLHWKFI"
+ gene complement(<57338..>60694)
+ /gene="ESL1"
+ /locus_tag="YIL151C"
+ /db_xref="GeneID:854655"
+ mRNA complement(<57338..>60694)
+ /gene="ESL1"
+ /locus_tag="YIL151C"
+ /product="Esl1p"
+ /transcript_id="NM_001179499.1"
+ /db_xref="GeneID:854655"
+ CDS complement(57338..60694)
+ /gene="ESL1"
+ /locus_tag="YIL151C"
+ /note="hEST1A/B (SMG5/6)-like protein; contributes to
+ environment-sensing adaptive gene expression responses;
+ Esl1p and Esl2p contain a 14-3-3-like domain and a
+ putative PilT N-terminus ribonuclease domain; ESL1 has a
+ paralog, ESL2, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Esl1p"
+ /protein_id="NP_012115.1"
+ /db_xref="GeneID:854655"
+ /db_xref="SGD:S000001413"
+ /translation="MVDLMVPANDDPSNETDYSRSNNNHTHIVSDMRPTSAAFLHQKR
+ HSSSSHNDTPESSFAKRRVPGIVDPVGKGFIDGITNSQISAQNTPSKTDDASRRPSIS
+ RKVMESTPQVKTSSIPTMDVPKSPYYVNRTMLARNMKVVSRDTYEDNANPQMRADEPL
+ VASNGIYSNSQPQSQVTLSDIRRAPVVAASPPPMIRQLPSAQPNQTFIKKLQEIYKII
+ VVQETELQQRCLYLTTSQTTELKSLWAIYRLNTELIKNYINFIITALLTTQPINDLIM
+ GQEILDIYRIEKRLWVYGIITFLDVLKNFSNFMDPEVCCQFIIYAFISVSNMLEDIPL
+ KYSILWRQRLGDLSRMAISLYPSGFIDWRLSAEYWYTESMKYIYGCGKLYYHIATVQQ
+ NSLEAFVNLGKSVFCQDPFTPSQQTLQLLIENIYQSAFIDRSSGSANNNEIAHRNSQL
+ IDYLKHTEVMLLPSFLENMDLQHVVLMYFKDKFGKDFNGNDVFDTKDMFCQNPESLRY
+ YFRHAPAFAESQLLQLIGFGNPKNPFALLFQLPKYLKLKKDKREKKRSEATETSSYTD
+ PFDVQISSESYFQNIDALNSSFNDIPTNLNIWLDSLNHINMTSIQCSIHVLTKFLHAP
+ LVVALPHFLTWLHFIVAILKKLEMVNSKQVVAFWIHFLRRTMPWNSIVTLGNVLVCYM
+ LDNLHPFLKKELEKFYSLELDDLIEYYNENENLPEIWKCWGTLWFDAIKKCDVMEIPG
+ VQDHLFFDSPLDGIVFDEKDEVGEKFWMRSVRAVLLLKGIAKKFPDLGLKVSFQASVF
+ CRRNDIPPDYFLKNLTFKLDAYDEDNYNDNNELDDLYDTIEINEEIEAVNMDPQATPN
+ LSVVSGESIFEYTGYTRLAPDYHCFDKNGGFNSAFIYSQWSNVGNGVTLDVSGESIYD
+ VANNNLSLHWEKIFFDKIAAASKGSDENYNCTLYFVIDATSWLRHFAHIFKLAKNNTL
+ KFAICLTTFQELRYLRGSKDDTVVEAATRSVITIRQLYDEKKIIPMRFTGNIATHVEE
+ NLEFEEQITWKTHVDEFVIDAIAKLNQRFQAERLTDENKNKGKEFAVLVTDDDNMNQK
+ AKDRMIKTCNTKYLFSLGSKLGINSGLCTN"
+ gene complement(<61013..>62728)
+ /gene="MCM10"
+ /locus_tag="YIL150C"
+ /gene_synonym="DNA43"
+ /db_xref="GeneID:854656"
+ mRNA complement(<61013..>62728)
+ /gene="MCM10"
+ /locus_tag="YIL150C"
+ /gene_synonym="DNA43"
+ /product="Mcm10p"
+ /transcript_id="NM_001179498.1"
+ /db_xref="GeneID:854656"
+ CDS complement(61013..62728)
+ /gene="MCM10"
+ /locus_tag="YIL150C"
+ /gene_synonym="DNA43"
+ /experiment="EXISTENCE:direct assay:GO:0003688 DNA
+ replication origin binding [PMID:15494305|PMID:10783164]"
+ /experiment="EXISTENCE:direct assay:GO:0003690
+ double-stranded DNA binding [PMID:19605346]"
+ /experiment="EXISTENCE:direct assay:GO:0003697
+ single-stranded DNA binding [PMID:27751725|PMID:19605346]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:9154825|PMID:11168584|PMID:13680157]"
+ /experiment="EXISTENCE:direct assay:GO:0031298 replication
+ fork protection complex [PMID:16531994]"
+ /experiment="EXISTENCE:direct assay:GO:1902971 mitotic DNA
+ replication DNA duplex unwinding [PMID:38109526]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006270 DNA
+ replication initiation [PMID:10783164]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006271 DNA
+ strand elongation involved in DNA replication
+ [PMID:10783164|PMID:11168584]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000727
+ double-strand break repair via break-induced replication
+ [PMID:20516198]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005657
+ replication fork [PMID:15494305]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006270 DNA
+ replication initiation [PMID:9154825|PMID:10783164]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006271 DNA
+ strand elongation involved in DNA replication
+ [PMID:10783164|PMID:11168584|PMID:9154825]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030466 silent
+ mating-type cassette heterochromatin formation
+ [PMID:16085704|PMID:16328881]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031509
+ subtelomeric heterochromatin formation [PMID:16085704]"
+ /note="Essential chromatin-associated protein; involved in
+ initiation of DNA replication; promotes origin duplex
+ unwinding by head-to-head CMG helicase complexes; required
+ for association of MCM2-7 complex with replication
+ origins; required to stabilize the catalytic subunit of
+ DNA polymerase-alpha; coordinates function of the
+ replication fork helicase; self-associates through its
+ N-terminal domain"
+ /codon_start=1
+ /product="Mcm10p"
+ /protein_id="NP_012116.1"
+ /db_xref="GeneID:854656"
+ /db_xref="SGD:S000001412"
+ /translation="MNDPREILAVDPYNNITSDEEDEQAIARELEFMERKRQALVERL
+ KRKQEFKKPQDPNFEAIEVPQSPTKNRVKVGSHNATQQGTKFEGSNINEVRLSQLQQQ
+ PKPPASTTTYFMEKFQNAKKNEDKQIAKFESMMNARVHTFSTDEKKYVPIITNELESF
+ SNLWVKKRYIPEDDLKRALHEIKILRLGKLFAKIRPPKFQEPEYANWATVGLISHKSD
+ IKFTSSEKPVKFFMFTITDFQHTLDVYIFGKKGVERYYNLRLGDVIAILNPEVLPWRP
+ SGRGNFIKSFNLRISHDFKCILEIGSSRDLGWCPIVNKKTHKKCGSPINISLHKCCDY
+ HREVQFRGTSAKRIELNGGYALGAPTKVDSQPSLYKAKGENGFNIIKGTRKRLSEEEE
+ RLKKSSHNFTNSNSAKAFFDEKFQNPDMLANLDNKRRKIIETKKSTALSRELGKIMRR
+ RESSGLEDKSVGERQKMKRTTESALQTGLIQRLGFDPTHGKISQVLKSSVSGSEPKNN
+ LLGKKKTVINDLLHYKKEKVILAPSKNEWFKKRSHREEVWQKHFGSKETKETSDGSAS
+ DLEII"
+ gene complement(<63028..>68067)
+ /gene="MLP2"
+ /locus_tag="YIL149C"
+ /db_xref="GeneID:854657"
+ mRNA complement(<63028..>68067)
+ /gene="MLP2"
+ /locus_tag="YIL149C"
+ /product="Mlp2p"
+ /transcript_id="NM_001179497.3"
+ /db_xref="GeneID:854657"
+ CDS complement(63028..68067)
+ /gene="MLP2"
+ /locus_tag="YIL149C"
+ /experiment="EXISTENCE:direct assay:GO:0005635 nuclear
+ envelope [PMID:10085285]"
+ /experiment="EXISTENCE:direct assay:GO:0005654 nucleoplasm
+ [PMID:10085285]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0044615 nuclear
+ pore nuclear basket [PMID:24152732]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006355
+ regulation of DNA-templated transcription [PMID:15692572]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006606
+ protein import into nucleus [PMID:10085285]"
+ /experiment="EXISTENCE:genetic interaction:GO:0034398
+ telomere tethering at nuclear periphery [PMID:10638763]"
+ /experiment="EXISTENCE:genetic interaction:GO:0043021
+ ribonucleoprotein complex binding [PMID:15692572]"
+ /experiment="EXISTENCE:genetic interaction:GO:1901925
+ negative regulation of protein import into nucleus during
+ spindle assembly checkpoint [PMID:23177738]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000972
+ transcription-dependent tethering of RNA polymerase II
+ gene DNA at nuclear periphery [PMID:20932479]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000973
+ post-transcriptional tethering of RNA polymerase II gene
+ DNA at nuclear periphery [PMID:20932479]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016973
+ poly(A)+ mRNA export from nucleus [PMID:21036941]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051300 spindle
+ pole body organization [PMID:16027220]"
+ /experiment="EXISTENCE:physical interaction:GO:0005816
+ spindle pole body [PMID:16027220]"
+ /experiment="EXISTENCE:physical interaction:GO:0043021
+ ribonucleoprotein complex binding [PMID:15692572]"
+ /note="Myosin-like protein associated with the nuclear
+ envelope; nuclear basket protein that connects the nuclear
+ pore complex with the nuclear interior; involved in the
+ Tel1p pathway that controls telomere length; MLP2 has a
+ paralog, MLP1, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Mlp2p"
+ /protein_id="NP_012117.3"
+ /db_xref="GeneID:854657"
+ /db_xref="SGD:S000001411"
+ /translation="MEDKISEFLNVPFESLQGVTYPVLRKLYKKIAKFERSEEEVTKL
+ NVLVDEIKSQYYSRISKLKQLLDESSEQKNTAKEELNGLKDQLNEERSRYRREIDALK
+ KQLHVSHEAMREVNDEKRVKEEYDIWQSRDQGNDSLNDDLNKENKLLRRKLMEMENIL
+ QRCKSNAISLQLKYDTSVQEKELMLQSKKLIEEKLSSFSKKTLTEEVTKSSHVENLEE
+ KLYQMQSNYESVFTYNKFLLNQNKQLSQSVEEKVLEMKNLKDTASVEKAEFSKEMTLQ
+ KNMNDLLRSQLTSLEKDCSLRAIEKNDDNSCRNPEHTDVIDELIDTKLRLEKSKNECQ
+ RLQNIVMDCTKEEEATMTTSAVSPTVGKLFSDIKVLKRQLIKERNQKFQLQNQLEDFI
+ LELEHKTPELISFKERTKSLEHELKRSTELLETVSLTKRKQEREITSLRQKINGCEAN
+ IHSLVKQRLDLARQVKLLLLNTSAIQETASPLSQDELISLRKILESSNIVNENDSQAI
+ ITERLVEFSNVNELQEKNVELLNCIRILADKLENYEGKQDKTLQKVENQTIKEAKDAI
+ IELENINAKMETRINILLRERDSYKLLASTEENKANTNSVTSMEAAREKKIRELEAEL
+ SSTKVENSAIIQNLRKELLIYKKSQCKKKTTLEDFENFKGLAKEKERMLEEAIDHLKA
+ ELEKQKSWVPSYIHVEKERASTELSQSRIKIKSLEYEISKLKKETASFIPTKESLTRD
+ FEQCCKEKKELQMRLKESEISHNENKMDFSSKEGQYKAKIKELENNLERLRSDLQSKI
+ QEIESIRSCKDSQLKWAQNTIDDTEMKMKSLLTELSNKETTIEKLSSEIENLDKELRK
+ TKFQYKFLDQNSDASTLEPTLRKELEQIQVQLKDANSQIQAYEEIISSNENALIELKN
+ ELAKTKENYDAKIELEKKEKWAREEDLSRLRGELGEIRALQPKLKEGALHFVQQSEKL
+ RNEVERIQKMIEKIEKMSTIVQLCKKKEMSQYQSTMKENKDLSELVIRLEKDAADCQA
+ ELTKTKSSLYSAQDLLDKHERKWMEEKADYERELISNIEQTESLRVENSVLIEKVDDT
+ AANNGDKDHLKLVSLFSNLRHERNSLETKLTTCKRELAFVKQKNDSLEKTINDLQRTQ
+ TLSEKEYQCSAVIIDEFKDITKEVTQVNILKENNAILQKSLKNVTEKNREIYKQLNDR
+ QEEISRLQRDLIQTKEQVSINSNKILVYESEMEQCKQRYQDLSQQQKDAQKKDIEKLT
+ NEISDLKGKLSSAENANADLENKFNRLKKQAHEKLDASKKQQAALTNELNELKAIKDK
+ LEQDLHFENAKVIDLDTKLKAHELQSEDVSRDHEKDTYRTLMEEIESLKKELQIFKTA
+ NSSSDAFEKLKVNMEKEKDRIIDERTKEFEKKLQETLNKSTSSEAEYSKDIETLKKEW
+ LKEYEDETLRRIKEAEENLKKRIRLPSEERIQKIISKRKEELEEEFRKKLKENAGSLT
+ FLDNKGSGEDAEEELWNSPSKGNSERPSAVAGFINQKNLKPQEQLKNVKNDVSFNDSQ
+ SMVTNKENNIVDSSAAGNKAIPTFSFGKPFFSSNTSSLQSFQNPFTASQSNINTNAPL
+ RTLNIQPEVAVKAAINFSNVTDLTNNSTDGAKITEIGSTSKRPIESGTSSDPDTKKVK
+ ESPANDQASNE"
+ gene <68708..>69528
+ /gene="RPL40A"
+ /locus_tag="YIL148W"
+ /gene_synonym="CEP52A; UB11; UBI1"
+ /db_xref="GeneID:854658"
+ mRNA join(<68708..68715,69150..>69528)
+ /gene="RPL40A"
+ /locus_tag="YIL148W"
+ /gene_synonym="CEP52A; UB11; UBI1"
+ /product="ubiquitin-ribosomal 60S subunit protein L40A
+ fusion protein"
+ /transcript_id="NM_001179496.1"
+ /db_xref="GeneID:854658"
+ CDS join(68708..68715,69150..69528)
+ /gene="RPL40A"
+ /locus_tag="YIL148W"
+ /gene_synonym="CEP52A; UB11; UBI1"
+ /experiment="EXISTENCE:curator inference:GO:0002181
+ cytoplasmic translation [PMID:22096102]"
+ /experiment="EXISTENCE:curator inference:GO:0003735
+ structural constituent of ribosome [PMID:22096102]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0022625 cytosolic
+ large ribosomal subunit [PMID:22096102]"
+ /experiment="EXISTENCE:direct assay:GO:0042254 ribosome
+ biogenesis [PMID:2538753]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000027
+ ribosomal large subunit assembly [PMID:22995916]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000055
+ ribosomal large subunit export from nucleus
+ [PMID:22995916]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016567 protein
+ ubiquitination [PMID:8035826]"
+ /note="Ubiquitin-ribosomal 60S subunit protein L40A fusion
+ protein; cleaved to yield ubiquitin and ribosomal protein
+ L40A; ubiquitin may facilitate assembly of the ribosomal
+ protein into ribosomes; homologous to mammalian ribosomal
+ protein L40, no bacterial homolog; RPL40A has a paralog,
+ RPL40B, that arose from the whole genome duplication;
+ relative distribution to the nucleus increases upon DNA
+ replication stress"
+ /codon_start=1
+ /product="ubiquitin-ribosomal 60S subunit protein L40A
+ fusion protein"
+ /protein_id="NP_012118.1"
+ /db_xref="GeneID:854658"
+ /db_xref="SGD:S000001410"
+ /translation="MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLI
+ FAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYAR
+ LPPRATNCRKRKCGHTNQLRPKKKLK"
+ gene complement(<69791..>73453)
+ /gene="SLN1"
+ /locus_tag="YIL147C"
+ /gene_synonym="YPD2"
+ /db_xref="GeneID:854659"
+ mRNA complement(<69791..>73453)
+ /gene="SLN1"
+ /locus_tag="YIL147C"
+ /gene_synonym="YPD2"
+ /product="histidine kinase"
+ /transcript_id="NM_001179495.1"
+ /db_xref="GeneID:854659"
+ CDS complement(69791..73453)
+ /gene="SLN1"
+ /locus_tag="YIL147C"
+ /gene_synonym="YPD2"
+ /EC_number="2.7.13.3"
+ /experiment="EXISTENCE:direct assay:GO:0004673 protein
+ histidine kinase activity [PMID:8808622]"
+ /experiment="EXISTENCE:direct assay:GO:0005034 osmosensor
+ activity [PMID:12821642]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:14665464|PMID:10198019]"
+ /experiment="EXISTENCE:direct assay:GO:0007234 osmosensory
+ signaling via phosphorelay pathway [PMID:8808622]"
+ /experiment="EXISTENCE:direct assay:GO:0009927 histidine
+ phosphotransfer kinase activity [PMID:12455952]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009927
+ histidine phosphotransfer kinase activity [PMID:12455952]"
+ /note="Transmembrane histidine phosphotransfer kinase and
+ osmosensor; regulates MAP kinase cascade; transmembrane
+ protein with an intracellular kinase domain that signals
+ to Ypd1p and Ssk1p, thereby forming a phosphorelay system
+ similar to bacterial two-component regulators"
+ /codon_start=1
+ /product="histidine kinase"
+ /protein_id="NP_012119.1"
+ /db_xref="GeneID:854659"
+ /db_xref="SGD:S000001409"
+ /translation="MRFGLPSKLELTPPFRIGIRTQLTALVSIVALGSLIILAVTTGV
+ YFTSNYKNLRSDRLYIAAQLKSSQIDQTLNYLYYQAYYLASRDALQSSLTSYVAGNKS
+ ADNWVDSLSVIQKFLSSSNLFYVAKVYDSSFNAVLNATNNGTGDLIPEDVLDSLFPLS
+ TDTPLPSSLETIGILTDPVLNSTDYLMSMSLPIFANPSIILTDSRVYGYITIIMSAEG
+ LKSVFNDTTALEHSTIAIISAVYNSQGKASGYHFVFPPYGSRSDLPQKVFSIKNDTFI
+ SSAFRNGKGGSLKQTNILSTRNTALGYSPCSFNLVNWVAIVSQPESVFLSPATKLAKI
+ ITGTVIAIGVFVILLTLPLAHWAVQPIVRLQKATELITEGRGLRPSTPRTISRASSFK
+ RGFSSGFAVPSSLLQFNTAEAGSTTSVSGHGGSGHGSGAAFSANSSMKSAINLGNEKM
+ SPPEEENKIPNNHTDAKISMDGSLNHDLLGPHSLRHNDTDRSSNRSHILTTSANLTEA
+ RLPDYRRLFSDELSDLTETFNTMTDALDQHYALLEERVRARTKQLEAAKIEAEAANEA
+ KTVFIANISHELRTPLNGILGMTAISMEETDVNKIRNSLKLIFRSGELLLHILTELLT
+ FSKNVLQRTKLEKRDFCITDVALQIKSIFGKVAKDQRVRLSISLFPNLIRTMVLWGDS
+ NRIIQIVMNLVSNALKFTPVDGTVDVRMKLLGEYDKELSEKKQYKEVYIKKGTEVTEN
+ LETTDKYDLPTLSNHRKSVDLESSATSLGSNRDTSTIQEEITKRNTVANESIYKKVND
+ REKASNDDVSSIVSTTTSSYDNAIFNSQFNKAPGSDDEEGGNLGRPIENPKTWVISIE
+ VEDTGPGIDPSLQESVFHPFVQGDQTLSRQYGGTGLGLSICRQLANMMHGTMKLESKV
+ GVGSKFTFTLPLNQTKEISFADMEFPFEDEFNPESRKNRRVKFSVAKSIKSRQSTSSV
+ ATPATNRSSLTNDVLPEVRSKGKHETKDVGNPNMGREEKNDNGGLEQLQEKNIKPSIC
+ LTGAEVNEQNSLSSKHRSRHEGLGSVNLDRPFLQSTGTATSSRNIPTVKDDDKNETSV
+ KILVVEDNHVNQEVIKRMLNLEGIENIELACDGQEAFDKVKELTSKGENYNMIFMDVQ
+ MPKVDGLLSTKMIRRDLGYTSPIVALTAFADDSNIKECLESGMNGFLSKPIKRPKLKT
+ ILTEFCAAYQGKKNNK"
+ rep_origin 73820..73991
+ /note="ARS907; Autonomously replicating sequence"
+ /db_xref="SGD:S000178111"
+ gene complement(<74184..>75773)
+ /gene="ATG32"
+ /locus_tag="YIL146C"
+ /gene_synonym="ECM37"
+ /db_xref="GeneID:854660"
+ mRNA complement(<74184..>75773)
+ /gene="ATG32"
+ /locus_tag="YIL146C"
+ /gene_synonym="ECM37"
+ /product="mitophagy protein ATG32"
+ /transcript_id="NM_001179494.1"
+ /db_xref="GeneID:854660"
+ CDS complement(74184..75773)
+ /gene="ATG32"
+ /locus_tag="YIL146C"
+ /gene_synonym="ECM37"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:29673596]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane
+ [PMID:19619494|PMID:19619495]"
+ /experiment="EXISTENCE:genetic interaction:GO:0031930
+ mitochondria-nucleus signaling pathway [PMID:30298458]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000422
+ autophagy of mitochondrion [PMID:19619495|PMID:19619494]"
+ /note="Mitochondrial outer membrane protein required to
+ initiate mitophagy; recruits the autophagy adaptor protein
+ Atg11p and the ubiquitin-like protein Atg8p to the
+ mitochondrial surface to initiate mitophagy, the selective
+ vacuolar degradation of mitochondria in response to
+ starvation; can promote pexophagy when placed ectopically
+ in the peroxisomal membrane; regulates mitophagy and
+ ethanol production during alcoholic fermentation"
+ /codon_start=1
+ /product="mitophagy protein ATG32"
+ /protein_id="NP_012120.1"
+ /db_xref="GeneID:854660"
+ /db_xref="SGD:S000001408"
+ /translation="MVLEYQQREGKGSSSKSMPPDSSSTTIHTCSEAQTGEDKGLLDP
+ HLSVLELLSKTGHSPSPMGQNLVTSIDISGNHNVNDSISGSWQAIQPLDLGASFIPER
+ CSSQTTNGSILSSSDTSEEEQELLQAPAADIINIIKQGQEGANVVSPSHPFKQLQKII
+ SLPLPGKEKTPFNEQDDDGDEDEAFEEDSVTITKSLTSSTNSFVMPKLSLTQKNPVFR
+ LLILGRTGSSFYQSIPKEYQSLFELPKYHDSATFPQYTGIVIIFQELREMVSLLNRIV
+ QYSQGKPVIPICQPGQVIQVKNVLKSFLRNKLVKLLFPPVVVTNKRDLKKMFQRLQDL
+ SLEYGEDVNEEDNDDEAIHTKSRSYCRNKKAENSKKKSPKSNKKPKRKKQKFFTSWFT
+ WGISITIGISFGCCVTYFVTAAYEHQTVKSLSLRPSILASLLSLDSSSDTINTPATAS
+ PSSTEQFLWFDKGTLQINFHSDGFIMKSLTIIKETWGKMNTFVLHALSKPLKFLENLN
+ KSSEFSIDESNRILALGYILL"
+ gene complement(<76354..>77283)
+ /gene="PAN6"
+ /locus_tag="YIL145C"
+ /db_xref="GeneID:854661"
+ mRNA complement(<76354..>77283)
+ /gene="PAN6"
+ /locus_tag="YIL145C"
+ /product="pantoate--beta-alanine ligase PAN6"
+ /transcript_id="NM_001179493.1"
+ /db_xref="GeneID:854661"
+ CDS complement(76354..77283)
+ /gene="PAN6"
+ /locus_tag="YIL145C"
+ /EC_number="6.3.2.1"
+ /experiment="EXISTENCE:direct assay:GO:0004592
+ pantoate-beta-alanine ligase activity [PMID:10417331]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015940
+ pantothenate biosynthetic process [PMID:11154694]"
+ /note="Pantothenate synthase; also known as
+ pantoate-beta-alanine ligase, required for pantothenic
+ acid biosynthesis, deletion causes pantothenic acid
+ auxotrophy, homologous to E. coli panC"
+ /codon_start=1
+ /product="pantoate--beta-alanine ligase PAN6"
+ /protein_id="NP_012121.2"
+ /db_xref="GeneID:854661"
+ /db_xref="SGD:S000001407"
+ /translation="MKIFHTVEEVVQWRTQELRETRFRETIGFVPTMGCLHSGHASLI
+ SQSVKENTYTVVSIFVNPSQFAPTEDLDNYPRTLPDDIKLLESLKVDVLFAPNAHVMY
+ PQGIPLDIEEQKGPFVSVLGLSEKLEGKTRPNFFRGVATVVTKLFNIVMADVAYFGQK
+ DIQQFIVLQCMVDELFVNTRLQMMPIVRNNNGLALSSRNKYLCPESLKISENLYRGLK
+ AAENAIRRLAPGGRLSRSEIIDTVTQIWAPYVDSHDFKIDYVSLADFKTLDELSDVEN
+ TSEQQPIVISCAVYVTDREKPDTVVRLIDNIVI"
+ gene <78074..>80149
+ /gene="NDC80"
+ /locus_tag="YIL144W"
+ /gene_synonym="HEC1; TID3"
+ /db_xref="GeneID:854662"
+ mRNA <78074..>80149
+ /gene="NDC80"
+ /locus_tag="YIL144W"
+ /gene_synonym="HEC1; TID3"
+ /product="kinetochore-associated Ndc80 complex subunit
+ NDC80"
+ /transcript_id="NM_001179492.3"
+ /db_xref="GeneID:854662"
+ CDS 78074..80149
+ /gene="NDC80"
+ /locus_tag="YIL144W"
+ /gene_synonym="HEC1; TID3"
+ /experiment="EXISTENCE:direct assay:GO:0000776 kinetochore
+ [PMID:11511347|PMID:27170178]"
+ /experiment="EXISTENCE:direct assay:GO:0000779 condensed
+ chromosome, centromeric region [PMID:11266451]"
+ /experiment="EXISTENCE:direct assay:GO:0008017 microtubule
+ binding [PMID:17195848]"
+ /experiment="EXISTENCE:direct assay:GO:0031262 Ndc80
+ complex [PMID:11266451|PMID:11179222]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000776
+ kinetochore [PMID:11179222]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007059
+ chromosome segregation [PMID:11266451]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034501 protein
+ localization to kinetochore [PMID:19893618|PMID:19952112]"
+ /experiment="EXISTENCE:physical interaction:GO:0031262
+ Ndc80 complex [PMID:11179222]"
+ /note="Component of the kinetochore-associated Ndc80
+ complex; conserved coiled-coil protein involved in
+ chromosome segregation, spindle checkpoint activity, and
+ kinetochore assembly and clustering; evolutionarily
+ conserved; complex members include Ndc80p, Nuf2p, Scp24p,
+ and Spc25p; modified by sumoylation"
+ /codon_start=1
+ /product="kinetochore-associated Ndc80 complex subunit
+ NDC80"
+ /protein_id="NP_012122.3"
+ /db_xref="GeneID:854662"
+ /db_xref="SGD:S000001406"
+ /translation="MQSSTSTDQHVLHHMDPHRFTSQIPTATSSQLRRRNSTNQGLTD
+ MINKSIARNTISGTGIPTGGINKNKRTRSTVAGGTNGTALALNDKSNSRNSVSRLSIN
+ QLGSLQQHLSNRDPRPLRDKNFQSAIQEEIYDYLKKNKFDIETNHPISIKFLKQPTQK
+ GFIIIFKWLYLRLDPGYGFTKSIENEIYQILKNLRYPFLESINKSQISAVGGSNWHKF
+ LGMLHWMVRTNIKLDMCLNKVDRSLINQNTQEITILSQPLKTLDEQDQRQERYELMVE
+ KLLIDYFTESYKSFLKLEDNYEPSMQELKLGFEKFVHIINTDIANLQTQNDNLYEKYQ
+ EVMKISQKIKTTREKWKALKSDSNKYENYVNAMKQKSQEWPGKLEKMKSECELKEEEI
+ KALQSNISELHKILRKKGISTEQFELQNQEREKLTRELDKINIQSDKLTSSIKSRKLE
+ AEGIFKSLLDTLRQYDSSIQNLTRSRSQLGHNVNDSSLKINISENLLDRDFHEGISYE
+ QLFPKGSGINESIKKSILKLNDEIQERIKTIEKDNITLEKDIKNLKHDINEKTQINEK
+ LELELSEANSKFELSKQENERLLVAQRIEIEKMEKKINDSNLLMKTKISDAEELVTST
+ ELKLEELKVDLNRKRYKLHQQVIHVIDITSKFKINIQSSLENSENELGNVIEELRNLE
+ FETEHNVTN"
+ gene complement(<80510..>83041)
+ /gene="SSL2"
+ /locus_tag="YIL143C"
+ /gene_synonym="LOM3; RAD25"
+ /db_xref="GeneID:854663"
+ mRNA complement(<80510..>83041)
+ /gene="SSL2"
+ /locus_tag="YIL143C"
+ /gene_synonym="LOM3; RAD25"
+ /product="TFIIH/NER complex ATPase/helicase subunit SSL2"
+ /transcript_id="NM_001179491.1"
+ /db_xref="GeneID:854663"
+ CDS complement(80510..83041)
+ /gene="SSL2"
+ /locus_tag="YIL143C"
+ /gene_synonym="LOM3; RAD25"
+ /EC_number="3.6.4.12"
+ /experiment="EXISTENCE:direct assay:GO:0000112
+ nucleotide-excision repair factor 3 complex
+ [PMID:8855246]"
+ /experiment="EXISTENCE:direct assay:GO:0000439
+ transcription factor TFIIH core complex [PMID:7961739]"
+ /experiment="EXISTENCE:direct assay:GO:0003678 DNA
+ helicase activity [PMID:8202161]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0005675
+ transcription factor TFIIH holo complex
+ [PMID:19818408|PMID:27381459]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0006289
+ nucleotide-excision repair [PMID:8631896]"
+ /experiment="EXISTENCE:direct assay:GO:0006366
+ transcription by RNA polymerase II [PMID:19818408]"
+ /experiment="EXISTENCE:direct assay:GO:0032508 DNA duplex
+ unwinding [PMID:22751016]"
+ /experiment="EXISTENCE:direct assay:GO:0097550
+ transcription preinitiation complex [PMID:22751016]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000019
+ regulation of mitotic recombination [PMID:10713167]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001111 RNA
+ polymerase II promoter clearance [PMID:10713451]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001113
+ transcription open complex formation at RNA polymerase II
+ promoter [PMID:10409754]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001174
+ transcriptional start site selection at RNA polymerase II
+ promoter [PMID:34652274]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006367
+ transcription initiation at RNA polymerase II promoter
+ [PMID:25775526]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015616 DNA
+ translocase activity [PMID:25775526]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016973
+ poly(A)+ mRNA export from nucleus [PMID:17212653]"
+ /note="Component of RNA polymerase transcription factor
+ TFIIH holoenzyme; acts as dsDNA-dependent translocase in
+ context of TFIIH, unwinds DNA strands during initiation
+ and promotes transcription start site (TSS) scanning; has
+ DNA-dependent ATPase/helicase activity; interacts
+ functionally with TFIIB, has roles in TSS selection and
+ gene looping to juxtapose initiation and termination
+ regions; involved in DNA repair; relocalizes to cytosol
+ under hypoxia; homolog of human ERCC3"
+ /codon_start=1
+ /product="TFIIH/NER complex ATPase/helicase subunit SSL2"
+ /protein_id="NP_012123.1"
+ /db_xref="GeneID:854663"
+ /db_xref="SGD:S000001405"
+ /translation="MTDVEGYQPKSKGKIFPDMGESFFSSDEDSPATDAEIDENYDDN
+ RETSEGRGERDTGAMVTGLKKPRKKTKSSRHTAADSSMNQMDAKDKALLQDTNSDIPA
+ DFVPDSVSGMFRSHDFSYLRLRPDHASRPLWISPSDGRIILESFSPLAEQAQDFLVTI
+ AEPISRPSHIHEYKITAYSLYAAVSVGLETDDIISVLDRLSKVPVAESIINFIKGATI
+ SYGKVKLVIKHNRYFVETTQADILQMLLNDSVIGPLRIDSDHQVQPPEDVLQQQLQQT
+ AGKPATNVNPNDVEAVFSAVIGGDNEREEEDDDIDAVHSFEIANESVEVVKKRCQEID
+ YPVLEEYDFRNDHRNPDLDIDLKPSTQIRPYQEKSLSKMFGNGRARSGIIVLPCGAGK
+ TLVGITAACTIKKSVIVLCTSSVSVMQWRQQFLQWCTLQPENCAVFTSDNKEMFQTES
+ GLVVSTYSMVANTRNRSHDSQKVMDFLTGREWGFIILDEVHVVPAAMFRRVVSTIAAH
+ AKLGLTATLVREDDKIGDLNFLIGPKLYEANWMELSQKGHIANVQCAEVWCPMTAEFY
+ QEYLRETARKRMLLYIMNPTKFQACQFLIQYHERRGDKIIVFSDNVYALQEYALKMGK
+ PFIYGSTPQQERMNILQNFQYNDQINTIFLSKVGDTSIDLPEATCLIQISSHYGSRRQ
+ EAQRLGRILRAKRRNDEGFNAFFYSLVSKDTQEMYYSTKRQAFLVDQGYAFKVITHLH
+ GMENIPNLAYASPRERRELLQEVLLKNEEAAGIEVGDDADNSVGRGSNGHKRFKSKAV
+ RGEGSLSGLAGGEDMAYMEYSTNKNKELKEHHPLIRKMYYKNLKK"
+ gene <83302..>84885
+ /gene="CCT2"
+ /locus_tag="YIL142W"
+ /gene_synonym="BIN3; TCP2"
+ /db_xref="GeneID:854664"
+ mRNA <83302..>84885
+ /gene="CCT2"
+ /locus_tag="YIL142W"
+ /gene_synonym="BIN3; TCP2"
+ /product="chaperonin-containing T-complex subunit CCT2"
+ /transcript_id="NM_001179490.1"
+ /db_xref="GeneID:854664"
+ CDS 83302..84885
+ /gene="CCT2"
+ /locus_tag="YIL142W"
+ /gene_synonym="BIN3; TCP2"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276]"
+ /experiment="EXISTENCE:direct assay:GO:0005832
+ chaperonin-containing T-complex [PMID:16762366]"
+ /experiment="EXISTENCE:direct assay:GO:0006457 protein
+ folding [PMID:16762366]"
+ /experiment="EXISTENCE:direct assay:GO:0051082 unfolded
+ protein binding [PMID:16762366]"
+ /experiment="EXISTENCE:physical interaction:GO:0005832
+ chaperonin-containing T-complex [PMID:15704212]"
+ /note="Subunit beta of the cytosolic chaperonin Cct ring
+ complex; related to Tcp1p, required for the assembly of
+ actin and tubulins in vivo"
+ /codon_start=1
+ /product="chaperonin-containing T-complex subunit CCT2"
+ /protein_id="NP_012124.1"
+ /db_xref="GeneID:854664"
+ /db_xref="SGD:S000001404"
+ /translation="MSVQIFGDQVTEERAENARLSAFVGAIAVGDLVKSTLGPKGMDK
+ LLQSASSNTCMVTNDGATILKSIPLDNPAAKVLVNISKVQDDEVGDGTTSVTVLSAEL
+ LREAEKLIDQSKIHPQTIIEGYRLASAAALDALTKAAVDNSHDKTMFREDLIHIAKTT
+ LSSKILSQDKDHFAELATNAILRLKGSTNLEHIQIIKILGGKLSDSFLDEGFILAKKF
+ GNNQPKRIENAKILIANTTLDTDKVKIFGTKFKVDSTAKLAQLEKAEREKMKNKIAKI
+ SKFGINTFINRQLIYDYPEQLFTDLGINSIEHADFEGVERLALVTGGEVVSTFDEPSK
+ CKLGECDVIEEIMLGEQPFLKFSGCKAGEACTIVLRGATDQTLDEAERSLHDALSVLS
+ QTTKETRTVLGGGCAEMVMSKAVDTEAQNIDGKKSLAVEAFARALRQLPTILADNAGF
+ DSSELVSKLRSSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVL
+ LRVDNIIRARPRTANRQHM"
+ gene <85366..>87837
+ /gene="AXL2"
+ /locus_tag="YIL140W"
+ /gene_synonym="BUD10; SRO4"
+ /db_xref="GeneID:854666"
+ mRNA <85366..>87837
+ /gene="AXL2"
+ /locus_tag="YIL140W"
+ /gene_synonym="BUD10; SRO4"
+ /product="Axl2p"
+ /transcript_id="NM_001179488.1"
+ /db_xref="GeneID:854666"
+ CDS 85366..87837
+ /gene="AXL2"
+ /locus_tag="YIL140W"
+ /gene_synonym="BUD10; SRO4"
+ /experiment="EXISTENCE:direct assay:GO:0000131 incipient
+ cellular bud site [PMID:8846915]"
+ /experiment="EXISTENCE:direct assay:GO:0000144 cellular
+ bud neck septin ring [PMID:8846915]"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:8846915]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:15282802]"
+ /experiment="EXISTENCE:direct assay:GO:0032153 cell
+ division site [PMID:8846915]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007120 axial
+ cellular bud site selection [PMID:8846915]"
+ /note="Integral plasma membrane protein; required for
+ axial budding in haploid cells; localizes to the incipient
+ bud site and bud neck; glycosylated by Pmt4p; potential
+ Cdc28p substrate"
+ /codon_start=1
+ /product="Axl2p"
+ /protein_id="NP_012126.1"
+ /db_xref="GeneID:854666"
+ /db_xref="SGD:S000001402"
+ /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
+ TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
+ VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
+ VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
+ TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
+ YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
+ DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
+ DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
+ NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
+ CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
+ NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
+ SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
+ YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
+ HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
+ VDFSNKSNVNVGQVKDIHGRIPEML"
+ gene complement(<87979..>88716)
+ /gene="REV7"
+ /locus_tag="YIL139C"
+ /db_xref="GeneID:854667"
+ mRNA complement(<87979..>88716)
+ /gene="REV7"
+ /locus_tag="YIL139C"
+ /product="Rev7p"
+ /transcript_id="NM_001179487.1"
+ /db_xref="GeneID:854667"
+ CDS complement(87979..88716)
+ /gene="REV7"
+ /locus_tag="YIL139C"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:16546083]"
+ /experiment="EXISTENCE:direct assay:GO:0003887
+ DNA-directed DNA polymerase activity [PMID:8658138]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16452144]"
+ /experiment="EXISTENCE:direct assay:GO:0016035 zeta DNA
+ polymerase complex
+ [PMID:8658138|PMID:23066099|PMID:22711820]"
+ /experiment="EXISTENCE:direct assay:GO:0042276 error-prone
+ translesion synthesis [PMID:11313481]"
+ /experiment="EXISTENCE:direct assay:GO:0070987 error-free
+ translesion synthesis [PMID:12514101]"
+ /experiment="EXISTENCE:genetic interaction:GO:0042276
+ error-prone translesion synthesis [PMID:9765213]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042276
+ error-prone translesion synthesis [PMID:3897795]"
+ /note="Accessory subunit of DNA polymerase zeta (pol
+ zeta); involved in translesion synthesis during
+ post-replication repair; required for mutagenesis induced
+ by DNA damage; involved in double-strand break repair; may
+ be involved in meiosis; forms a complex with Rev3p, Pol31p
+ and Pol32p"
+ /codon_start=1
+ /product="Rev7p"
+ /protein_id="NP_012127.1"
+ /db_xref="GeneID:854667"
+ /db_xref="SGD:S000001401"
+ /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
+ FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
+ KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
+ RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
+ LISGDDKILNGVYSQYEEGESIFGSLF"
+ gene complement(<89230..>89715)
+ /gene="TPM2"
+ /locus_tag="YIL138C"
+ /db_xref="GeneID:854668"
+ mRNA complement(<89230..>89715)
+ /gene="TPM2"
+ /locus_tag="YIL138C"
+ /product="tropomyosin TPM2"
+ /transcript_id="NM_001179486.3"
+ /db_xref="GeneID:854668"
+ CDS complement(89230..89715)
+ /gene="TPM2"
+ /locus_tag="YIL138C"
+ /experiment="EXISTENCE:direct assay:GO:0000142 cellular
+ bud neck contractile ring [PMID:9864365]"
+ /experiment="EXISTENCE:direct assay:GO:0003786 actin
+ lateral binding [PMID:11457840]"
+ /experiment="EXISTENCE:direct assay:GO:0005884 actin
+ filament [PMID:7844152]"
+ /experiment="EXISTENCE:direct assay:GO:0032432 actin
+ filament bundle [PMID:9864365]"
+ /experiment="EXISTENCE:direct assay:GO:1904530 negative
+ regulation of actin filament binding [PMID:7844152]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051017
+ actin filament bundle assembly [PMID:9864365]"
+ /experiment="EXISTENCE:genetic interaction:GO:1903475
+ mitotic actomyosin contractile ring assembly
+ [PMID:12419188]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000282
+ cellular bud site selection [PMID:7844152]"
+ /note="Minor isoform of tropomyosin; binds to and
+ stabilizes actin cables and filaments, which direct
+ polarized cell growth and the distribution of several
+ organelles; appears to have distinct and also overlapping
+ functions with Tpm1p; TPM2 has a paralog, TPM1, that arose
+ from the whole genome duplication"
+ /codon_start=1
+ /product="tropomyosin TPM2"
+ /protein_id="NP_012128.3"
+ /db_xref="GeneID:854668"
+ /db_xref="SGD:S000001400"
+ /translation="MEKIKEKLNSLKLESESWQEKYEELREQLKELEQSNTEKENEIK
+ SLSAKNEQLDSEVEKLESQLSDTKQLAEDSNNLRSNNENYTKKNQDLEQQLEDSEAKL
+ KEAMDKLKEADLNSEQMGRRIVALEEERDEWEKKCEEFQSKYEEAQKELDEIANSLEN
+ L"
+ gene complement(<89948..>92788)
+ /gene="TMA108"
+ /locus_tag="YIL137C"
+ /gene_synonym="TAE3"
+ /db_xref="GeneID:854669"
+ mRNA complement(<89948..>92788)
+ /gene="TMA108"
+ /locus_tag="YIL137C"
+ /gene_synonym="TAE3"
+ /product="Tma108p"
+ /transcript_id="NM_001179485.1"
+ /db_xref="GeneID:854669"
+ CDS complement(89948..92788)
+ /gene="TMA108"
+ /locus_tag="YIL137C"
+ /gene_synonym="TAE3"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005854 nascent
+ polypeptide-associated complex [PMID:27580715]"
+ /experiment="EXISTENCE:direct assay:GO:1990593 nascent
+ polypeptide-associated complex binding [PMID:27580715]"
+ /experiment="EXISTENCE:genetic interaction:GO:0042254
+ ribosome biogenesis [PMID:20691087]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042254
+ ribosome biogenesis [PMID:20691087]"
+ /experiment="EXISTENCE:mutant phenotype:GO:2000765
+ regulation of cytoplasmic translation [PMID:27580715]"
+ /note="Ribosome-associated, nascent chain binding factor;
+ binds N-terminal region of nascent peptides during
+ translation; recognizes target proteins via its putative
+ metallopeptidase peptide-binding pocket"
+ /codon_start=1
+ /product="Tma108p"
+ /protein_id="NP_012129.1"
+ /db_xref="GeneID:854669"
+ /db_xref="SGD:S000001399"
+ /translation="MSDNLLSLENPVVPSHYELRLEIDPKQSSPNFKGSAIIHLKFNP
+ NSTTLASIEDSFTQFKLHSKDLIVLSAHATIGSTKFDLKISQDTGKHLSIFNSESPIQ
+ LSNDCPLILSVQYVGKIRDIKTHHDKTFGIFKTNFMDRKTGTANNHVVATHCQPFSAS
+ NIFPCIDEPSNKSTFQLNIATDAQYKAVSNTPVEMVEALDSSQKHLVKFAKTPLMTTS
+ VFGFSIGDLEFLKTEIKLEGDRTIPVSIYAPWDIANAAFTLDTVQKYLPLLESYFKCP
+ YPLPKLDFVLLPYLSDMAMENFGMITIQLNHLLIPPNALANETVREQAQQLIVHELVH
+ QWMGNYISFDSWESLWFNESFATWLACHILEQNGDLSHYWTSEPYLLQQVEPTMCRDA
+ ADVNGRSIFQIAQRNTGIDSQTSDIFDPEAYTKGIIMLRSLQLATGESHLQKGLESVF
+ EDTKTFHARSVKPMDIWNHIGKFLKSQNITNFVSSWTRTPGLPVVKVEVEEKDGKTQT
+ KLTQHRFINQLSTEEKDQLEDVPYQVPLFGVLPDGKMDTKNVLLTDRTLKFDYPILVI
+ NHLAQGYYRVSYESEECYALINDKITEETLSEIDLRKIFLDLSQFIGDEGFQNSIHLH
+ GLFKILNHIASPSTKIASKYWDPLSKGLEVLQTIDRASLTSSKLQSFLKKKIVIPLFN
+ KIDWPHGEFDKSTNPHELKVMSQVLFLNKNSAKCAELCQIYFKHLLQGPRSSVPLELV
+ NSILVVVSQHCANIKQWKKIFDLVKRSSCTGITNHVINMYDQNSSETAMLIQNGAIES
+ LGFCLDSDIVKKTLNFITSNIESEGMELALFGFNYNFKKRLNKNEKPQDQVVRETIWE
+ WYMGNFDQWARKATRKGTTTGDHLHKALRSISLIIFQMFVADEPQKIEKFINLEKEKL
+ GQSLLSLDDIWASVQQDEESRKTIRRDLASLV"
+ gene <93619..>94800
+ /gene="OM45"
+ /locus_tag="YIL136W"
+ /db_xref="GeneID:854670"
+ mRNA <93619..>94800
+ /gene="OM45"
+ /locus_tag="YIL136W"
+ /product="Om45p"
+ /transcript_id="NM_001179484.1"
+ /db_xref="GeneID:854670"
+ CDS 93619..94800
+ /gene="OM45"
+ /locus_tag="YIL136W"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239|PMID:16823961|PMID:11502169]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane
+ [PMID:16407407|PMID:11179417|PMID:16689936]"
+ /note="Mitochondrial outer membrane hypothetical protein;
+ major constituent of the outer membrane, extending into
+ the intermembrane space; interacts with porin (Por1p) and
+ with Om14p; imported via the presequence pathway involving
+ the TOM and TIM23 complexes, then assembled in the outer
+ membrane by Mim1p; protein abundance increases in response
+ to DNA replication stress"
+ /codon_start=1
+ /product="Om45p"
+ /protein_id="NP_012130.1"
+ /db_xref="GeneID:854670"
+ /db_xref="SGD:S000001398"
+ /translation="MSSRIIVGSAALAAAITASIMVREQKAKGQRREGNVSAYYNGQE
+ YGSSAPPQLGKLHNIKQGIKEDALSLKDALLGVSQKAREEAPKVTKRVISPEEDAQTR
+ KQLGQKAKDSSSQSIFNWGFSEAERRKAIAIGEFDTAKKRFEEAVDRNEKELLSTVMR
+ EKKAALDRASIEYERYGRARDFNELSDKLDQQERNSNPLKRLLKNNTGDANTEEAAAR
+ SVQGWGDTAQEFGREELEEAKRNASSEPSEAQKRLDELKKIKEKGWFGYNKGEQSEQQ
+ IAERVARGLEGWGETAAQLSKDEMDDLRWNYENSKKQLDKNVSDAMDSLSKAKEDLKQ
+ YGSHWWSGWTSKVDNDKQALKDEAQKKYDEALKKYDEAKNKFKEWNDKGDGKFWSSKK
+ D"
+ gene complement(<95065..>96375)
+ /gene="VHS2"
+ /locus_tag="YIL135C"
+ /db_xref="GeneID:854671"
+ mRNA complement(<95065..>96375)
+ /gene="VHS2"
+ /locus_tag="YIL135C"
+ /product="Vhs2p"
+ /transcript_id="NM_001179483.1"
+ /db_xref="GeneID:854671"
+ CDS complement(95065..96375)
+ /gene="VHS2"
+ /locus_tag="YIL135C"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095|PMID:16816427]"
+ /experiment="EXISTENCE:genetic interaction:GO:0030950
+ establishment or maintenance of actin cytoskeleton
+ polarity [PMID:16816427]"
+ /experiment="EXISTENCE:genetic interaction:GO:0032186
+ cellular bud neck septin ring organization
+ [PMID:24646733]"
+ /experiment="EXISTENCE:genetic interaction:GO:0034605
+ cellular response to heat [PMID:16816427]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032186
+ cellular bud neck septin ring organization
+ [PMID:24646733]"
+ /note="Regulator of septin dynamics; involved in the
+ regulation of septin dynamics at bud neck after mitotic
+ entry, likely by stabilizing septin structure; regulated
+ at post-translational level by cell cycle dependent
+ phosphorylation; likely phosphorylated by Cdc28p and
+ dephosphorylated by Cdc14p before cytokinesis; high-copy
+ suppressor of synthetic lethality of sis2 sit4 double
+ mutant; VHS2 has a paralog, MLF3, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="Vhs2p"
+ /protein_id="NP_012131.1"
+ /db_xref="GeneID:854671"
+ /db_xref="SGD:S000001397"
+ /translation="MDTSNHNQDHDSHVAAQRENDNNYMPPSPSMSESSMIFERNVED
+ PSYLYKTVSNNAANSLSRQSSRTSLFNHNNSSNRNFHNLSQRSSAVNLHLQPSRTNES
+ IASYQTYNPDFVVQTPLDHRRTLENFVPPALDAGCSIVTDDTTGLDDVDMVYSRRPST
+ IGLDRALGRTRSLSSQSFDNETSPAHPRSPNDHGSRLLRFYSYADMLSDDNNNNVSNA
+ TSTSSTANPLRRPPMQGHYSFSSSLLNSPSHLPSPPSASASPPQHMNFTNPFIISRRY
+ SNTTINNANGGTSAGSTTGAALSRSPSNQQYLLKQQRSPSGSARSRRNSNRPGSAANI
+ MIGKPKSKFHMESSGSEGFSSEEEDNTMIERDKLNLKQKLQSQLAQPPSIANMVNDNH
+ NNTNKHKNTINNNIKNSPAFTNSNPSSKSNSNSTITSMNPDTTK"
+ gene complement(<96522..>96725)
+ /locus_tag="YIL134C-A"
+ /db_xref="GeneID:1466491"
+ mRNA complement(<96522..>96725)
+ /locus_tag="YIL134C-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001184555.1"
+ /db_xref="GeneID:1466491"
+ CDS complement(96522..96725)
+ /locus_tag="YIL134C-A"
+ /note="hypothetical protein; identified by fungal homology
+ and RT-PCR"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_878096.1"
+ /db_xref="GeneID:1466491"
+ /db_xref="SGD:S000028556"
+ /translation="MVVGSEFNNTADVDVVAILLTLLNADYFVIKQRLVSACFTKRIK
+ WYFIYAIALLSLFSSRNTVCAPY"
+ gene 97111..97246
+ /gene="SNR68"
+ /locus_tag="YNCI0001W"
+ /db_xref="GeneID:9164907"
+ ncRNA 97111..97246
+ /ncRNA_class="snoRNA"
+ /gene="SNR68"
+ /locus_tag="YNCI0001W"
+ /product="SNR68"
+ /experiment="EXISTENCE:curator inference:GO:0005730
+ nucleolus [PMID:10024243]"
+ /experiment="EXISTENCE:curator inference:GO:0031428 box
+ C/D methylation guide snoRNP complex [PMID:10024243]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030562 rRNA
+ 2'-O-ribose methylation guide activity [PMID:10024243]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031167 rRNA
+ methylation [PMID:10024243]"
+ /note="C/D box small nucleolar RNA (snoRNA); guides
+ 2'-O-methylation of large subunit (LSU) rRNA at position
+ A2640"
+ /transcript_id="NR_132190.1"
+ /db_xref="GeneID:9164907"
+ /db_xref="SGD:S000006459"
+ gene <97395..>98330
+ /gene="FLX1"
+ /locus_tag="YIL134W"
+ /db_xref="GeneID:854672"
+ mRNA <97395..>98330
+ /gene="FLX1"
+ /locus_tag="YIL134W"
+ /product="flavin adenine dinucleotide transporter FLX1"
+ /transcript_id="NM_001179482.3"
+ /db_xref="GeneID:854672"
+ CDS 97395..98330
+ /gene="FLX1"
+ /locus_tag="YIL134W"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:26928762|PMID:16823961|PMID:14576278|PMID:8631763]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015230 FAD
+ transmembrane transporter activity
+ [PMID:8631763|PMID:14555654]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015883 FAD
+ transport [PMID:14555654]"
+ /note="Mitochondrial flavin adenine dinucleotide
+ transporter; FAD is a synthesis product of riboflavin;
+ human homolog SLC25A32 is implicated in multiple acyl-CoA
+ dehydrogenase deficiency (MADD) or glutaric aciduria type
+ II (GAII), and can complement yeast null mutant"
+ /codon_start=1
+ /product="flavin adenine dinucleotide transporter FLX1"
+ /protein_id="NP_012132.3"
+ /db_xref="GeneID:854672"
+ /db_xref="SGD:S000001396"
+ /translation="MVDHQWTPLQKEVISGLSAGSVTTLVVHPLDLLKVRLQLSATSA
+ QKAHYGPFMVIKEIIRSSANSGRSVTNELYRGLSINLFGNAIAWGVYFGLYGVTKELI
+ YKSVAKPGETQLKGVGNDHKMNSLIYLSAGASSGLMTAILTNPIWVIKTRIMSTSKGA
+ QGAYTSMYNGVQQLLRTDGFQGLWKGLVPALFGVSQGALYFAVYDTLKQRKLRRKREN
+ GLDIHLTNLETIEITSLGKMVSVTLVYPFQLLKSNLQSFRANEQKFRLFPLIKLIIAN
+ DGFVGLYKGLSANLVRAIPSTCITFCVYENLKHRL"
+ gene complement(<98527..>99416)
+ /gene="RPL16A"
+ /locus_tag="YIL133C"
+ /gene_synonym="RPL13"
+ /db_xref="GeneID:854673"
+ mRNA complement(join(<98527..99095,99386..>99416))
+ /gene="RPL16A"
+ /locus_tag="YIL133C"
+ /gene_synonym="RPL13"
+ /product="ribosomal 60S subunit protein L16A"
+ /transcript_id="NM_001179481.1"
+ /db_xref="GeneID:854673"
+ CDS complement(join(98527..99095,99386..99416))
+ /gene="RPL16A"
+ /locus_tag="YIL133C"
+ /gene_synonym="RPL13"
+ /experiment="EXISTENCE:curator inference:GO:0002181
+ cytoplasmic translation [PMID:11983894]"
+ /experiment="EXISTENCE:curator inference:GO:0003735
+ structural constituent of ribosome [PMID:11983894]"
+ /experiment="EXISTENCE:direct assay:GO:0003723 RNA binding
+ [PMID:6337137]"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:27374275]"
+ /experiment="EXISTENCE:direct assay:GO:0022625 cytosolic
+ large ribosomal subunit [PMID:11983894]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000470
+ maturation of LSU-rRNA [PMID:27374275]"
+ /note="Ribosomal 60S subunit protein L16A; N-terminally
+ acetylated, binds 5.8 S rRNA; transcriptionally regulated
+ by Rap1p; homologous to mammalian ribosomal protein L13A
+ and bacterial L13; RPL16A has a paralog, RPL16B, that
+ arose from the whole genome duplication; protein abundance
+ increases in response to DNA replication stress"
+ /codon_start=1
+ /product="ribosomal 60S subunit protein L16A"
+ /protein_id="NP_012133.1"
+ /db_xref="GeneID:854673"
+ /db_xref="SGD:S000001395"
+ /translation="MSVEPVVVIDGKGHLVGRLASVVAKQLLNGQKIVVVRAEELNIS
+ GEFFRNKLKYHDFLRKATAFNKTRGPFHFRAPSRIFYKALRGMVSHKTARGKAALERL
+ KVFEGIPPPYDKKKRVVVPQALRVLRLKPGRKYTTLGKLSTSVGWKYEDVVAKLEAKR
+ KVSSAEYYAKKRAFTKKVASANATAAESDVAKQLAALGY"
+ gene complement(<99860..>100501)
+ /gene="CSM2"
+ /locus_tag="YIL132C"
+ /db_xref="GeneID:854674"
+ mRNA complement(<99860..>100501)
+ /gene="CSM2"
+ /locus_tag="YIL132C"
+ /product="Csm2p"
+ /transcript_id="NM_001179480.1"
+ /db_xref="GeneID:854674"
+ CDS complement(99860..100501)
+ /gene="CSM2"
+ /locus_tag="YIL132C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0097196 Shu complex
+ [PMID:23575680]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000725
+ recombinational repair [PMID:15654096]"
+ /experiment="EXISTENCE:genetic interaction:GO:0043007
+ maintenance of rDNA [PMID:21372173]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000725
+ recombinational repair [PMID:23575680]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000730 DNA
+ recombinase assembly [PMID:23575680]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0035861 site of
+ double-strand break [PMID:23575680]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045132 meiotic
+ chromosome segregation [PMID:11470404]"
+ /experiment="EXISTENCE:physical interaction:GO:0097196 Shu
+ complex [PMID:15654096]"
+ /note="Subunit of the Shu complex (aka PCSS complex); Shu
+ complex also includes Psy3, Shu1, Shu2, and promotes
+ error-free DNA repair; Shu complex mediates inhibition of
+ Srs2p anti-recombinase function; promotes formation of
+ Rad51p filaments; Psy3p and Csm2p contain similar
+ DNA-binding regions which work together to form a single
+ DNA binding site; required for accurate chromosome
+ segregation during meiosis"
+ /codon_start=1
+ /product="Csm2p"
+ /protein_id="NP_012134.1"
+ /db_xref="GeneID:854674"
+ /db_xref="SGD:S000001394"
+ /translation="MEYEDLELITIWPSPTKNKLCQFIKQNLSKEHVVTQLFFIDATS
+ SFPLSQFQKLVPPTLPENVRIYENIRINTCLDLEELSAITVKLLQILSMNKINAQRGT
+ EDAVTEPLKIILYINGLEVMFRNSQFKSSPQRSHELLRDTLLKLRVMGNDENENASIR
+ TLLEFPKEQLLDYYLKKNNNTRTSSVRSKRRRIKNGDSLAEYIWKYYADSLFE"
+ gene complement(<100781..>102235)
+ /gene="FKH1"
+ /locus_tag="YIL131C"
+ /db_xref="GeneID:854675"
+ mRNA complement(<100781..>102235)
+ /gene="FKH1"
+ /locus_tag="YIL131C"
+ /product="forkhead family transcription factor FKH1"
+ /transcript_id="NM_001179479.1"
+ /db_xref="GeneID:854675"
+ CDS complement(100781..102235)
+ /gene="FKH1"
+ /locus_tag="YIL131C"
+ /experiment="EXISTENCE:direct assay:GO:0000978 RNA
+ polymerase II cis-regulatory region sequence-specific DNA
+ binding [PMID:10894548|PMID:11562353]"
+ /experiment="EXISTENCE:direct assay:GO:0001227 DNA-binding
+ transcription repressor activity, RNA polymerase
+ II-specific [PMID:10894548]"
+ /experiment="EXISTENCE:direct assay:GO:0001228 DNA-binding
+ transcription activator activity, RNA polymerase
+ II-specific [PMID:11562353|PMID:10894548]"
+ /experiment="EXISTENCE:direct assay:GO:0003682 chromatin
+ binding [PMID:24504085]"
+ /experiment="EXISTENCE:direct assay:GO:0003688 DNA
+ replication origin binding [PMID:24504085|PMID:22265405]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:20847055|PMID:22932476|PMID:10747051]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0019237 centromeric
+ DNA binding [PMID:10683251]"
+ /experiment="EXISTENCE:direct assay:GO:0043565
+ sequence-specific DNA binding
+ [PMID:24504085|PMID:19158363]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000082 G1/S
+ transition of mitotic cell cycle
+ [PMID:17898805|PMID:10894548]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000086 G2/M
+ transition of mitotic cell cycle
+ [PMID:10959837|PMID:11562353|PMID:10894548|PMID:10747051]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000122
+ negative regulation of transcription by RNA polymerase II
+ [PMID:10894548|PMID:17898805]"
+ /experiment="EXISTENCE:genetic interaction:GO:0001227
+ DNA-binding transcription repressor activity, RNA
+ polymerase II-specific [PMID:10894548]"
+ /experiment="EXISTENCE:genetic interaction:GO:0001228
+ DNA-binding transcription activator activity, RNA
+ polymerase II-specific [PMID:10894548|PMID:11562353]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006338
+ chromatin remodeling [PMID:17283050]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006369
+ termination of RNA polymerase II transcription
+ [PMID:12702877]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007535
+ donor selection [PMID:16809780|PMID:12183363]"
+ /experiment="EXISTENCE:genetic interaction:GO:0034244
+ negative regulation of transcription elongation by RNA
+ polymerase II [PMID:12702877]"
+ /experiment="EXISTENCE:genetic interaction:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:10959837|PMID:11562353|PMID:10894548|PMID:10747051]"
+ /experiment="EXISTENCE:genetic interaction:GO:0090055
+ positive regulation of silent mating-type cassette
+ heterochromatin formation [PMID:10747051|PMID:18045995]"
+ /experiment="EXISTENCE:genetic interaction:GO:2000221
+ negative regulation of pseudohyphal growth
+ [PMID:10747051|PMID:10894548]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000086 G2/M
+ transition of mitotic cell cycle
+ [PMID:10747051|PMID:18045995]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000122
+ negative regulation of transcription by RNA polymerase II
+ [PMID:10747051|PMID:18045995]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006338
+ chromatin remodeling [PMID:17283050]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006369
+ termination of RNA polymerase II transcription
+ [PMID:12702877]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007535 donor
+ selection [PMID:27257873|PMID:12183363|PMID:16809780]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031124 mRNA
+ 3'-end processing [PMID:12702877]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032298
+ positive regulation of DNA-templated DNA replication
+ initiation [PMID:22265405]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034244
+ negative regulation of transcription elongation by RNA
+ polymerase II [PMID:12702877]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0090055
+ positive regulation of silent mating-type cassette
+ heterochromatin formation [PMID:10747051]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1903468
+ positive regulation of DNA replication initiation
+ [PMID:26728715]"
+ /note="Forkhead family transcription factor; rate-limiting
+ replication origin activator; evolutionarily conserved
+ lifespan regulator; binds multiple chromosomal elements
+ with distinct specificities, cell cycle dynamics;
+ regulates transcription elongation, chromatin silencing at
+ mating loci, expression of G2/M phase genes; facilitates
+ clustering, activation of early-firing replication
+ origins; binds HML recombination enhancer, regulates donor
+ preference during mating-type switching"
+ /codon_start=1
+ /product="forkhead family transcription factor FKH1"
+ /protein_id="NP_012135.1"
+ /db_xref="GeneID:854675"
+ /db_xref="SGD:S000001393"
+ /translation="MSVTSREQKFSGKYSSYTAQDRQGLVNAVTCVLSSSSDPVAVSS
+ DYSNSLSIAREVNAYAKIAGCDWTYYVQKLEVTIGRNTDSLNLNAVPGTVVKKNIDID
+ LGPAKIVSRKHAAIRFNLESGSWELQIFGRNGAKVNFRRIPTGPDSPPTVLQSGCIID
+ IGGVQMIFILPEQETIISDYCLNHLMPKLLSTYGTNGNNNPLLRNIIEGSTYLREQRL
+ QEEARLQQLDHLHTPLSSSSDVNPIGDPHGDTIMMEEDEEDENYTRGGIRPNTYTSSS
+ NNAVTNGNVPHIENPSDLSLDENRYIKPPQSYASMITQAILSTPEGSISLADIYKFIS
+ DNYAFYRFSQMAWQNSVRHNLSLNKAFEKVPKRAGQQGKGMNWKISDEVRRDFLNKWN
+ AGKLSKIRRGASVTRQLQLHMSKFGEIPAPESSSIDPRGIKAQKVKKSLQATSSILGE
+ SAPQLQRTQLTGQISTTTSMDVTTNANVNNSSLS"
+ gene <102782..>105676
+ /gene="ASG1"
+ /locus_tag="YIL130W"
+ /db_xref="GeneID:854676"
+ mRNA <102782..>105676
+ /gene="ASG1"
+ /locus_tag="YIL130W"
+ /product="Asg1p"
+ /transcript_id="NM_001179478.1"
+ /db_xref="GeneID:854676"
+ CDS 102782..105676
+ /gene="ASG1"
+ /locus_tag="YIL130W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0043565
+ sequence-specific DNA binding
+ [PMID:19158363|PMID:19111667]"
+ /note="Zinc cluster protein proposed to be a
+ transcriptional regulator; regulator involved in the
+ stress response; regulates utilization of fatty acids and
+ accumulation of lipids"
+ /codon_start=1
+ /product="Asg1p"
+ /protein_id="NP_012136.1"
+ /db_xref="GeneID:854676"
+ /db_xref="SGD:S000001392"
+ /translation="MPEQAQQGEQSVKRRRVTRACDECRKKKVKCDGQQPCIHCTVYS
+ YECTYKKPTKRTQNSGNSGVLTLGNVTTGPSSSTVVAAAASNPNKLLSNIKTERAILP
+ GASTIPASNNPSKPRKYKTKSTRLQSKIDRYKQIFDEVFPQLPDIDNLDIPVFLQIFH
+ NFKRDSQSFLDDTVKEYTLIVNDSSSPIQPVLSSNSKNSTPDEFLPNMKSDSNSASSN
+ REQDSVDTYSNIPVGREIKIILPPKAIALQFVKSTWEHCCVLLRFYHRPSFIRQLDEL
+ YETDPNNYTSKQMQFLPLCYAAIAVGALFSKSIVSNDSSREKFLQDEGYKYFIAARKL
+ IDITNARDLNSIQAILMLIIFLQCSARLSTCYTYIGVAMRSALRAGFHRKLSPNSGFS
+ PIEIEMRKRLFYTIYKLDVYINAMLGLPRSISPDDFDQTLPLDLSDENITEVAYLPEN
+ QHSVLSSTGISNEHTKLFLILNEIISELYPIKKTSNIISHETVTSLELKLRNWLDSLP
+ KELIPNAENIDPEYERANRLLHLSFLHVQIILYRPFIHYLSRNMNAENVDPLCYRRAR
+ NSIAVARTVIKLAKEMVSNNLLTGSYWYACYTIFYSVAGLLFYIHEAQLPDKDSAREY
+ YDILKDAETGRSVLIQLKDSSMAASRTYNLLNQIFEKLNSKTIQLTALHSSPSNESAF
+ LVTNNSSALKPHLGDSLQPPVFFSSQDTKNSFSLAKSEESTNDYAMANYLNNTPISEN
+ PLNEAQQQDQVSQGTTNMSNERDPNNFLSIDIRLDNNGQSNILDATDDVFIRNDGDIP
+ TNSAFDFSSSKSNASNNSNPDTINNNYNNVSGKNNNNNNITNNSNNNHNNNNNDNNNN
+ NNNNNNNNNNNNNSGNSSNNNNNNNNNKNNNDFGIKIDNNSPSYEGFPQLQIPLSQDN
+ LNIEDKEEMSPNIEIKNEQNMTDSNDILGVFDQLDAQLFGKYLPLNYPSE"
+ rep_origin 105870..106053
+ /note="ARS909; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118394"
+ gene complement(<106107..>113237)
+ /gene="TAO3"
+ /locus_tag="YIL129C"
+ /gene_synonym="PAG1"
+ /db_xref="GeneID:854677"
+ mRNA complement(<106107..>113237)
+ /gene="TAO3"
+ /locus_tag="YIL129C"
+ /gene_synonym="PAG1"
+ /product="Tao3p"
+ /transcript_id="NM_001179477.3"
+ /db_xref="GeneID:854677"
+ CDS complement(106107..113237)
+ /gene="TAO3"
+ /locus_tag="YIL129C"
+ /gene_synonym="PAG1"
+ /experiment="EXISTENCE:direct assay:GO:0000131 incipient
+ cellular bud site [PMID:11854408]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0005933 cellular
+ bud [PMID:12972564]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:12972564]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007118
+ budding cell apical bud growth [PMID:12972564]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000902 cell
+ morphogenesis [PMID:11854408]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007114 cell
+ budding [PMID:11854408]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007118 budding
+ cell apical bud growth [PMID:12972564]"
+ /note="Component of the RAM signaling network; is involved
+ in regulation of Ace2p activity and cellular
+ morphogenesis, interacts with protein kinase Cbk1p and
+ also with Kic1p"
+ /codon_start=1
+ /product="Tao3p"
+ /protein_id="NP_012137.3"
+ /db_xref="GeneID:854677"
+ /db_xref="SGD:S000001391"
+ /translation="MASRFTFPPQRDQGIGFTFPPTNKAEGSSNNNQISIDIDPSGQD
+ VLEEINEAPLNTFPLHQSVTDAPIIDIPSPTDMSEGTSLNNQLLLRQQQQQGTGEGQA
+ LPPTFVEEQSDQNKISMLLPEQKQQRMQESAPPDITAKSVAEDYVTTLRQQMATDWKS
+ PSEYALHILFTKFIRYAENKLNMCLQQLDMAEPPIVEILGEGVDPSFDEIIKSLGHIA
+ KKKPKPVIDAMMFWRKTKSEAANSASEEMEKLLKEYEFEKAHPSQAHFLMNRRLSRSS
+ SNTTSKYKHNNNTNNLPGMKRHVSSSFNNKVPLIKASSSNNSATSSPSIANSQLKSLE
+ NTIEVAKEEAFLADRKSLISIYILCRVLNEIVKQASSNEEEDLSDKLEEIVFTQLKTT
+ DPLSISTSLIKSSNWNSFAELLGSMSEKKFLSVSDRFIADLEKIPAYIPPELEPSTHL
+ LILGMRYLKLRNYPLEKFEESADFMKSLSKFFAKTENFPVCLAYAEVTNQLLLPLAGS
+ LTAEVNHPTWVEAMSTLLNTAKRLQADSKYWVSGFKLTVSILCASPPDLFSKQWLSLL
+ EANASKVKSKSLNERIIFAVGLSRLVWVYLYRCPETLNNTTRTLTKLLQLYLNTRKKE
+ NWITGDFGLLNPLTDALISIGFLHPNFLMEQALIPLIRQSFNGSNLENINYEKLILTI
+ NTYKGLLVTKERPRFPEDDNRLYELNLNNITVNQVQEASSINHTEISDYFYKLFLLLD
+ SSIGSEVWSPENQHQKQSSNAFSPFGFSFSNDNDSSKNKSLYVILFGTIIEAIPCCLS
+ ISRTIPYKSTIEILSRNAVHSEVIISSSSQNALRALASKKNPYTLITWFAKYSFDFDE
+ KTQSSYNMSYLSSKEYNRLLILYVELLECWLEEFQSSNKEENKKETGLDGIRLLPIDA
+ EQEESNETEKLEWKNTVTVIEEVEGNGLFFLCSHDAKIRRLGIQILRIIFKFDEAMME
+ KTEKLSNGHSRSSSHFAADRGTRLIDLLNECNTTTLINPHKATLSAVEKTRFSRLNSK
+ YKRGLLIKLAESEYGVDAALWQRAFPKLLALVFKTCPMAMALCRSIVCIRLVQVHEII
+ LRVANDVDFKLKNVLPETIVNQWKLYLIAACTSLTSTFDQKLHIPSNIPQHGRKKSQQ
+ IFTVQHQKIKSAKSIFKMVLPLLNAKYIMIRDAIITGLSSMNINIFKAYVEAIDVFLV
+ AWKEGSSNNQIRVEMFHILTILSPYLKSDMIFNDEWILRKLSEFLQKTKQFLEKDSVQ
+ ISYEYQSLRSYFAGLILSYYMAVREHPLIDELFPFQARASCFNFLKEWCGYGEYEPIS
+ EERYAIMIKNTESGRDRTAITTGIEFQKNRLQMIVLETMVVLCSDPITQTLDDDLELP
+ IVISFDTEDLLAWIEALFDSDNTTVKNLGVRALENLLDKNRENFKLFRDVAFQCVSHH
+ SHPSVAVLYYTTLCKSVLKLDNLVLDEDELVSLGLYGLVADKEDTRTFAVDLLSAVET
+ KLHNSSYTKVFKERLANSSKTVYKSTAKEISSIFAELLSQDLCLRIFSSLVRILDLFP
+ FEIKRDLLVLMVPWVNKFTLKSLEELDTFMVLNNLFYITIDLNDSLPNEVEQLWISLG
+ KGNSFQNIHVSLEYIINSSMNHCNPLFVQYARDIVLYLANIPGGIGLLDTLLNNLEPK
+ YMVPLAKHTFNEPMNNNKYSFLGNIWERLNYNGKRIIFSKAQLSIIFLVNLLTNLSES
+ VKAKIPLLLHMSICLLDHYVPLIHESACKIASTLIFGLAPSHEKSEETVKLLRNKHAL
+ WSYDNLMKKGARSPKTMDLLIRNIISIFSDLDEFQVTWQRIALKWATTCSVRHIACRS
+ FQIFRSLLTFLDQEMLRDMLHRLSNTISDGNVDIQGFAMQILMTLNAIMAELDPTNLI
+ SFPQLFWSITACLSSIHEQEFIEVLSCLSKFISKIDLDSPDTVQCLVAIFPSNWEGRF
+ DGLQQIVMTGLRSANSLEITWKFLDKLNLLKDSRIIANTESRLLFALIANLPRFLNAM
+ DRKDFTGIQVAADSLIELANAYKQPSLSRLIDSLAKNKFRSKKDFMSQVVSFISRNYF
+ PSYSAQTLVFLLGLLFNKIGWIRVQTLEILKYVFPLIDLRRPEFIGVGADLISPLLRL
+ LFTEYEAKALEVLDCVPNVSGSKMDKDVLRITMGNKDVKDGDNATTTLFGLPEDSGWS
+ VPMPTMTAATTRHNVHAVFMTCGTGKSDEVSAHGSDDMDAVIEFHADGDYELGRMDTI
+ VEFHADGDYDLGRMDTNDSISVAEEKDASLSHMWAELDNLDSFFTKDTNVPNISSKMG
+ MGIPHGRSDSIETTRTDQTFSFESAPQLYDKKVSVILNRSLSRTPSNVSFKTHLADSF
+ AVKINRNGKPRI"
+ rep_origin 113237..113806
+ /note="ARS910; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130158"
+ gene <113806..>116904
+ /gene="MET18"
+ /locus_tag="YIL128W"
+ /gene_synonym="MMS19"
+ /db_xref="GeneID:854678"
+ mRNA <113806..>116904
+ /gene="MET18"
+ /locus_tag="YIL128W"
+ /gene_synonym="MMS19"
+ /product="Met18p"
+ /transcript_id="NM_001179476.1"
+ /db_xref="GeneID:854678"
+ CDS 113806..116904
+ /gene="MET18"
+ /locus_tag="YIL128W"
+ /gene_synonym="MMS19"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0097361 CIA complex
+ [PMID:22678362]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0097428 protein
+ maturation by iron-sulfur cluster transfer
+ [PMID:22678362]"
+ /note="Component of cytosolic iron-sulfur protein assembly
+ (CIA) machinery; acts at a late step of Fe-S cluster
+ assembly; forms the CIA targeting complex with Cia1p and
+ Cia2p that directs Fe-S cluster incorporation and
+ maturation of a subset of cytosolic and nuclear proteins
+ involved in methionine biosynthesis, DNA replication and
+ repair, transcription, and telomere maintenance; ortholog
+ of human MMS19"
+ /codon_start=1
+ /product="Met18p"
+ /protein_id="NP_012138.1"
+ /db_xref="GeneID:854678"
+ /db_xref="SGD:S000001390"
+ /translation="MTPDELNSAVVTFMANLNIDDSKANETASTVTDSIVHRSIKLLE
+ VVVALKDYFLSENEVERKKALTCLTTILAKTPKDHLSKNECSVIFQFYQSKLDDQALA
+ KEVLEGFAALAPMKYVSINEIAQLLRLLLDNYQQGQHLASTRLWPFKILRKIFDRFFV
+ NGSSTEQVKRINDLFIETFLHVANGEKDPRNLLLSFALNKSITSSLQNVENFKEDLFD
+ VLFCYFPITFKPPKHDPYKISNQDLKTALRSAITATPLFAEDAYSNLLDKLTASSPVV
+ KNDTLLTLLECVRKFGGSSILENWTLLWNALKFEIMQNSEGNENTLLNPYNKDQQSDD
+ VGQYTNYDACLKIINLMALQLYNFDKVSFEKFFTHVLDELKPNFKYEKDLKQTCQILS
+ AIGSGNVEIFNKVISSTFPLFLINTSEVAKLKLLIMNFSFFVDSYIDLFGRTSKESLG
+ TPVPNNKMAEYKDEIIMILSMALTRSSKAEVTIRTLSVIQFTKMIKMKGFLTPEEVSL
+ IIQYFTEEILTDNNKNIYYACLEGLKTISEIYEDLVFEISLKKLLDLLPDCFEEKIRV
+ NDEENIHIETILKIILDFTTSRHILVKESITFLATKLNRVAKISKSREYCFLLISTIY
+ SLFNNNNQNENVLNEEDALALKNAIEPKLFEIITQESAIVSDNYNLTLLSNVLFFTNL
+ KIPQAAHQEELDRYNELFISEGKIRILDTPNVLAISYAKILSALNKNCQFPQKFTVLF
+ GTVQLLKKHAPRMTETEKLGYLELLLVLSNKFVSEKDVIGLFDWKDLSVINLEVMVWL
+ TKGLIMQNSLESSEIAKKFIDLLSNEEIGSLVSKLFEVFVMDISSLKKFKGISWNNNV
+ KILYKQKFFGDIFQTLVSNYKNTVDMTIKCNYLTALSLVLKHTPSQSVGPFINDLFPL
+ LLQALDMPDPEVRVSALETLKDTTDKHHTLITEHVSTIVPLLLSLSLPHKYNSVSVRL
+ IALQLLEMITTVVPLNYCLSYQDDVLSALIPVLSDKKRIIRKQCVDTRQVYYELGQIP
+ FE"
+ gene complement(<117024..>117644)
+ /gene="RRT14"
+ /locus_tag="YIL127C"
+ /db_xref="GeneID:854679"
+ mRNA complement(<117024..>117644)
+ /gene="RRT14"
+ /locus_tag="YIL127C"
+ /product="Rrt14p"
+ /transcript_id="NM_001179475.1"
+ /db_xref="GeneID:854679"
+ CDS complement(117024..117644)
+ /gene="RRT14"
+ /locus_tag="YIL127C"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:14562095]"
+ /note="hypothetical protein; identified in a screen for
+ mutants with decreased levels of rDNA transcription; green
+ fluorescent protein (GFP)-fusion protein localizes to the
+ nucleolus; predicted to be involved in ribosome
+ biogenesis"
+ /codon_start=1
+ /product="Rrt14p"
+ /protein_id="NP_012139.1"
+ /db_xref="GeneID:854679"
+ /db_xref="SGD:S000001389"
+ /translation="MSSSLSQTSKYQATSVVNGLLSNLLPGVPKIRANNGKTSVNNGS
+ KAQLIDRNLKKRVQLQNRDVHKIKKKCKLVKKKKVKKHKLDKEQLEQLAKHQVLKKHQ
+ HEGTLTDHERKYLNKLIKRNSQNLRSWDLEEEVRDELEDIQQSILKDTVSTANTDRSK
+ RRRFKRKQFKEDIKESDFVKDHRYPGLTPGLAPVGLSDEEDSSEED"
+ gene <117992..>122071
+ /gene="STH1"
+ /locus_tag="YIL126W"
+ /gene_synonym="NPS1"
+ /db_xref="GeneID:854680"
+ mRNA <117992..>122071
+ /gene="STH1"
+ /locus_tag="YIL126W"
+ /gene_synonym="NPS1"
+ /product="RSC chromatin remodeling complex ATPase subunit
+ STH1"
+ /transcript_id="NM_001179474.1"
+ /db_xref="GeneID:854680"
+ CDS 117992..122071
+ /gene="STH1"
+ /locus_tag="YIL126W"
+ /gene_synonym="NPS1"
+ /EC_number="3.6.4.12"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:1396591|PMID:1549132]"
+ /experiment="EXISTENCE:direct assay:GO:0006337 nucleosome
+ disassembly [PMID:16492771]"
+ /experiment="EXISTENCE:direct assay:GO:0006338 chromatin
+ remodeling [PMID:8980231]"
+ /experiment="EXISTENCE:direct assay:GO:0006368
+ transcription elongation by RNA polymerase II
+ [PMID:17081996]"
+ /experiment="EXISTENCE:direct assay:GO:0015616 DNA
+ translocase activity [PMID:12183366]"
+ /experiment="EXISTENCE:direct assay:GO:0016586 RSC-type
+ complex [PMID:10619019|PMID:12183366]"
+ /experiment="EXISTENCE:direct assay:GO:0070577
+ lysine-acetylated histone binding [PMID:20126658]"
+ /experiment="EXISTENCE:direct assay:GO:0140658
+ ATP-dependent chromatin remodeler activity
+ [PMID:9799253|PMID:12183366]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007059
+ chromosome segregation [PMID:12697820]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006284
+ base-excision repair [PMID:24674626]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006302
+ double-strand break repair [PMID:16024655]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007010
+ cytoskeleton organization [PMID:12072455]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031055
+ chromatin remodeling at centromere [PMID:12697820]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051321 meiotic
+ cell cycle [PMID:10320476]"
+ /note="ATPase component of the RSC chromatin remodeling
+ complex; required for expression of early meiotic genes;
+ promotes base excision repair in chromatin; essential
+ helicase-related protein homologous to Snf2p"
+ /codon_start=1
+ /product="RSC chromatin remodeling complex ATPase subunit
+ STH1"
+ /protein_id="NP_012140.1"
+ /db_xref="GeneID:854680"
+ /db_xref="SGD:S000001388"
+ /translation="MLQEQSELMSTVMNNTPTTVAALAAVAAASETNGKLGSEEQPEI
+ TIPKPRSSAQLEQLLYRYRAIQNHPKENKLEIKAIEDTFRNISRDQDIYETKLDTLRK
+ SIDKGFQYDEDLLNKHLVALQLLEKDTDVPDYFLDLPDTKNDNTTAIEVDYSEKKPIK
+ ISADFNAKAKSLGLESKFSNATKTALGDPDTEIRISARISNRINELERLPANLGTYSL
+ DDCLEFITKDDLSSRMDTFKIKALVELKSLKLLTKQKSIRQKLINNVASQAHHNIPYL
+ RDSPFTAAAQRSVQIRSKVIVPQTVRLAEELERQQLLEKRKKERNLHLQKINSIIDFI
+ KERQSEQWSRQERCFQFGRLGASLHNQMEKDEQKRIERTAKQRLAALKSNDEEAYLKL
+ LDQTKDTRITQLLRQTNSFLDSLSEAVRAQQNEAKILHGEEVQPITDEEREKTDYYEV
+ AHRIKEKIDKQPSILVGGTLKEYQLRGLEWMVSLYNNHLNGILADEMGLGKTIQSISL
+ ITYLYEVKKDIGPFLVIVPLSTITNWTLEFEKWAPSLNTIIYKGTPNQRHSLQHQIRV
+ GNFDVLLTTYEYIIKDKSLLSKHDWAHMIIDEGHRMKNAQSKLSFTISHYYRTRNRLI
+ LTGTPLQNNLPELWALLNFVLPKIFNSAKTFEDWFNTPFANTGTQEKLELTEEETLLI
+ IRRLHKVLRPFLLRRLKKEVEKDLPDKVEKVIKCKLSGLQQQLYQQMLKHNALFVGAG
+ TEGATKGGIKGLNNKIMQLRKICNHPFVFDEVEGVVNPSRGNSDLLFRVAGKFELLDR
+ VLPKFKASGHRVLMFFQMTQVMDIMEDFLRMKDLKYMRLDGSTKTEERTEMLNAFNAP
+ DSDYFCFLLSTRAGGLGLNLQTADTVIIFDTDWNPHQDLQAQDRAHRIGQKNEVRILR
+ LITTDSVEEVILERAMQKLDIDGKVIQAGKFDNKSTAEEQEAFLRRLIESETNRDDDD
+ KAELDDDELNDTLARSADEKILFDKIDKERMNQERADAKAQGLRVPPPRLIQLDELPK
+ VFREDIEEHFKKEDSEPLGRIRQKKRVYYDDGLTEEQFLEAVEDDNMSLEDAIKKRRE
+ ARERRRLRQNGTKENEIETLENTPEASETSLIENNSFTAAVDEETNADKETTASRSKR
+ RSSRKKRTISIVTAEDKENTQEESTSQENGGAKVEEEVKSSSVEIINGSESKKKKPKL
+ TVKIKLNKTTVLENNDGKRAEEKPESKSPAKKTAAKKTKTKSKSLGIFPTVEKLVEEM
+ REQLDEVDSHPRTSIFEKLPSKRDYPDYFKVIEKPMAIDIILKNCKNGTYKTLEEVRQ
+ ALQTMFENARFYNEEGSWVYVDADKLNEFTDEWFKEHSS"
+ gene <122689..>125733
+ /gene="KGD1"
+ /locus_tag="YIL125W"
+ /gene_synonym="OGD1"
+ /db_xref="GeneID:854681"
+ mRNA <122689..>125733
+ /gene="KGD1"
+ /locus_tag="YIL125W"
+ /gene_synonym="OGD1"
+ /product="alpha-ketoglutarate dehydrogenase KGD1"
+ /transcript_id="NM_001179473.1"
+ /db_xref="GeneID:854681"
+ CDS 122689..125733
+ /gene="KGD1"
+ /locus_tag="YIL125W"
+ /gene_synonym="OGD1"
+ /EC_number="1.2.4.2"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0042645
+ mitochondrial nucleoid [PMID:11926067|PMID:15692048]"
+ /experiment="EXISTENCE:direct assay:GO:0045252
+ oxoglutarate dehydrogenase complex [PMID:2072900]"
+ /note="Subunit of the mitochondrial alpha-ketoglutarate
+ dehydrogenase complex; catalyzes a key step in the
+ tricarboxylic acid (TCA) cycle, the oxidative
+ decarboxylation of alpha-ketoglutarate to form
+ succinyl-CoA"
+ /codon_start=1
+ /product="alpha-ketoglutarate dehydrogenase KGD1"
+ /protein_id="NP_012141.1"
+ /db_xref="GeneID:854681"
+ /db_xref="SGD:S000001387"
+ /translation="MLRFVSSQTCRYSSRGLLKTSLLKNASTVKIVGRGLATTGTDNF
+ LSTSNATYIDEMYQAWQKDPSSVHVSWDAYFKNMSNPKIPATKAFQAPPSISNFPQGT
+ EAAPLGTAMTGSVDENVSIHLKVQLLCRAYQVRGHLKAHIDPLGISFGSNKNNPVPPE
+ LTLDYYGFSKHDLDKEINLGPGILPRFARDGKSKMSLKEIVDHLEKLYCSSYGVQYTH
+ IPSKQKCDWLRERIEIPEPYQYTVDQKRQILDRLTWATSFESFLSTKFPNDKRFGLEG
+ LESVVPGIKTLVDRSVELGVEDIVLGMAHRGRLNVLSNVVRKPNESIFSEFKGSSARD
+ DIEGSGDVKYHLGMNYQRPTTSGKYVNLSLVANPSHLESQDPVVLGRTRALLHAKNDL
+ KEKTKALGVLLHGDAAFAGQGVVYETMGFLTLPEYSTGGTIHVITNNQIGFTTDPRFA
+ RSTPYPSDLAKAIDAPIFHVNANDVEAVTFIFNLAAEWRHKFHTDAIIDVVGWRKHGH
+ NETDQPSFTQPLMYKKIAKQKSVIDVYTEKLISEGTFSKKDIDEHKKWVWNLFEDAFE
+ KAKDYVPSQREWLTAAWEGFKSPKELATEILPHEPTNVPESTLKELGKVLSSWPEGFE
+ VHKNLKRILKNRGKSIETGEGIDWATGEALAFGTLVLDGQNVRVSGEDVERGTFSQRH
+ AVLHDQQSEAIYTPLSTLNNEKADFTIANSSLSEYGVMGFEYGYSLTSPDYLVMWEAQ
+ FGDFANTAQVIIDQFIAGGEQKWKQRSGLVLSLPHGYDGQGPEHSSGRLERFLQLANE
+ DPRYFPSEEKLQRQHQDCNFQVVYPTTPANLFHILRRQQHRQFRKPLALFFSKQLLRH
+ PLARSSLSEFTEGGFQWIIEDIEHGKSIGTKEETKRLVLLSGQVYTALHKRRESLGDK
+ TTAFLKIEQLHPFPFAQLRDSLNSYPNLEEIVWCQEEPLNMGSWAYTEPRLHTTLKET
+ DKYKDFKVRYCGRNPSGAVAAGSKSLHLAEEDAFLKDVFQQS"
+ gene <126204..>127097
+ /gene="AYR1"
+ /locus_tag="YIL124W"
+ /gene_synonym="GBG1"
+ /db_xref="GeneID:854682"
+ mRNA <126204..>127097
+ /gene="AYR1"
+ /locus_tag="YIL124W"
+ /gene_synonym="GBG1"
+ /product="acylglycerone-phosphate reductase"
+ /transcript_id="NM_001179472.3"
+ /db_xref="GeneID:854682"
+ CDS 126204..127097
+ /gene="AYR1"
+ /locus_tag="YIL124W"
+ /gene_synonym="GBG1"
+ /EC_number="1.1.1.101"
+ /EC_number="3.1.1.3"
+ /experiment="EXISTENCE:direct assay:GO:0004806
+ triglyceride lipase activity [PMID:24187129]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane [PMID:16407407]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762|PMID:10617610]"
+ /experiment="EXISTENCE:direct assay:GO:0005811 lipid
+ droplet [PMID:10617610|PMID:24868093]"
+ /experiment="EXISTENCE:genetic interaction:GO:0004806
+ triglyceride lipase activity [PMID:24187129]"
+ /experiment="EXISTENCE:genetic interaction:GO:0019433
+ triglyceride catabolic process [PMID:24187129]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000140
+ acylglycerone-phosphate reductase activity
+ [PMID:10617610]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004806
+ triglyceride lipase activity [PMID:24187129]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006654
+ phosphatidic acid biosynthetic process [PMID:10617610]"
+ /note="Bifunctional triacylglycerol lipase and 1-acyl DHAP
+ reductase; NADPH-dependent 1-acyl dihydroxyacetone
+ phosphate reductase involved in phosphatidic acid
+ biosynthesis; lipid droplet triacylglycerol lipase
+ involved in mobilization of non-polar lipids; found in
+ lipid particles, endoplasmic reticulum and mitochondrial
+ outer membrane; forms NADPH-regulated channel in
+ mitochondrial outer membrane; required for spore
+ germination; role in cell wall biosynthesis; capable of
+ metabolizing steroid hormones"
+ /codon_start=1
+ /product="acylglycerone-phosphate reductase"
+ /protein_id="NP_012142.3"
+ /db_xref="GeneID:854682"
+ /db_xref="SGD:S000001386"
+ /translation="MSELQSQPKKIAVVTGASGGIGYEVTKELARNGYLVYACARRLE
+ PMAQLAIQFGNDSIKPYKLDISKPEEIVTFSGFLRANLPDGKLDLLYNNAGQSCTFPA
+ LDATDAAVEQCFKVNVFGHINMCRELSEFLIKAKGTIVFTGSLAGVVSFPFGSIYSAS
+ KAAIHQYARGLHLEMKPFNVRVINAITGGVATDIADKRPLPETSIYNFPEGREAFNSR
+ KTMAKDNKPMPADAYAKQLVKDILSTSDPVDVYRGTFANIMRFVMIFVPYWLLEKGLS
+ KKFKLDKVNNALKSKQKNKDD"
+ gene <128151..>129581
+ /gene="SIM1"
+ /locus_tag="YIL123W"
+ /db_xref="GeneID:854683"
+ mRNA <128151..>129581
+ /gene="SIM1"
+ /locus_tag="YIL123W"
+ /product="putative glucosidase SIM1"
+ /transcript_id="NM_001179471.2"
+ /db_xref="GeneID:854683"
+ CDS 128151..129581
+ /gene="SIM1"
+ /locus_tag="YIL123W"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0009277 fungal-type
+ cell wall [PMID:11958935]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031505
+ fungal-type cell wall organization [PMID:15972461]"
+ /note="Protein of the SUN family (Sim1p, Uth1p, Nca3p,
+ Sun4p); may participate in DNA replication; promoter
+ contains SCB regulation box at -300 bp indicating that
+ expression may be cell cycle-regulated; SIM1 has a
+ paralog, SUN4, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="putative glucosidase SIM1"
+ /protein_id="NP_012143.2"
+ /db_xref="GeneID:854683"
+ /db_xref="SGD:S000001385"
+ /translation="MKFSTAVTTLISSGAIVSALPHVDVHQEDAHQHKRAVAYKYVYE
+ TVVVDSDGHTVTPAASEVATAATSAIITTSVLAPTSSAAAADSSASIAVSSAALAKNE
+ KISDAAASATASTSQGASSSSSSSSATSTLESSSVSSSSEEAAPTSTVVSTSSATQSS
+ ASSATKSSTSSTSPSTSTSTSTSSTSSSSSSSSSSSSSSSGSGSIYGDLADFSGPSEK
+ FQDGTIPCDKFPSGQGVISIDWIGEGGWSGVENTDTSTGGSCKEGSYCSYSCQPGMSK
+ TQWPSDQPSDGRSVGGLLCKNGYLYRSNTDADYLCEWGVEAAYVVSKLSKGVAICRTD
+ YPGTENMVIPTYVEGGSSLPLTVVDQDTYFTWEGKKTSAQYYVNNAGVSVEDGCIWGT
+ SGSGIGNWAPLNFGAGSTGGVTYLSLIPNPNNSDALNYNVKIVAADDSSNVIGECVYE
+ NGEFSGGADGCTVSVTSGKAHFVLYN"
+ gene <130610..>131665
+ /gene="POG1"
+ /locus_tag="YIL122W"
+ /db_xref="GeneID:854684"
+ mRNA <130610..>131665
+ /gene="POG1"
+ /locus_tag="YIL122W"
+ /product="Pog1p"
+ /transcript_id="NM_001179470.1"
+ /db_xref="GeneID:854684"
+ CDS 130610..131665
+ /gene="POG1"
+ /locus_tag="YIL122W"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:12464632]"
+ /experiment="EXISTENCE:direct assay:GO:0000978 RNA
+ polymerase II cis-regulatory region sequence-specific DNA
+ binding [PMID:22959267]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:11914276]"
+ /experiment="EXISTENCE:direct assay:GO:0030435 sporulation
+ resulting in formation of a cellular spore
+ [PMID:22959267]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000321
+ re-entry into mitotic cell cycle after pheromone arrest
+ [PMID:9927449]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000321
+ re-entry into mitotic cell cycle after pheromone arrest
+ [PMID:9927449]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030435
+ sporulation resulting in formation of a cellular spore
+ [PMID:22959267]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:22959267|PMID:9927449]"
+ /note="DNA-binding transcriptional activator; involved in
+ cell cycle regulation; overexpression promotes recovery
+ from pheromone induced arrest via CLN1/2 transcription,
+ induction of of IME1 during sporulation, and suppression
+ of stress sensitivity resulting from mutation of the E3
+ ubiquitin ligase Rsp5p; binds upstream of BAR1 and cell
+ cycle-related genes; phosphorylated form may be
+ ubiquitinated by Dma2p; potential Cdc28p substrate;
+ regulated by Swi4/6 cell-cycle box binding factor (SBF)"
+ /codon_start=1
+ /product="Pog1p"
+ /protein_id="NP_012144.1"
+ /db_xref="GeneID:854684"
+ /db_xref="SGD:S000001384"
+ /translation="MKQEPHRQSEEKEKPKGPMAVEREQHTSLSSGTTVTASTGDEST
+ NSRPVESSQTEKSLSLRIRILKQLGFDDIQELNACDTGLVEQFLNVRLINDTKELEKI
+ RESNLAKLNQIIDKCMESDKISDSTLNKILDMSMNRDTNNDNNNHLTIPSPITTKKRK
+ INASELASPRGHRRYRSDIPTVSEVETGVGYPQIHQQPGAYTLPMPANQWMSNPYMQP
+ PQPQVQQIMPQYLYPPGMGPQAQLPTMSSNSESQTPVMSSQFLSLNQHGLYQQNIGAH
+ PVMSMGPQANIYGQQHQLQPGQERDQSRKSFSHRRSQSANISMANFRSPMRNPQPASS
+ QRPVNFLIHTPKHPPPT"
+ gene <132244..>133872
+ /gene="QDR2"
+ /locus_tag="YIL121W"
+ /db_xref="GeneID:854685"
+ mRNA <132244..>133872
+ /gene="QDR2"
+ /locus_tag="YIL121W"
+ /product="cation transporter"
+ /transcript_id="NM_001179469.1"
+ /db_xref="GeneID:854685"
+ CDS 132244..133872
+ /gene="QDR2"
+ /locus_tag="YIL121W"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:34887841|PMID:15215105]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:1990573
+ potassium ion import across plasma membrane
+ [PMID:17189489]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008324
+ monoatomic cation transmembrane transporter activity
+ [PMID:23106982]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015565
+ threonine efflux transmembrane transporter activity
+ [PMID:34887841]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032973 amino
+ acid export across plasma membrane [PMID:34887841]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042910
+ xenobiotic transmembrane transporter activity
+ [PMID:15215105|PMID:15649438]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0055085
+ transmembrane transport [PMID:15649438|PMID:15215105]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0060003 copper
+ ion export [PMID:23106982]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1990573
+ potassium ion import across plasma membrane
+ [PMID:17189489]"
+ /note="Plasma membrane transporter of the major
+ facilitator superfamily; member of the 12-spanner
+ drug:H(+) antiporter DHA1 family; exports copper; has
+ broad substrate specificity and can transport many mono-
+ and divalent cations; transports a variety of drugs and is
+ required for resistance to quinidine, barban, cisplatin,
+ and bleomycin; contributes to potassium homeostasis;
+ expression is regulated by copper"
+ /codon_start=1
+ /product="cation transporter"
+ /protein_id="NP_012145.1"
+ /db_xref="GeneID:854685"
+ /db_xref="SGD:S000001383"
+ /translation="MAGATSSIIRENDFEDELAESMQSYNRETADKLALTRTESVKPE
+ PEITAPPHSRFSRSFKTVLIAQCAFTGFFSTIAGAIYYPVLSVIERKFDIDEELVNVT
+ VVVYFVFQGLAPTFMGGFADSLGRRPVVLVAIVIYFGACIGLACAQTYAQIIVLRCLQ
+ AAGISPVIAINSGIMGDVTTRAERGGYVGYVAGFQVLGSAFGALIGAGLSSRWGWRAI
+ FWFLAIGSGICFLASFLILPETKRNISGNGSVTPKSYLNRAPILVLPTVRKSLHLDNP
+ DYETLELPTQLNLLAPFKILKAYEICILMLVAGLQFAMYTTHLTALSTALSKQYHLTV
+ AKVGLCYLPSGICTLCSIVIAGRYLNWNYRRRLKYYQNWLGKKRSKLLEEHDNDLNLV
+ QRIIENDPKYTFNIFKARLQPAFVTLLLSSSGFCAYGWCITVKAPLAAVLCMSGFASL
+ FSNCILTFSTTLIVDLFPTKTSTATGCLNLFRCILSAVFIAALSKMVEKMKFGGVFTF
+ LGALTSSSSILLFILLRKGKELAFKRKKQELGVN"
+ gene <134417..>136108
+ /gene="QDR1"
+ /locus_tag="YIL120W"
+ /db_xref="GeneID:854686"
+ mRNA <134417..>136108
+ /gene="QDR1"
+ /locus_tag="YIL120W"
+ /product="multidrug transporter"
+ /transcript_id="NM_001179468.1"
+ /db_xref="GeneID:854686"
+ CDS 134417..136108
+ /gene="QDR1"
+ /locus_tag="YIL120W"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:11302822]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:0030476
+ ascospore wall assembly [PMID:23966878]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015562 efflux
+ transmembrane transporter activity [PMID:15649438]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0055085
+ transmembrane transport [PMID:15649438]"
+ /note="Multidrug transporter of the major facilitator
+ superfamily; member of the 12-spanner drug:H(+) antiporter
+ DHA1 family; involved in spore wall assembly; sequence
+ similarity to DTR1 and QDR3, and the triple mutant dtr1
+ qdr1 qdr3 exhibits reduced dityrosine fluorescence
+ relative to the single mutants; required for resistance to
+ quinidine, ketoconazole, fluconazole, and barban; QDR1 has
+ a paralog, AQR1, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="multidrug transporter"
+ /protein_id="NP_012146.1"
+ /db_xref="GeneID:854686"
+ /db_xref="SGD:S000001382"
+ /translation="MTKQQTSVMRNASIAKEEREGSDNNNVDRSSSDAISDNDAERSN
+ SHSEIDNESNFDMVPYSRFSHKQKMLLVVQCAFTGFFSTVAGSIYYPVLTIIERKFNI
+ TEELANVTIVVYFIFQGVAPSIMGGLADTFGRRPIVLWAILAYFCACIGLACAHNYAQ
+ ILALRCLQAAGISPVIAINSGIMGDVTTKVERGGYVGLVAGFQVVGTAFGALIGAGLS
+ SKWGWRAIFWFLAIGSGICLVFSTLLMPETKRTLVGNGSVTPRSFLNRSLILHVGSVK
+ KTLHLDDPDPETLEPRTSVDFLAPLKILHIREIDILLSIAGLQFSTWTTHQTALTIVL
+ SKKYNLSVAKIGLCFLPAGISTLTSIISAGRYLNWSYRTRKVKYNRWIKEQELQLMEK
+ YKGDKNKVAELIHSNSHYAFNLVEARLHPAFVTLLLSSIGFTAFGWCISVKTPLAAVL
+ CTSAFASLFSNCILTFSTTLIVDLFPSKASTATGCLNLFRCLLSAIFIAALTKMVEKM
+ RYGGVFTFLSAITSSSSLLLFYLLKNGKQLSFDRIRANDKSAGRSVGKNSEKVST"
+ rep_origin 136098..136338
+ /note="ARS911; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118395"
+ gene complement(<136654..>137877)
+ /gene="RPI1"
+ /locus_tag="YIL119C"
+ /db_xref="GeneID:854687"
+ mRNA complement(<136654..>137877)
+ /gene="RPI1"
+ /locus_tag="YIL119C"
+ /product="Rpi1p"
+ /transcript_id="NM_001179467.3"
+ /db_xref="GeneID:854687"
+ CDS complement(136654..137877)
+ /gene="RPI1"
+ /locus_tag="YIL119C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:12455971]"
+ /experiment="EXISTENCE:genetic interaction:GO:0009272
+ fungal-type cell wall biogenesis [PMID:12455971]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007265 Ras
+ protein signal transduction [PMID:1649384]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009272
+ fungal-type cell wall biogenesis [PMID:12455971]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030695 GTPase
+ regulator activity [PMID:1649384]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:12455971]"
+ /note="Transcription factor, allelic differences between
+ S288C and Sigma1278b; mediates fermentation stress
+ tolerance by modulating cell wall integrity;
+ overexpression suppresses heat shock sensitivity of
+ wild-type RAS2 overexpression and also suppresses cell
+ lysis defect of mpk1 mutation; allele from S288c can
+ confer fMAPK pathway independent transcription of FLO11;
+ S288C and Sigma1278b alleles differ in number of tandem
+ repeats within ORF"
+ /codon_start=1
+ /product="Rpi1p"
+ /protein_id="NP_012147.3"
+ /db_xref="GeneID:854687"
+ /db_xref="SGD:S000001381"
+ /translation="MYLEYLQPKLNLMDESSTISKNFPDYSPNLNTPITSNFNEETGS
+ DCSLVTPRIISSSNSNSNSNSNSNSNSNSGSIDENELNNSNSSSSSARQIRKKWKEPE
+ DIAFITTIMNNSQLLTFVEYFKPMKNFWKKISKILFQQYGYERNSRQCHDRFKVLYTK
+ SLKVHPSKKSKQKKKKSKQEAGSNLNFDPSKLSRMQYLLVQLQNTFSFVNGNIILKSQ
+ KTLKPNKNGTNDNINNHYYNNCNNNNNNINNSNNSNNNNSNNINRNSNHSTNVFSTPE
+ HIQSSINLDKLESLPALDTKGEPSFISPAQFSLLSSAPADNLILQTPPSPFFQQTMPI
+ QLPRDAQQEQISPVFSTDVIYMWQTMFNTIENLKEQVNCLKNEVKQLNHKFYQQNKPL
+ HNMSTSDSENFMQQH"
+ gene <139752..>140447
+ /gene="RHO3"
+ /locus_tag="YIL118W"
+ /db_xref="GeneID:854688"
+ mRNA <139752..>140447
+ /gene="RHO3"
+ /locus_tag="YIL118W"
+ /product="Rho family GTPase RHO3"
+ /transcript_id="NM_001179466.1"
+ /db_xref="GeneID:854688"
+ CDS 139752..140447
+ /gene="RHO3"
+ /locus_tag="YIL118W"
+ /experiment="EXISTENCE:direct assay:GO:0003924 GTPase
+ activity [PMID:10526184]"
+ /experiment="EXISTENCE:direct assay:GO:0005525 GTP binding
+ [PMID:10207081]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:10207081]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:16622836]"
+ /experiment="EXISTENCE:direct assay:GO:0005933 cellular
+ bud [PMID:10207081]"
+ /experiment="EXISTENCE:genetic interaction:GO:0045921
+ positive regulation of exocytosis
+ [PMID:8852836|PMID:19955214]"
+ /experiment="EXISTENCE:genetic interaction:GO:0090338
+ positive regulation of formin-nucleated actin cable
+ assembly [PMID:12810699]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030950
+ establishment or maintenance of actin cytoskeleton
+ polarity [PMID:10588647|PMID:8852836]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045921
+ positive regulation of exocytosis [PMID:10588647]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0090338
+ positive regulation of formin-nucleated actin cable
+ assembly [PMID:12810699]"
+ /experiment="EXISTENCE:physical interaction:GO:0045921
+ positive regulation of exocytosis [PMID:19955214]"
+ /note="Non-essential small GTPase of the Rho/Rac family of
+ Ras-like proteins; involved in the establishment of cell
+ polarity; GTPase activity positively regulated by the
+ GTPase activating protein (GAP) Rgd1p"
+ /codon_start=1
+ /product="Rho family GTPase RHO3"
+ /protein_id="NP_012148.1"
+ /db_xref="GeneID:854688"
+ /db_xref="SGD:S000001380"
+ /translation="MSFLCGSASTSNKPIERKIVILGDGACGKTSLLNVFTRGYFPEV
+ YEPTVFENYIHDIFVDSKHITLSLWDTAGQEEFDRLRSLSYSDTQCIMLCFSIDSRDS
+ LENVQNKWVGEITDHCEGVKLVLVALKCDLRNNENESNAITPNNIQQDNSVSNDNGNN
+ INSTSNGKNLISYEEGLAMAKKIGALRYLECSAKLNKGVNEAFTEAARVALTAGPVAT
+ EVKSDSGSSCTIM"
+ gene complement(<140613..>141569)
+ /gene="PRM5"
+ /locus_tag="YIL117C"
+ /db_xref="GeneID:854689"
+ mRNA complement(<140613..>141569)
+ /gene="PRM5"
+ /locus_tag="YIL117C"
+ /product="pheromone-regulated protein PRM5"
+ /transcript_id="NM_001179465.1"
+ /db_xref="GeneID:854689"
+ CDS complement(140613..141569)
+ /gene="PRM5"
+ /locus_tag="YIL117C"
+ /note="Pheromone-regulated protein, predicted to have 1
+ transmembrane segment; induced during cell integrity
+ signaling; PRM5 has a paralog, YNL058C, that arose from
+ the whole genome duplication"
+ /codon_start=1
+ /product="pheromone-regulated protein PRM5"
+ /protein_id="NP_012149.1"
+ /db_xref="GeneID:854689"
+ /db_xref="SGD:S000001379"
+ /translation="MTVITIAKRGLPKLTTSTSSTTTASSSSTITSVASSSSSLPLLS
+ NSTSSSIIPSITPPSRNGNPYILDSGDMPNGTVFIVVGGIAGVIFLAILLWWVITTYS
+ SHRLTRSVQDYESKMFSTQHTQFYGDSPYMDYPAKENFQDQVHISESDISPGNKDESV
+ KDALVSHTNNEKPFLSNFERPLFSLASESNRNSLFISPTGDILYKTRLSKLYQESPRL
+ LQKPVIMTSDNVSTNSLVSTISSSSASSLDNGNEKEVGEDIRKPAKIASSPSRKLLNS
+ PESDGSVNRNHSKGNLLVVQSKRKPTPSTYLEHMLEGKEQDE"
+ gene <142928..>144085
+ /gene="HIS5"
+ /locus_tag="YIL116W"
+ /db_xref="GeneID:854690"
+ mRNA <142928..>144085
+ /gene="HIS5"
+ /locus_tag="YIL116W"
+ /product="histidinol-phosphate transaminase"
+ /transcript_id="NM_001179464.1"
+ /db_xref="GeneID:854690"
+ CDS 142928..144085
+ /gene="HIS5"
+ /locus_tag="YIL116W"
+ /EC_number="2.6.1.9"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000105
+ histidine biosynthetic process [PMID:14190241]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004400
+ histidinol-phosphate transaminase activity
+ [PMID:14190241]"
+ /note="Histidinol-phosphate aminotransferase; catalyzes
+ the seventh step in histidine biosynthesis; responsive to
+ general control of amino acid biosynthesis; mutations
+ cause histidine auxotrophy and sensitivity to Cu, Co, and
+ Ni salts"
+ /codon_start=1
+ /product="histidinol-phosphate transaminase"
+ /protein_id="NP_012150.1"
+ /db_xref="GeneID:854690"
+ /db_xref="SGD:S000001378"
+ /translation="MVFDLKRIVRPKIYNLEPYRCARDDFTEGILLDANENAHGPTPV
+ ELSKTNLHRYPDPHQLEFKTAMTKYRNKTSSYANDPEVKPLTADNLCLGVGSDESIDA
+ IIRACCVPGKEKILVLPPTYSMYSVCANINDIEVVQCPLTVSDGSFQMDTEAVLTILK
+ NDSLIKLMFVTSPGNPTGAKIKTSLIEKVLQNWDNGLVVVDEAYVDFCGGSTAPLVTK
+ YPNLVTLQTLSKSFGLAGIRLGMTYATAELARILNAMKAPYNISSLASEYALKAVQDS
+ NLKKMEATSKIINEEKMRLLKELTALDYVDDQYVGGLDANFLLIRINGGDNVLAKKLY
+ YQLATQSGVVVRFRGNELGCSGCLRITVGTHEENTHLIKYFKETLYKLANE"
+ gene complement(<144327..>148709)
+ /gene="NUP159"
+ /locus_tag="YIL115C"
+ /gene_synonym="NUP158; RAT7"
+ /db_xref="GeneID:854691"
+ mRNA complement(<144327..>148709)
+ /gene="NUP159"
+ /locus_tag="YIL115C"
+ /gene_synonym="NUP158; RAT7"
+ /product="FG-nucleoporin NUP159"
+ /transcript_id="NM_001179463.1"
+ /db_xref="GeneID:854691"
+ CDS complement(144327..148709)
+ /gene="NUP159"
+ /locus_tag="YIL115C"
+ /gene_synonym="NUP158; RAT7"
+ /experiment="EXISTENCE:direct assay:GO:0000774
+ adenyl-nucleotide exchange factor activity
+ [PMID:21576266]"
+ /experiment="EXISTENCE:direct assay:GO:0005643 nuclear
+ pore [PMID:7642562|PMID:7744966|PMID:10684247]"
+ /experiment="EXISTENCE:direct assay:GO:0044613 nuclear
+ pore central transport channel [PMID:18046406]"
+ /experiment="EXISTENCE:direct assay:GO:0044614 nuclear
+ pore cytoplasmic filaments [PMID:14960378|PMID:10684247]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006607
+ NLS-bearing protein import into nucleus [PMID:15039779]"
+ /experiment="EXISTENCE:genetic interaction:GO:0016973
+ poly(A)+ mRNA export from nucleus [PMID:10952996]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000055
+ ribosomal large subunit export from nucleus
+ [PMID:11739405|PMID:11071906]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000056
+ ribosomal small subunit export from nucleus
+ [PMID:11739405]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000774
+ adenyl-nucleotide exchange factor activity
+ [PMID:21576266]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006405 RNA
+ export from nucleus [PMID:11352936]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006611 protein
+ export from nucleus [PMID:17347149]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016973
+ poly(A)+ mRNA export from nucleus
+ [PMID:9802895|PMID:27385342|PMID:9359887|PMID:7744966|PMID
+ :15574330]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051664 nuclear
+ pore localization [PMID:9359887|PMID:7744966]"
+ /experiment="EXISTENCE:physical interaction:GO:0017056
+ structural constituent of nuclear pore [PMID:17418788]"
+ /note="FG-nucleoporin component of central core of the
+ nuclear pore complex; also part of the nuclear pore
+ complex (NPC) cytoplasmic filaments; contributes directly
+ to nucleocytoplasmic transport; regulates ADP release from
+ the ATP-dependent RNA helicase Dbp5p; forms a stable
+ association with Nup82p, Gle2p and two other
+ FG-nucleoporins (Nsp1p and Nup116p)"
+ /codon_start=1
+ /product="FG-nucleoporin NUP159"
+ /protein_id="NP_012151.1"
+ /db_xref="GeneID:854691"
+ /db_xref="SGD:S000001377"
+ /translation="MSSLKDEVPTETSEDFGFKFLGQKQILPSFNEKLPFASLQNLDI
+ SNSKSLFVAASGSKAVVGELQLLRDHITSDSTPLTFKWEKEIPDVIFVCFHGDQVLVS
+ TRNALYSLDLEELSEFRTVTSFEKPVFQLKNVNNTLVILNSVNDLSALDLRTKSTKQL
+ AQNVTSFDVTNSQLAVLLKDRSFQSFAWRNGEMEKQFEFSLPSELEELPVEEYSPLSV
+ TILSPQDFLAVFGNVISETDDEVSYDQKMYIIKHIDGSASFQETFDITPPFGQIVRFP
+ YMYKVTLSGLIEPDANVNVLASSCSSEVSIWDSKQVIEPSQDSERAVLPISEETDKDT
+ NPIGVAVDVVTSGTILEPCSGVDTIERLPLVYILNNEGSLQIVGLFHVAAIKSGHYSI
+ NLESLEHEKSLSPTSEKIPIAGQEQEEKKKNNESSKALSENPFTSANTSGFTFLKTQP
+ AAANSLQSQSSSTFGAPSFGSSAFKIDLPSVSSTSTGVASSEQDATDPASAKPVFGKP
+ AFGAIAKEPSTSEYAFGKPSFGAPSFGSGKSSVESPASGSAFGKPSFGTPSFGSGNSS
+ VEPPASGSAFGKPSFGTPSFGSGNSSAEPPASGSAFGKPSFGTSAFGTASSNETNSGS
+ IFGKAAFGSSSFAPANNELFGSNFTISKPTVDSPKEVDSTSPFPSSGDQSEDESKSDV
+ DSSSTPFGTKPNTSTKPKTNAFDFGSSSFGSGFSKALESVGSDTTFKFGTQASPFSSQ
+ LGNKSPFSSFTKDDTENGSLSKGSTSEINDDNEEHESNGPNVSGNDLTDSTVEQTSST
+ RLPETPSDEDGEVVEEEAQKSPIGKLTETIKKSANIDMAGLKNPVFGNHVKAKSESPF
+ SAFATNITKPSSTTPAFSFGNSTMNKSNTSTVSPMEEADTKETSEKGPITLKSVENPF
+ LPAKEERTGESSKKDHNDDPKDGYVSGSEISVRTSESAFDTTANEEIPKSQDVNNHEK
+ SETDPKYSQHAVVDHDNKSKEMNETSKNNERSGQPNHGVQGDGIALKKDNEKENFDSN
+ MAIKQFEDHQSSEEDASEKDSRQSSEVKESDDNMSLNSDRDESISESYDKLEDINTDE
+ LPHGGEAFKAREVSASADFDVQTSLEDNYAESGIQTDLSESSKENEVQTDAIPVKHNS
+ TQTVKKEAVDNGLQTEPVETCNFSVQTFEGDENYLAEQCKPKQLKEYYTSAKVSNIPF
+ VSQNSTLRLIESTFQTVEAEFTVLMENIRNMDTFFTDQSSIPLVKRTVRSINNLYTWR
+ IPEAEILLNIQNNIKCEQMQITNANIQDLKEKVTDYVRKDIAQITEDVANAKEEYLFL
+ MHFDDASSGYVKDLSTHQFRMQKTLRQKLFDVSAKINHTEELLNILKLFTVKNKRLDD
+ NPLVAKLAKESLARDGLLKEIKLLREQVSRLQLEEKGKKASSFDASSSITKDMKGFKV
+ VEVGLAMNTKKQIGDFFKNLNMAK"
+ gene complement(<149143..>149988)
+ /gene="POR2"
+ /locus_tag="YIL114C"
+ /db_xref="GeneID:854692"
+ mRNA complement(<149143..>149988)
+ /gene="POR2"
+ /locus_tag="YIL114C"
+ /product="putative porin POR2"
+ /transcript_id="NM_001179462.3"
+ /db_xref="GeneID:854692"
+ CDS complement(149143..149988)
+ /gene="POR2"
+ /locus_tag="YIL114C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:11914276|PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane [PMID:9315631|PMID:16407407]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045454 cell
+ redox homeostasis [PMID:18768136]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051027 DNA
+ transport [PMID:19056337]"
+ /note="Putative mitochondrial porin (voltage-dependent
+ anion channel); not required for mitochondrial membrane
+ permeability or mitochondrial osmotic stability; POR2 has
+ a paralog, POR1, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="putative porin POR2"
+ /protein_id="NP_012152.3"
+ /db_xref="GeneID:854692"
+ /db_xref="SGD:S000001376"
+ /translation="MALRFFNDISRDVNGLFNRDFFHTNPLSLNISTTTENGVNFTLK
+ AKQGVTEGPIQTSVEGRFYDRKEGVSLSQSWSNQNRLNTRIEFSKIAPGWKGDVNAFL
+ TPQSIKNAKFNLSYAQKSFAARTSIDILQPKDFVGSVTLGHRGFVGGTDIAYDTAAGL
+ CARYAMSIGYLAREYSFILSTNNRQCATASFFQNVNRYLQVGTKATLQSKTSSNMNIE
+ FVTRYVPDSISQVKAKIADSGLTTLSYKRNLNKDISLGVGMSFNALQLTEPVHKFGWS
+ LSFSP"
+ gene <150559..>151188
+ /gene="SDP1"
+ /locus_tag="YIL113W"
+ /db_xref="GeneID:854693"
+ mRNA <150559..>151188
+ /gene="SDP1"
+ /locus_tag="YIL113W"
+ /product="mitogen-activated protein kinase tyrosine
+ protein phosphatase SDP1"
+ /transcript_id="NM_001179461.1"
+ /db_xref="GeneID:854693"
+ CDS 150559..151188
+ /gene="SDP1"
+ /locus_tag="YIL113W"
+ /EC_number="3.1.3.48"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:11923319]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11923319]"
+ /experiment="EXISTENCE:direct assay:GO:0017017 MAP kinase
+ tyrosine/serine/threonine phosphatase activity
+ [PMID:12220658]"
+ /experiment="EXISTENCE:direct assay:GO:0033550 MAP kinase
+ tyrosine phosphatase activity
+ [PMID:11923319|PMID:17495930]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000196 cell
+ wall integrity MAPK cascade [PMID:11923319]"
+ /note="Stress-inducible dual-specificity MAP kinase
+ phosphatase; negatively regulates Slt2p MAP kinase by
+ direct dephosphorylation, diffuse localization under
+ normal conditions shifts to punctate localization after
+ heat shock; SDP1 has a paralog, MSG5, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="mitogen-activated protein kinase tyrosine
+ protein phosphatase SDP1"
+ /protein_id="NP_012153.1"
+ /db_xref="GeneID:854693"
+ /db_xref="SGD:S000001375"
+ /translation="MNIYTSPTRTPNIAPKSGQRPSLPMLATDERSTDKESPNEDREF
+ VPCSSLDVRRIYPKGPLLVLPEKIYLYSEPTVKELLPFDVVINVAEEANDLRMQVPAV
+ EYHHYRWEHDSQIALDLPSLTSIIHAATTKREKILIHCQCGLSRSATLIIAYIMKYHN
+ LSLRHSYDLLKSRADKINPSIGLIFQLMEWEVALNAKTNVQANSYRKVP"
+ gene <151595..>154846
+ /gene="HOS4"
+ /locus_tag="YIL112W"
+ /db_xref="GeneID:854694"
+ mRNA <151595..>154846
+ /gene="HOS4"
+ /locus_tag="YIL112W"
+ /product="Hos4p"
+ /transcript_id="NM_001179460.1"
+ /db_xref="GeneID:854694"
+ CDS 151595..154846
+ /gene="HOS4"
+ /locus_tag="YIL112W"
+ /experiment="EXISTENCE:direct assay:GO:0006338 chromatin
+ remodeling [PMID:11711434]"
+ /experiment="EXISTENCE:direct assay:GO:0034967 Set3
+ complex [PMID:11711434]"
+ /note="Subunit of the Set3 complex; complex is a
+ meiotic-specific repressor of sporulation specific genes
+ that contains deacetylase activity; potential Cdc28p
+ substrate"
+ /codon_start=1
+ /product="Hos4p"
+ /protein_id="NP_012154.1"
+ /db_xref="GeneID:854694"
+ /db_xref="SGD:S000001374"
+ /translation="MNETTTKQPLKKRSLSSYLSNVSTRREELEKISKQETSEEEDTA
+ GKHEQRETLSEEVSDKFPENVASFRSQTTSVHQATQNNLNAKESEDLAHKNDASSHEG
+ EVNGDSRPDDVPETNEKISQAIRAKISSSSSSPNVRNVDIQNHQPFSRDQLRAMLKEP
+ KRKTVDDFIEEEGLGAVEEEDLSDEVLEKNTTEPENVEKDIEYSDSDKDTDDVGSDDP
+ TAPNSPIKLGRRKLVRGDQLDATTSSMFNNESDSELSDIDDSKNIALSSSLFRGGSSP
+ VKETNNNLSNMNSSPAQNPKRGSVSRSNDSNKSSHIAVSKRPKQKKGIYRDSGGRTRL
+ QIACDKGKYDVVKKMIEEGGYDINDQDNAGNTALHEAALQGHIEIVELLIENGADVNI
+ KSIEMFGDTPLIDASANGHLDVVKYLLKNGADPTIRNAKGLTAFESVDDESEFDDEED
+ QKILREIKKRLSIAAKKWTNRAGIHNDKSKNGNNAHTIDQPPFDNTTKAKNEKAADSP
+ SMASNIDEKAPEEEFYWTDVTSRAGKEKLFKASKEGHLPYVGTYVENGGKIDLRSFFE
+ SVKCGHEDITSIFLAFGFPVNQTSRDNKTSALMVAVGRGHLGTVKLLLEAGADPTKRD
+ KKGRTALYYAKNSIMGITNSEEIQLIENAINNYLKKHSEDNNDDDDDDDNNNETYKHE
+ KKREKTQSPILASRRSATPRIEDEEDDTRMLNLADDDFNNDRDVKESTTSDSRKRLDD
+ NENVGTQYSLDWKKRKTNALQDEEKLKSISPLSMEPHSPKKAKSVEISKIHEETAAER
+ EARLKEEEEYRKKRLEKKRKKEQELLQKLAEDEKKRIEEQEKQKVLEMERLEKATLEK
+ ARKMEREKEMEEISYRRAVRDLYPLGLKIINFNDKLDYKRFLPLYYFVDEKNDKFVLD
+ LQVMILLKDIDLLSKDNQPTSEKIPVDPSHLTPLWNMLKFIFLYGGSYDDKKNNMENK
+ RYVVNFDGVDLDTKIGYELLEYKKFVSLPMAWIKWDNVVIENHAKRKEIEGNMIQISI
+ NEFARWRNDKLNKAQQPTRKQRSLKIPRELPVKFQHRMSISSVLQQTSKEPFW"
+ gene <155222..>155765
+ /gene="COX5B"
+ /locus_tag="YIL111W"
+ /db_xref="GeneID:854695"
+ mRNA join(<155222,155311..>155765)
+ /gene="COX5B"
+ /locus_tag="YIL111W"
+ /product="cytochrome c oxidase subunit Vb"
+ /transcript_id="NM_001179459.1"
+ /db_xref="GeneID:854695"
+ CDS join(155222,155311..155765)
+ /gene="COX5B"
+ /locus_tag="YIL111W"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005751
+ mitochondrial respiratory chain complex IV [PMID:2986105]"
+ /experiment="EXISTENCE:direct assay:GO:0006123
+ mitochondrial electron transport, cytochrome c to oxygen
+ [PMID:1331058]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004129
+ cytochrome-c oxidase activity [PMID:2986105]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0050421 nitrite
+ reductase (NO-forming) activity [PMID:18388202]"
+ /note="Subunit Vb of cytochrome c oxidase; cytochrome c
+ oxidase is the terminal member of the mitochondrial inner
+ membrane electron transport chain; Cox5Bp is predominantly
+ expressed during anaerobic growth while its isoform Va
+ (Cox5Ap) is expressed during aerobic growth; COX5B has a
+ paralog, COX5A, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="cytochrome c oxidase subunit Vb"
+ /protein_id="NP_012155.1"
+ /db_xref="GeneID:854695"
+ /db_xref="SGD:S000001373"
+ /translation="MLRTSLTKGARLTGTRFVQTKALSKATLTDLPERWENMPNLEQK
+ EIADNLTERQKLPWKTLNNEEIKAAWYISYGEWGPRRPVHGKGDVAFITKGVFLGLGI
+ SFGLFGLVRLLANPETPKTMNREWQLKSDEYLKSKNANPWGGYSQVQSK"
+ gene <156045..>157178
+ /gene="HPM1"
+ /locus_tag="YIL110W"
+ /gene_synonym="MNI1"
+ /db_xref="GeneID:854696"
+ mRNA <156045..>157178
+ /gene="HPM1"
+ /locus_tag="YIL110W"
+ /gene_synonym="MNI1"
+ /product="protein-histidine N-methyltransferase"
+ /transcript_id="NM_001179458.1"
+ /db_xref="GeneID:854696"
+ CDS 156045..157178
+ /gene="HPM1"
+ /locus_tag="YIL110W"
+ /gene_synonym="MNI1"
+ /EC_number="2.1.1.85"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000027
+ ribosomal large subunit assembly
+ [PMID:26826131|PMID:24865971]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0018064
+ protein-L-histidine N-tele-methyltransferase activity
+ [PMID:20864530]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042038
+ peptidyl-histidine methylation, to form
+ tele-methylhistidine [PMID:20864530]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045903
+ positive regulation of translational fidelity
+ [PMID:24865971]"
+ /note="AdoMet-dependent methyltransferase; involved in a
+ novel 3-methylhistidine modification of multiple proteins,
+ including ribosomal protein Rpl3p; seven beta-strand MTase
+ family member; null mutant exhibits a weak vacuolar
+ protein sorting defect and caspofungin resistance"
+ /codon_start=1
+ /product="protein-histidine N-methyltransferase"
+ /protein_id="NP_012156.1"
+ /db_xref="GeneID:854696"
+ /db_xref="SGD:S000001372"
+ /translation="MSFSFGFTSNDFDDDELVAQPETFVESSKENENTTAYINPLDSD
+ FLSQAGVVQPNVEDLGTILESLKDVRLTFEEFQSPIYRKPLIKRELFDVKHQLMLETD
+ AQSNNNSTELDILLGDTSEDLRKNIYEGGLKSWECSYDLVDLLSENVDRISNDIDAVV
+ EIGCGTALPSEFLFRSALLRNDRSKGLKFVLTDYNASVLRLVTIPNLVITWAKTVLTK
+ EQWYALQKDECEDIPINNEELLLTSKLLAAFYDDVQSRNISVTLISGSWGRKFSNLIH
+ EVLSGSQKVLSLSSETIYQPDNLPVIAETILDIHNLPQTDVKTYVAAKDIYFGVGGSI
+ TEFEAYLDDKINSEHLPIHSERFKVNSGLKRSIICIETNKAIR"
+ gene complement(<157385..>160165)
+ /gene="SEC24"
+ /locus_tag="YIL109C"
+ /gene_synonym="ANU1"
+ /db_xref="GeneID:854697"
+ mRNA complement(<157385..>160165)
+ /gene="SEC24"
+ /locus_tag="YIL109C"
+ /gene_synonym="ANU1"
+ /product="COPII subunit SEC24"
+ /transcript_id="NM_001179457.3"
+ /db_xref="GeneID:854697"
+ CDS complement(157385..160165)
+ /gene="SEC24"
+ /locus_tag="YIL109C"
+ /gene_synonym="ANU1"
+ /experiment="EXISTENCE:direct assay:GO:0000329 fungal-type
+ vacuole membrane [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0030127 COPII
+ vesicle coat [PMID:8004676]"
+ /experiment="EXISTENCE:direct assay:GO:0090110
+ COPII-coated vesicle cargo loading
+ [PMID:12941277|PMID:14627716|PMID:15665868]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005048 signal
+ sequence binding [PMID:12941277]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016236
+ macroautophagy [PMID:11694599]"
+ /note="Component of the Sec23p-Sec24p heterodimer of the
+ COPII vesicle coat; required for cargo selection during
+ vesicle formation in ER to Golgi transport; homologous to
+ Sfb3p; SEC24 has a paralog, SFB2, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="COPII subunit SEC24"
+ /protein_id="NP_012157.3"
+ /db_xref="GeneID:854697"
+ /db_xref="SGD:S000001371"
+ /translation="MSHHKKRVYPQAQLQYGQNATPLQQPAQFMPPQDPAAAGMSYGQ
+ MGMPPQGAVPSMGQQQFLTPAQEQLHQQIDQATTSMNDMHLHNVPLVDPNAYMQPQVP
+ VQMGTPLQQQQQPMAAPAYGQPSAAMGQNMRPMNQLYPIDLLTELPPPITDLTLPPPP
+ LVIPPERMLVPSELSNASPDYIRSTLNAVPKNSSLLKKSKLPFGLVIRPYQHLYDDID
+ PPPLNEDGLIVRCRRCRSYMNPFVTFIEQGRRWRCNFCRLANDVPMQMDQSDPNDPKS
+ RYDRNEIKCAVMEYMAPKEYTLRQPPPATYCFLIDVSQSSIKSGLLATTINTLLQNLD
+ SIPNHDERTRISILCVDNAIHYFKIPLDSENNEESADQINMMDIADLEEPFLPRPNSM
+ VVSLKACRQNIETLLTKIPQIFQSNLITNFALGPALKSAYHLIGGVGGKIIVVSGTLP
+ NLGIGKLQRRNESGVVNTSKETAQLLSCQDSFYKNFTIDCSKVQITVDLFLASEDYMD
+ VASLSNLSRFTAGQTHFYPGFSGKNPNDIVKFSTEFAKHISMDFCMETVMRARGSTGL
+ RMSRFYGHFFNRSSDLCAFSTMPRDQSYLFEVNVDESIMADYCYVQVAVLLSLNNSQR
+ RIRIITLAMPTTESLAEVYASADQLAIASFYNSKAVEKALNSSLDDARVLINKSVQDI
+ LATYKKEIVVSNTAGGAPLRLCANLRMFPLLMHSLTKHMAFRSGIVPSDHRASALNNL
+ ESLPLKYLIKNIYPDVYSLHDMADEAGLPVQTEDGEATGTIVLPQPINATSSLFERYG
+ LYLIDNGNELFLWMGGDAVPALVFDVFGTQDIFDIPIGKQEIPVVENSEFNQRVRNII
+ NQLRNHDDVITYQSLYIVRGASLSEPVNHASAREVATLRLWASSTLVEDKILNNESYR
+ EFLQIMKARISK"
+ gene <160887..>162977
+ /locus_tag="YIL108W"
+ /db_xref="GeneID:854698"
+ mRNA <160887..>162977
+ /locus_tag="YIL108W"
+ /product="putative metalloendopeptidase"
+ /transcript_id="NM_001179456.1"
+ /db_xref="GeneID:854698"
+ CDS 160887..162977
+ /locus_tag="YIL108W"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922|PMID:14562095]"
+ /note="Putative metalloendopeptidase; forms cytoplasmic
+ foci upon DNA replication stress"
+ /codon_start=1
+ /product="putative metalloendopeptidase"
+ /protein_id="NP_012158.1"
+ /db_xref="GeneID:854698"
+ /db_xref="SGD:S000001370"
+ /translation="MVGSKDIDLFNLRENEQIVSPCLIVHGKCNKQNGAKTVQVQHPQ
+ LPPITYPIHNQFFKATVILTPGENKLTFVTDTNTARTIVCYYTPLTQNPPVHLCLILA
+ KDSPLQFDSPREQKDREGGNGLELAIKKLRLGARLMQAYTNEQMLRNSMGNRTFPFVE
+ EFTWDTLFERPAMRNTIKIHVVRSEKTVKEIQDPDIAQQNSKGKNTGALFGIAMDALK
+ SYGGPFTNNEKPVQAACMFLDTHWDGKLIRGHAALGGGDDSIKLAIFGSHGLYSWPTC
+ LEQLVPYFTDETRSSTSEVANDCNECGTYWECLTITLGAFMHEIGHLLGCPHQESGVM
+ LRGYTTLNRSFLTKEAYSVRTNSTGASPPIFPKEECTWNRLDTVRFLYHPSFTLPQDY
+ YDPSFMRPTKLGGYPNIKHSVYPLGNGSCRILSPTGIYLIEIICDDLARGHIEYLPVS
+ LGGQGPQREVIVTLDDLRARLPKNELAKFGNTFKLKILSVNAPETEFDKFPSLLDVQP
+ LDMSKYGFSKNVQGIKSPLYGRSDGGNAVGVVAFDVRLVTAVRIYHGYALDGVRFYYK
+ EKPTGTKDAPASKPSVPPRNYFSKITHSIKNHASINEENLKSVLFGHETQNFTDATLE
+ PGEIIIGFNLRCGAWVDAIQIITSHGRMTDMFGNKDGGGFAELQPPNGQYILGVTGRV
+ GQWVDAFGIIYGAL"
+ gene complement(<163278..>165761)
+ /gene="PFK26"
+ /locus_tag="YIL107C"
+ /db_xref="GeneID:854699"
+ mRNA complement(<163278..>165761)
+ /gene="PFK26"
+ /locus_tag="YIL107C"
+ /product="6-phosphofructo-2-kinase"
+ /transcript_id="NM_001179455.1"
+ /db_xref="GeneID:854699"
+ CDS complement(163278..165761)
+ /gene="PFK26"
+ /locus_tag="YIL107C"
+ /EC_number="2.7.1.105"
+ /experiment="EXISTENCE:curator inference:GO:0005737
+ cytoplasm [PMID:1657152]"
+ /experiment="EXISTENCE:direct assay:GO:0003873
+ 6-phosphofructo-2-kinase activity [PMID:1322693]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003873
+ 6-phosphofructo-2-kinase activity [PMID:1657152]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006003
+ fructose 2,6-bisphosphate metabolic process
+ [PMID:1657152]"
+ /note="6-phosphofructo-2-kinase; inhibited by
+ phosphoenolpyruvate and sn-glycerol 3-phosphate; has
+ negligible fructose-2,6-bisphosphatase activity;
+ transcriptional regulation involves protein kinase A"
+ /codon_start=1
+ /product="6-phosphofructo-2-kinase"
+ /protein_id="NP_012159.1"
+ /db_xref="GeneID:854699"
+ /db_xref="SGD:S000001369"
+ /translation="MFKPVDFSETSPVPPDIDLAPTQSPHHVAPSQDSSYDLLSRSSD
+ DKIDAEKGPHDELSKHLPLFQKRPLSDTPISSNWNSPGITEENTPSDSPENSATNLKS
+ LHRLHINDETQLKNAKIPTNDTTDYMPPSDGANEVTRIDLKDIKSPTRHHKRRPTTID
+ VPGLTKSKTSPDGLISKEDSGSKLVIVMVGLPATGKSFITNKLSRFLNYSLYYCKVFN
+ VGNTRRKFAKEHGLKDQDSKFFEPKNADSTRLRDKWAMDTLDELLDYLLEGSGSVGIF
+ DATNTSRERRKNVLARIRKRSPHLKVLFLESVCSDHALVQKNIRLKLFGPDYKGKDPE
+ SSLKDFKSRLANYLKAYEPIEDDENLQYIKMIDVGKKVIAYNIQGFLASQTVYYLLNF
+ NLADRQIWITRSGESEDNVSGRIGGNSHLTPRGLRFAKSLPKFIARQREIFYQNLMQQ
+ KKNNENTDGNIYNDFFVWTSMRARTIGTAQYFNEDDYPIKQMKMLDELSAGDYDGMTY
+ PEIKNNFPEEFEKRQKDKLRYRYPGIGGESYMDVINRLRPVITELERIEDNVLIITHR
+ VVARALLGYFMNLSMGIIANLDVPLHCVYCLEPKPYGITWSLWEYDEASDSFSKVPQT
+ DLNTTRVKEVGLVYNERRYSVIPTAPPSARSSFASDFLSRKRSNPTSASSSQSELSEQ
+ PKNSVSAQTGSNNTTLIGSNFNIKNENGDSRIPLSAPLMATNTSNNILDGGGTSISIH
+ RPRVVPNQNNVNPLLANNNKAASNVPNVKKSAATPRQIFEIDKVDEKLSMLKNKSFLL
+ HGKDYPNNADNNDNEDIRAKTMNRSQSHV"
+ gene <166415..>167444
+ /gene="MOB1"
+ /locus_tag="YIL106W"
+ /db_xref="GeneID:854700"
+ mRNA join(<166415..166434,166520..>167444)
+ /gene="MOB1"
+ /locus_tag="YIL106W"
+ /product="Mob1p"
+ /transcript_id="NM_001179454.1"
+ /db_xref="GeneID:854700"
+ CDS join(166415..166434,166520..167444)
+ /gene="MOB1"
+ /locus_tag="YIL106W"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0005816 spindle
+ pole body [PMID:11434459]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:11564880|PMID:11434459]"
+ /experiment="EXISTENCE:direct assay:GO:0019207 kinase
+ regulator activity [PMID:11404483]"
+ /experiment="EXISTENCE:direct assay:GO:0034399 nuclear
+ periphery [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0034973 Sid2-Mob1
+ complex [PMID:9528782]"
+ /experiment="EXISTENCE:direct assay:GO:0044732 mitotic
+ spindle pole body [PMID:11564880]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007096
+ regulation of exit from mitosis [PMID:9436989]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000281 mitotic
+ cytokinesis [PMID:11564880]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007096
+ regulation of exit from mitosis [PMID:9436989]"
+ /experiment="EXISTENCE:physical interaction:GO:0034973
+ Sid2-Mob1 complex [PMID:9528782]"
+ /note="Component of the mitotic exit network; associates
+ with and is required for the activation and
+ Cdc15p-dependent phosphorylation of the Dbf2p kinase;
+ required for cytokinesis and cell separation; component of
+ the CCR4 transcriptional complex; relocalizes from
+ cytoplasm to the nuclear periphery upon DNA replication
+ stress"
+ /codon_start=1
+ /product="Mob1p"
+ /protein_id="NP_012160.2"
+ /db_xref="GeneID:854700"
+ /db_xref="SGD:S000001368"
+ /translation="MSFLQNFHISPGQTIRSTRGFKWNTANAANNAGSVSPTKATPHN
+ NTINGNNNNANTINNRADFTNNPVNGYNESDHGRMSPVLTTPKRHAPPPEQLQNVTDF
+ NYTPSHQKPFLQPQAGTTVTTHQDIKQIVEMTLGSEGVLNQAVKLPRGEDENEWLAVH
+ CVDFYNQINMLYGSITEFCSPQTCPRMIATNEYEYLWAFQKGQPPVSVSAPKYVECLM
+ RWCQDQFDDESLFPSKVTGTFPEGFIQRVIQPILRRLFRVYAHIYCHHFNEILELNLQ
+ TVLNTSFRHFCLFAQEFELLRPADFGPLLELVMELRDR"
+ gene complement(<167581..>169641)
+ /gene="SLM1"
+ /locus_tag="YIL105C"
+ /gene_synonym="LIT2"
+ /db_xref="GeneID:854701"
+ mRNA complement(<167581..>169641)
+ /gene="SLM1"
+ /locus_tag="YIL105C"
+ /gene_synonym="LIT2"
+ /product="phosphatidylinositol 4,5-bisphosphate-binding
+ protein"
+ /transcript_id="NM_001179453.1"
+ /db_xref="GeneID:854701"
+ CDS complement(167581..169641)
+ /gene="SLM1"
+ /locus_tag="YIL105C"
+ /gene_synonym="LIT2"
+ /experiment="EXISTENCE:direct assay:GO:0005546
+ phosphatidylinositol-4,5-bisphosphate binding
+ [PMID:21119626|PMID:15689497]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane
+ [PMID:16622836|PMID:15372071|PMID:15689497|PMID:17101780]"
+ /experiment="EXISTENCE:direct assay:GO:0046625
+ sphingolipid binding [PMID:21119626]"
+ /experiment="EXISTENCE:genetic interaction:GO:0001558
+ regulation of cell growth [PMID:15689497]"
+ /experiment="EXISTENCE:genetic interaction:GO:0016197
+ endosomal transport [PMID:21451250]"
+ /experiment="EXISTENCE:genetic interaction:GO:0030036
+ actin cytoskeleton organization
+ [PMID:15689497|PMID:15372071]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051017
+ actin filament bundle assembly [PMID:15689497]"
+ /experiment="EXISTENCE:genetic interaction:GO:0070941
+ eisosome assembly [PMID:21451250]"
+ /experiment="EXISTENCE:genetic interaction:GO:0072659
+ protein localization to plasma membrane [PMID:22307609]"
+ /experiment="EXISTENCE:physical interaction:GO:0001558
+ regulation of cell growth [PMID:15689497]"
+ /experiment="EXISTENCE:physical interaction:GO:0030950
+ establishment or maintenance of actin cytoskeleton
+ polarity [PMID:15689497]"
+ /experiment="EXISTENCE:physical interaction:GO:0031929 TOR
+ signaling [PMID:15689497]"
+ /note="Phosphoinositide PI4,5P(2) binding protein, forms a
+ complex with Slm2p; acts downstream of Mss4p in a pathway
+ regulating actin cytoskeleton organization in response to
+ stress; TORC2 complex substrate and effector; protein
+ abundance increases in response to DNA replication stress;
+ SLM1 has a paralog, SLM2, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="phosphatidylinositol 4,5-bisphosphate-binding
+ protein"
+ /protein_id="NP_012161.1"
+ /db_xref="GeneID:854701"
+ /db_xref="SGD:S000001367"
+ /translation="MSKNNTMTSAVSDMLSQQQLNLQHLHNLQQHTRSMTSADHANVL
+ QQQQQQQQQQQQQQQQQQQSASFQNGSLTSDINQQSYLNGQPVPSTSNSTFQNNRTLT
+ MNSGGLQGIISNGSPNIDSNTNVTIAVPDPNNNNGKQLQGKNSLTNTSILSRARSSLQ
+ RQRLAQQQQQQQDPRSPLVILVPTAAQPTDILAARFSAWRNVIKSVIVYLTEIASIQD
+ EIVRQQLRLSHAVQFPFFSIENQYQPSSQEDKSVQKFFLPLGNGSIQDLPTILNQYHE
+ SLASSASKASRELTNDVIPRLEDLRRDLIVKIKEIKSLQSDFKNSCSKELQQTKQAMK
+ QFQESLKDARYSVPKQDPFLTKLALDRQIKKQLQEENFLHEAFDNLETSGAELEKIVV
+ MEIQNSLTIYARLLGQEAQLVFDILISKLDSGFFNVDPQFEWDNFISRDPNFLLPNLP
+ MRTFKEIVYKYQFDPLTYEIKSGFLERRSKFLKSYSKGYYVLTPNFLHEFKTADRKKD
+ LVPVMSLALSECTVTEHSRKNSTSSPNSTGSDAKFVLHAKQNGIIRRGHNWVFKADSY
+ ESMMSWFDNLKILTSTSNIQDKYKFITQKLNLNSDGKPKLTNNHTSINKYQLSNANST
+ MVENDENDDINSNYVGSTVTPKLDNQTNTNTSMSSLPDTNDSELQDQVPNIYIQTPIN
+ DFKS"
+ gene <167664..>167804
+ /locus_tag="YIL105W-A"
+ /db_xref="GeneID:13393613"
+ mRNA <167664..>167804
+ /locus_tag="YIL105W-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001270750.1"
+ /db_xref="GeneID:13393613"
+ CDS 167664..167804
+ /locus_tag="YIL105W-A"
+ /note="hypothetical protein; completely overlaps the
+ verified gene SLM1; identified by gene-trapping,
+ microarray-based expression analysis, and genome-wide
+ homology searching; mRNA identified as translated by
+ ribosome profiling data"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001257679.1"
+ /db_xref="GeneID:13393613"
+ /db_xref="SGD:S000028657"
+ /translation="MKTWMYSYLFDCPILVLPWTPHNYYLYHRFHHFLPLCYWHYSAD
+ TY"
+ gene complement(<169982..>171505)
+ /gene="SHQ1"
+ /locus_tag="YIL104C"
+ /db_xref="GeneID:854702"
+ mRNA complement(<169982..>171505)
+ /gene="SHQ1"
+ /locus_tag="YIL104C"
+ /product="Hsp90 cochaperone SHQ1"
+ /transcript_id="NM_001179452.3"
+ /db_xref="GeneID:854702"
+ CDS complement(169982..171505)
+ /gene="SHQ1"
+ /locus_tag="YIL104C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0005654 nucleoplasm
+ [PMID:12228251]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0051082 unfolded
+ protein binding [PMID:19426738]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000493 box
+ H/ACA snoRNP assembly [PMID:12228251]"
+ /experiment="EXISTENCE:physical interaction:GO:0000493 box
+ H/ACA snoRNP assembly [PMID:12228251]"
+ /note="Chaperone protein; required for the assembly of box
+ H/ACA snoRNPs and thus for pre-rRNA processing; functions
+ as an RNA mimic; forms a complex with Naf1p and interacts
+ with H/ACA snoRNP components Nhp2p and Cbf5p; homology
+ with known Hsp90p cochaperones; relocalizes to the cytosol
+ in response to hypoxia"
+ /codon_start=1
+ /product="Hsp90 cochaperone SHQ1"
+ /protein_id="NP_012162.3"
+ /db_xref="GeneID:854702"
+ /db_xref="SGD:S000001366"
+ /translation="MITPRFSITQDEEFIFLKIFISNIRFSAVGLEIIIQENMIIFHL
+ SPYYLRLRFPHELIDDERSTAQYDSKDECINVKVAKLNKNEYFEDLDLPTKLLARQGD
+ LAGADALTENTDAKKTQKPLIQEVETDGVSNNIKDDVKTIGQMGEGFNWEIEQKMDSS
+ TNNGILKTKYGFDNLYDTVISVSTSNGNDINELDDPEHTDANDRVIERLRKENLKFDP
+ EYYVSEYMTHKYGNEEDLEINGIKELLKFTPSIVKQYLQWYKDSTNPNLVMPIEFTDE
+ EQKQMQDNLPKKSYLVEDIKPLYVTILSVLFSYVFEQIENEGTHTTESAWTMGKLCPQ
+ ISFLDQQLKQVNELQDGMKEISKVNKDSSLIKIAIITGIRRALSYPLHRNYDLAMKAW
+ TFVYYILRGGKRLVIRALLDIHETFRFHDVYYVYDKVLLDDLTAWFISQGSENVIRSL
+ ALEMRKEQESLSKQDIEFECIASFNEQTGEPEWETLNIREMEILAESEYREQQQNPQ"
+ gene <171751..>173028
+ /gene="DPH1"
+ /locus_tag="YIL103W"
+ /db_xref="GeneID:854703"
+ mRNA <171751..>173028
+ /gene="DPH1"
+ /locus_tag="YIL103W"
+ /product="2-(3-amino-3-carboxypropyl)histidine synthase"
+ /transcript_id="NM_001179451.1"
+ /db_xref="GeneID:854703"
+ CDS 171751..173028
+ /gene="DPH1"
+ /locus_tag="YIL103W"
+ /EC_number="2.5.1.108"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276|PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0017183 protein
+ histidyl modification to diphthamide [PMID:15485916]"
+ /note="Protein required for synthesis of diphthamide;
+ required along with Dph2p, Kti11p, Jjj3p, and Dph5p;
+ diphthamide is a modified histidine residue of translation
+ elongation factor 2 (Eft1p or Eft2p); forms a complex with
+ Dph2p that catalyzes the first step of diphthamide
+ biosynthesis"
+ /codon_start=1
+ /product="2-(3-amino-3-carboxypropyl)histidine synthase"
+ /protein_id="NP_012163.1"
+ /db_xref="GeneID:854703"
+ /db_xref="SGD:S000001365"
+ /translation="MSGSTESKKQPRRRFIGRKSGNSNNDKLTTVAENGNEIIHKQKS
+ RIALGRSVNHVPEDILNDKELNEAIKLLPSNYNFEIHKTVWNIRKYNAKRIALQMPEG
+ LLIYSLIISDILEQFCGVETLVMGDVSYGACCIDDFTARALDCDFIVHYAHSCLVPID
+ VTKIKVLYVFVTINIQEDHIIKTLQKNFPKGSRIATFGTIQFNPAVHSVRDKLLNDEE
+ HMLYIIPPQIKPLSRGEVLGCTSERLDKEQYDAMVFIGDGRFHLESAMIHNPEIPAFK
+ YDPYNRKFTREGYDQKQLVEVRAEAIEVARKGKVFGLILGALGRQGNLNTVKNLEKNL
+ IAAGKTVVKIILSEVFPQKLAMFDQIDVFVQVACPRLSIDWGYAFNKPLLTPYEASVL
+ LKKDVMFSEKYYPMDYYEAKGYGRGETPKHAIE"
+ gene complement(<173365..>173592)
+ /locus_tag="YIL102C-A"
+ /db_xref="GeneID:3799974"
+ mRNA complement(<173365..>173592)
+ /locus_tag="YIL102C-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001184686.1"
+ /db_xref="GeneID:3799974"
+ CDS complement(173365..173592)
+ /locus_tag="YIL102C-A"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /note="Regulatory subunit of dolichyl phosphate mannose
+ (DPM) synthase; identified based on comparisons of the
+ genome sequences of six Saccharomyces species; SWAT-GFP,
+ seamless-GFP and mCherry fusion proteins localize to the
+ endoplasmic reticulum"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001032579.1"
+ /db_xref="GeneID:3799974"
+ /db_xref="SGD:S000113587"
+ /translation="MNRFVIICLLFTYYVIWSLLPIFEIENSNPVVSLLFPISSNVAI
+ FLPIFLLLIGFTLTGSVLGVLLIRSDKKKKV"
+ gene complement(<174582..>174887)
+ /locus_tag="YIL102C"
+ /db_xref="GeneID:854704"
+ mRNA complement(<174582..>174887)
+ /locus_tag="YIL102C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179450.1"
+ /db_xref="GeneID:854704"
+ CDS complement(174582..174887)
+ /locus_tag="YIL102C"
+ /note="hypothetical protein"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012164.1"
+ /db_xref="GeneID:854704"
+ /db_xref="SGD:S000001364"
+ /translation="MLKVEKFKKLKRFEVYYCLKNSFLEEVDIEMKYSCSITTIMSNG
+ SASLLMNWEELTPGHCFTSYTTNPIAGDYGLNASAIDGHTEELVATHPAGTLENATQ"
+ gene 175031..175103
+ /locus_tag="YNCI0002W"
+ /db_xref="GeneID:854705"
+ tRNA 175031..175103
+ /locus_tag="YNCI0002W"
+ /product="tRNA-Thr"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Threonine tRNA (tRNA-Thr), predicted by tRNAscan-SE
+ analysis"
+ /db_xref="GeneID:854705"
+ /db_xref="SGD:S000006741"
+ rep_origin 175038..175358
+ /note="ARS912; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118396"
+ gene complement(<175307..>177250)
+ /gene="XBP1"
+ /locus_tag="YIL101C"
+ /db_xref="GeneID:854706"
+ mRNA complement(<175307..>177250)
+ /gene="XBP1"
+ /locus_tag="YIL101C"
+ /product="Xbp1p"
+ /transcript_id="NM_001179449.1"
+ /db_xref="GeneID:854706"
+ CDS complement(175307..177250)
+ /gene="XBP1"
+ /locus_tag="YIL101C"
+ /experiment="EXISTENCE:direct assay:GO:0000981 DNA-binding
+ transcription factor activity, RNA polymerase II-specific
+ [PMID:10611226]"
+ /experiment="EXISTENCE:direct assay:GO:0003700 DNA-binding
+ transcription factor activity [PMID:9343412]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22842922|PMID:9343412]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0043565
+ sequence-specific DNA binding [PMID:19158363]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000122
+ negative regulation of transcription by RNA polymerase II
+ [PMID:10611226|PMID:11340165|PMID:24204289|PMID:9343412]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006995
+ cellular response to nitrogen starvation [PMID:11340165]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034605
+ cellular response to heat [PMID:10611226]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042149
+ cellular response to glucose starvation [PMID:9343412]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051321 meiotic
+ cell cycle [PMID:10611226]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0070314 G1 to
+ G0 transition [PMID:24204289]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071470
+ cellular response to osmotic stress [PMID:9343412]"
+ /note="Transcriptional repressor; binds promoter sequences
+ of cyclin genes, CYS3, and SMF2; not expressed during log
+ phase of growth, but induced by stress or starvation
+ during mitosis, and late in meiosis; represses 15% of all
+ yeast genes as cells transition to quiescence; important
+ for maintaining G1 arrest and for longevity of quiescent
+ cells; member of Swi4p/Mbp1p family; phosphorylated by
+ Cdc28p; relative distribution to nucleus increases upon
+ DNA replication stress"
+ /codon_start=1
+ /product="Xbp1p"
+ /protein_id="NP_012165.1"
+ /db_xref="GeneID:854706"
+ /db_xref="SGD:S000001363"
+ /translation="MKYPAFSINSDTVHLTDNPLDDYQRLYLVSVLDRDSPPASFSAG
+ LNIRKVNYKSSIAAQFTHPNFIISARDAGNGEEAAAQNVLNCFEYQFPNLQTIQSLVH
+ EQTLLSQLASSATPHSALHLHDKNILMGKIILPSRSNKTPVSASPTKQEKKALSTASR
+ ENATSSLTKNQQFKLTKMDHNLINDKLINPNNCVIWSHDSGYVFMTGIWRLYQDVMKG
+ LINLPRGDSVSTSQQQFFCKAEFEKILSFCFYNHSSFTSEESSSVLLSSSTSSPPKRR
+ TSTGSTFLDANASSSSTSSTQANNYIDFHWNNIKPELRDLICQSYKDFLINELGPDQI
+ DLPNLNPANFTKRIRGGYIKIQGTWLPMEISRLLCLRFCFPIRYFLVPIFGPDFPKDC
+ ESWYLAHQNVTFASSTTGAGAATAATAAANTSTNFTSTAVARPRQKPRPRPRQRSTSM
+ SHSKAQKLVIEDALPSFDSFVENLGLSSNDKNFIKKNSKRQKSSTYTSQTSSPIGPRD
+ PTVQILSNLASFYNTHGHRYSYPGNIYIPQQRYSLPPPNQLSSPQRQLNYTYDHIHPV
+ PSQYQSPRHYNVPSSPIAPAPPTFPQPYGDDHYHFLKYASEVYKQQNQRPAHNTNTNM
+ DTSFSPRANNSLNNFKFKTNSKQ"
+ gene <178004..>179653
+ /gene="SGA1"
+ /locus_tag="YIL099W"
+ /db_xref="GeneID:854708"
+ mRNA <178004..>179653
+ /gene="SGA1"
+ /locus_tag="YIL099W"
+ /product="glucan 1,4-alpha-glucosidase"
+ /transcript_id="NM_001179447.3"
+ /db_xref="GeneID:854708"
+ CDS 178004..179653
+ /gene="SGA1"
+ /locus_tag="YIL099W"
+ /EC_number="3.2.1.3"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762|PMID:2493265]"
+ /experiment="EXISTENCE:direct assay:GO:0004339 glucan
+ 1,4-alpha-glucosidase activity [PMID:2493265]"
+ /experiment="EXISTENCE:direct assay:GO:0005628 prospore
+ membrane [PMID:24390141]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005980
+ glycogen catabolic process [PMID:11486014]"
+ /note="Intracellular sporulation-specific glucoamylase;
+ involved in glycogen degradation; induced during
+ starvation of a/a diploids late in sporulation, but
+ dispensable for sporulation"
+ /codon_start=1
+ /product="glucan 1,4-alpha-glucosidase"
+ /protein_id="NP_012167.3"
+ /db_xref="GeneID:854708"
+ /db_xref="SGD:S000001361"
+ /translation="MARQKMFYNKLLGMLSVGFGFAWALENITIYEFDFGKGILDQSY
+ GGVFSNNGPSQVQLRDAVLMNGTVVYDSNGAWDSSALEEWLQGQKKVSIEKIFENIGP
+ SAVYPSISPGVVIASPSQTHPDYFYQWIRDSALTINSIVSHSAGPAIETLLQYLNVSF
+ HLQRSNNTLGAGIGYTNDTVALGDPKWNVDNTAFTEDWGRPQNDGPALRSIAILKIID
+ YIKQSGTDLGAKYPFQSTADIFDDIVRWDLRFIIDHWNSSGFDLWEEVNGMHFFTLLV
+ QLSAVDKSLSYFNASERSSPFVEELRQTRRDISKFLVDPANGFINGKYNYIVGTPMIA
+ DTLRSGLDISTLLAANTVHDAPSASHLPFDINDPAVLNTLHHLMLHMRSIYPINDSSK
+ NATGIALGRYPEDVYDGYGFGEGNPWVLATCTASTTLYQLIYRHISEQHDLVVPMNND
+ CSNAFWSELVFSNLTTLGNDEGYLILEFNTPAFNQTIQKIFQLADSFLVKLKAHVGTD
+ GELSEQFNKYTGFMQGAQHLTWSYTSFWDAYQIRQEVLQSL"
+ gene complement(<179772..>180239)
+ /gene="FMC1"
+ /locus_tag="YIL098C"
+ /db_xref="GeneID:854709"
+ mRNA complement(<179772..>180239)
+ /gene="FMC1"
+ /locus_tag="YIL098C"
+ /product="Fmc1p"
+ /transcript_id="NM_001179446.3"
+ /db_xref="GeneID:854709"
+ CDS complement(179772..180239)
+ /gene="FMC1"
+ /locus_tag="YIL098C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0005759
+ mitochondrial matrix [PMID:11096112]"
+ /experiment="EXISTENCE:genetic interaction:GO:0033615
+ mitochondrial proton-transporting ATP synthase complex
+ assembly [PMID:11096112]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016236
+ macroautophagy [PMID:19793921]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0033615
+ mitochondrial proton-transporting ATP synthase complex
+ assembly [PMID:11096112]"
+ /note="Mitochondrial matrix protein; required for assembly
+ or stability at high temperature of the F1 sector of
+ mitochondrial F1F0 ATP synthase; null mutant temperature
+ sensitive growth on glycerol is suppressed by multicopy
+ expression of Odc1p"
+ /codon_start=1
+ /product="Fmc1p"
+ /protein_id="NP_012168.3"
+ /db_xref="GeneID:854709"
+ /db_xref="SGD:S000001360"
+ /translation="MDRPRTLRTYRGLIRAILKYERPSKIVNWGNLRKAMITKLEYAK
+ KQNQRDSHENINRQLEKWKKLDPVSDRSLNLFIADSKSLRSILQNDIKWEKKVAQGQN
+ VDEIFEHALDIIKFLDNQREYEELVDRYNPGNKLTQDEKVKRTANVVGLDVPT"
+ gene <180427..>181977
+ /gene="FYV10"
+ /locus_tag="YIL097W"
+ /gene_synonym="GID9"
+ /db_xref="GeneID:854710"
+ mRNA <180427..>181977
+ /gene="FYV10"
+ /locus_tag="YIL097W"
+ /gene_synonym="GID9"
+ /product="glucose-induced degradation complex subunit
+ FYV10"
+ /transcript_id="NM_001179445.1"
+ /db_xref="GeneID:854710"
+ CDS 180427..181977
+ /gene="FYV10"
+ /locus_tag="YIL097W"
+ /gene_synonym="GID9"
+ /EC_number="2.3.2.27"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0034657 GID complex
+ [PMID:16872538|PMID:18508925]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043066
+ negative regulation of apoptotic process [PMID:18355271]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043161
+ proteasome-mediated ubiquitin-dependent protein catabolic
+ process [PMID:12686616]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045721
+ negative regulation of gluconeogenesis [PMID:12686616]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061630
+ ubiquitin protein ligase activity [PMID:22044534]"
+ /note="Subunit of GID complex; involved in
+ proteasome-dependent catabolite inactivation of
+ gluconeogenic enzymes FBPase, PEPCK, and c-MDH; forms
+ dimer with Rmd5p that is then recruited to GID Complex by
+ Gid8p; contains a degenerate RING finger motif needed for
+ GID complex ubiquitin ligase activity in vivo, as well as
+ CTLH and CRA domains; plays role in anti-apoptosis;
+ required for survival upon exposure to K1 killer toxin"
+ /codon_start=1
+ /product="glucose-induced degradation complex subunit
+ FYV10"
+ /protein_id="NP_012169.1"
+ /db_xref="GeneID:854710"
+ /db_xref="SGD:S000001359"
+ /translation="MAEKSIFNEPDVDFHLKLNQQLFHIPYELLSKRIKHTQAVINKE
+ TKSLHEHTAALNQIFEHNDVEHDELALAKITEMIRKVDHIERFLNTQIKSYCQILNRI
+ KKRLEFFHELKDIKSQNSGTSHNGNNEGTRTKLIQWYQSYTNILIGDYLTRNNPIKYN
+ SETKDHWNSGVVFLKQSQLDDLIDYDVLLEANRISTSLLHERNLLPLISWINENKKTL
+ TKKSSILEFQARLQEYIELLKVDNYTDAIVCFQRFLLPFVKSNFTDLKLASGLLIFIK
+ YCNDQKPTSSTSSGFDTEEIKSQSLPMKKDRIFQHFFHKSLPRITSKPAVNTTDYDKS
+ SLINLQSGDFERYLNLLDDQRWSVLNDLFLSDFYSMYGISQNDPLLIYLSLGISSLKT
+ RDCLHPSDDENGNQETETATTAEKEVEDLQLFTLHSLKRKNCPVCSETFKPITQALPF
+ AHHIQSQLFENPILLPNGNVYDSKKLKKLAKTLKKQNLISLNPGQIMDPVDMKIFCES
+ DSIKMYPT"
+ gene complement(<182117..>183127)
+ /gene="BMT5"
+ /locus_tag="YIL096C"
+ /db_xref="GeneID:854711"
+ mRNA complement(<182117..>183127)
+ /gene="BMT5"
+ /locus_tag="YIL096C"
+ /product="25S rRNA (uracil2634-N3)-methyltransferase"
+ /transcript_id="NM_001179444.1"
+ /db_xref="GeneID:854711"
+ CDS complement(182117..183127)
+ /gene="BMT5"
+ /locus_tag="YIL096C"
+ /EC_number="2.1.1.313"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:14562095|PMID:24335083]"
+ /experiment="EXISTENCE:direct assay:GO:0008757
+ S-adenosylmethionine-dependent methyltransferase activity
+ [PMID:21858014]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0070042 rRNA
+ (uridine-N3-)-methyltransferase activity [PMID:24335083]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0070475 rRNA
+ base methylation [PMID:24335083]"
+ /note="Methyltransferase required for m3U2634 methylation
+ of the 25S rRNA; S-adenosylmethionine-dependent;
+ associates with precursors of the 60S ribosomal subunit;
+ predicted to be involved in ribosome biogenesis"
+ /codon_start=1
+ /product="25S rRNA (uracil2634-N3)-methyltransferase"
+ /protein_id="NP_012170.1"
+ /db_xref="GeneID:854711"
+ /db_xref="SGD:S000001358"
+ /translation="MARKLKGKIGSKGLKGALLRHKAKVKLVRNIESKQKHELRKKNS
+ SANNKTVKRNQEFQKLNQGKVMPFEKDETLMLCGEGDFSFARSIVEQNYIESDNLIIT
+ SYDNSVNELKLKYPHTFEENYQYLKDLNIPIFFQIDVTKLVKSFKISKNNTWFKIINR
+ LSDHRWGNKPLQNIVFNFPHNGKGIKDQERNIREHQDLIFNFFQNSLQLFNLINTKIQ
+ NDTLRYTQGYDLNEDTPQAKKLTAEGYGNIILSLFDGEPYDSWQIKLLAKKNGLTLSR
+ SSKFQWENFPGYHHRRTNSEQDTTKPAKERDARFYIFSKYVSNSSKHNRKSKKDTDSD
+ SD"
+ gene complement(183440..183513)
+ /locus_tag="YNCI0003C"
+ /db_xref="GeneID:854712"
+ tRNA complement(183440..183513)
+ /locus_tag="YNCI0003C"
+ /product="tRNA-Ile"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Isoleucine tRNA (tRNA-Ile), predicted by
+ tRNAscan-SE analysis"
+ /db_xref="GeneID:854712"
+ /db_xref="SGD:S000006605"
+ gene <183937..>186369
+ /gene="PRK1"
+ /locus_tag="YIL095W"
+ /gene_synonym="PAK1"
+ /db_xref="GeneID:854713"
+ mRNA <183937..>186369
+ /gene="PRK1"
+ /locus_tag="YIL095W"
+ /gene_synonym="PAK1"
+ /product="serine/threonine protein kinase PRK1"
+ /transcript_id="NM_001179443.1"
+ /db_xref="GeneID:854713"
+ CDS 183937..186369
+ /gene="PRK1"
+ /locus_tag="YIL095W"
+ /gene_synonym="PAK1"
+ /EC_number="2.7.11.1"
+ /experiment="EXISTENCE:direct assay:GO:0004672 protein
+ kinase activity [PMID:16319894]"
+ /experiment="EXISTENCE:direct assay:GO:0004674 protein
+ serine/threonine kinase activity
+ [PMID:9885245|PMID:17978096|PMID:11739778|PMID:13679512|PM
+ ID:12956961]"
+ /experiment="EXISTENCE:direct assay:GO:0030479 actin
+ cortical patch [PMID:9885245|PMID:10087264]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:19053807]"
+ /experiment="EXISTENCE:genetic interaction:GO:0120133
+ negative regulation of actin cortical patch assembly
+ [PMID:9885245]"
+ /experiment="EXISTENCE:genetic interaction:GO:1900186
+ negative regulation of clathrin-dependent endocytosis
+ [PMID:13679512]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000147 actin
+ cortical patch assembly [PMID:12952930]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031333
+ negative regulation of protein-containing complex assembly
+ [PMID:11739778]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1900186
+ negative regulation of clathrin-dependent endocytosis
+ [PMID:12956961]"
+ /note="Ser/Thr protein kinase; regulates the organization
+ and function of the actin cytoskeleton; inhibits
+ clathrin-mediated endocytosis; phosphorylates the Pan1p
+ and Sla1p subunits of the Pan1p-Sla1p-End3p complex,
+ resulting in inhibition of complex formation; inhibits
+ Scd5p through phosphorylation; phosphorylates
+ Pan1p-interacting proteins, Ent1/2 and Yap1801/2;
+ negatively regulated through autophosphorylation;
+ functional overlap with ARK1"
+ /codon_start=1
+ /product="serine/threonine protein kinase PRK1"
+ /protein_id="NP_012171.1"
+ /db_xref="GeneID:854713"
+ /db_xref="SGD:S000001357"
+ /translation="MNTPQISLYEPGTILTVGSHHAKIIKYLTSGGFAQVYTAEISPP
+ DPYSNANIACLKRVIVPHKQGLNTLRAEVDAMKLLRNNKHVVSYIDSHAARSVNGIAY
+ EVFVLMEFCERGGLIDFMNTRLQNRLQESEILEIMSQTVQGITAMHALQPPLIHRDIK
+ IENVLISHDGLYKVCDFGSVSGVIRPPRNTQEFNYVQHDILTNTTAQYRSPEMIDLYR
+ GLPIDEKSDIWALGVFLYKICYYTTPFEKSGEAGILHARYQYPSFPQYSDRLKNLIRL
+ MLMEAPSQRPNICQVLEEVSRLQNKPCPIRNFYLLRAMNQNANTQLAGEPSSTTYVPT
+ QKFIPVQSLQSINQPPNMMPVTHVSTTPNLGTFPISINDNNKTEVTAHAGLQVGSHSN
+ LTSPLMKTKSVPLSDEFASLYYKELHPFQKSQTFKSVESFQSPQRKSMPPLSLTPVNN
+ DIFDRVSAINRPNNYVDSETQTIDNMAVPNLKLSPTITSKSLSSTKEIAAPDNINGSK
+ IVRSLSSKLKKVITGESRGNSPIKSRQNTGDSIRSAFGKLRHGFTGNSVNNSRSASFD
+ NNNVNGNGNNTNRRLVSSSTSSFPKFNSDTKRKEESDKNQRLEKRRSMPPSILSDFDQ
+ HERNNSRTGSRDYYRSHSPVKKTQASAKTTSKPTLIPDNGNVNINQEKKESIQRRVHN
+ LLKSSDDPVTYKSASGYGKYTDIGTETSNRHSSVRITPITEEKFKKTLKDGVLDIKTK
+ SNGKDKSRPPRPPPKPLHLRTEIQKIRNFSRLQSKKLPIERISSEATETIVDVNVDDL
+ EADFRKRFPSKV"
+ gene complement(<186517..>187632)
+ /gene="LYS12"
+ /locus_tag="YIL094C"
+ /gene_synonym="LYS10; LYS11"
+ /db_xref="GeneID:854714"
+ mRNA complement(<186517..>187632)
+ /gene="LYS12"
+ /locus_tag="YIL094C"
+ /gene_synonym="LYS10; LYS11"
+ /product="homoisocitrate dehydrogenase"
+ /transcript_id="NM_001179442.1"
+ /db_xref="GeneID:854714"
+ CDS complement(186517..187632)
+ /gene="LYS12"
+ /locus_tag="YIL094C"
+ /gene_synonym="LYS10; LYS11"
+ /EC_number="1.1.1.87"
+ /experiment="EXISTENCE:curator inference:GO:0009085 lysine
+ biosynthetic process [PMID:17223711]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:24769239|PMID:11914276|PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0047046
+ homoisocitrate dehydrogenase activity [PMID:17223711]"
+ /note="Homo-isocitrate dehydrogenase; an NAD-linked
+ mitochondrial enzyme required for the fourth step in the
+ biosynthesis of lysine, in which homo-isocitrate is
+ oxidatively decarboxylated to alpha-ketoadipate"
+ /codon_start=1
+ /product="homoisocitrate dehydrogenase"
+ /protein_id="NP_012172.1"
+ /db_xref="GeneID:854714"
+ /db_xref="SGD:S000001356"
+ /translation="MFRSVATRLSACRGLASNAARKSLTIGLIPGDGIGKEVIPAGKQ
+ VLENLNSKHGLSFNFIDLYAGFQTFQETGKALPDETVKVLKEQCQGALFGAVQSPTTK
+ VEGYSSPIVALRREMGLFANVRPVKSVEGEKGKPIDMVIVRENTEDLYIKIEKTYIDK
+ ATGTRVADATKRISEIATRRIATIALDIALKRLQTRGQATLTVTHKSNVLSQSDGLFR
+ EICKEVYESNKDKYGQIKYNEQIVDSMVYRLFREPQCFDVIVAPNLYGDILSDGAAAL
+ VGSLGVVPSANVGPEIVIGEPCHGSAPDIAGKGIANPIATIRSTALMLEFLGHNEAAQ
+ DIYKAVDANLREGSIKTPDLGGKASTQQVVDDVLSRL"
+ gene complement(<187990..>188784)
+ /gene="RSM25"
+ /locus_tag="YIL093C"
+ /gene_synonym="mS23"
+ /db_xref="GeneID:854715"
+ mRNA complement(<187990..>188784)
+ /gene="RSM25"
+ /locus_tag="YIL093C"
+ /gene_synonym="mS23"
+ /product="mitochondrial 37S ribosomal protein RSM25"
+ /transcript_id="NM_001179441.3"
+ /db_xref="GeneID:854715"
+ CDS complement(187990..188784)
+ /gene="RSM25"
+ /locus_tag="YIL093C"
+ /gene_synonym="mS23"
+ /experiment="EXISTENCE:curator inference:GO:0032543
+ mitochondrial translation [PMID:11278769]"
+ /experiment="EXISTENCE:direct assay:GO:0003735 structural
+ constituent of ribosome [PMID:11278769]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005763
+ mitochondrial small ribosomal subunit [PMID:11278769]"
+ /note="Mitochondrial ribosomal protein of the small
+ subunit"
+ /codon_start=1
+ /product="mitochondrial 37S ribosomal protein RSM25"
+ /protein_id="NP_012173.3"
+ /db_xref="GeneID:854715"
+ /db_xref="SGD:S000001355"
+ /translation="MKIQTNAVNVLQRTSAYLKSGLLKETPAWYNVVASIPPSTKFTR
+ EPRFKNPSNGHIIGKLVDVTEQPHANNKGLYKTRPNSSDKRVGVKRLYRPPKLTYVED
+ RLRSLFYKQHPWELSRPKILVENEIGDENYDWSHMLQIGRPLDGESVIQRTMYLIKTK
+ QYGDMVEAYDHARYEFYALRMQEETEQQVALEEAEMFGSLFGVSAIEHGIQKEQEVLD
+ VWEKKVVEETELMAARTSNPAGSWKDDTTLNTAQEEESTTSENLHF"
+ gene <189066..>190967
+ /locus_tag="YIL092W"
+ /db_xref="GeneID:854716"
+ mRNA <189066..>190967
+ /locus_tag="YIL092W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179440.3"
+ /db_xref="GeneID:854716"
+ CDS 189066..190967
+ /locus_tag="YIL092W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /note="hypothetical protein; green fluorescent protein
+ (GFP)-fusion protein localizes to the cytoplasm and to the
+ nucleus"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012174.3"
+ /db_xref="GeneID:854716"
+ /db_xref="SGD:S000001354"
+ /translation="MVQMRSKNMAYESGTNNYSDTIANGNTLPPRSKKGHSGRRKRSE
+ TLPIACNNFCVTRQIDDDEQAFKMLDKVSHLKKFSAEDGDDNNIFVQWADDITDILFG
+ LCCTGTFLKLLISSALSGRAKTWFDSTTEGIDDHVIKAYSFEKFLALLSEEFDGARSL
+ RREIFTELLTLSIDSEKSLEAFAHKSGRLTPYYLSSGAALDLFLTKLEPQLQKQLENC
+ AFPMTLNLALLITACEFAKRASNHKKYRYKNTRDSDICTPKSKNTAIVSKLSNTKTIS
+ KNKVIEKSDKKNYFDKNSQHIPDPKRRKQNEPGMRLFLVMDEEKNILTSRNVSANAYT
+ SKNGHTNLSDLHTNLKNSKSQQCAVEPISILNSGSLVTGTINIDLINDEVLGTKEETT
+ TYDERMDGNSRSLNERCCAVKKNSLQPITSNIFQKNAEIQGTKIGSVLDSGISNSFSS
+ TEYMFPPTSSATVSNPVKKNEISKSSQVKDIAQFNPFMTNEKEKKLNPSESFKSPGVS
+ MEINRLSRIAGLRNIPGNIYEDSKMLNLKTRKCYPLHNFAVRTRSAHFNDRPSNYISP
+ HETINATLRSPASFDSIQCITRSKRVDAETNKATGSAKSENIETKSRKFPEVINPFLV
+ NTTNKKESD"
+ gene complement(<191030..>193195)
+ /gene="UTP25"
+ /locus_tag="YIL091C"
+ /db_xref="GeneID:854717"
+ mRNA complement(<191030..>193195)
+ /gene="UTP25"
+ /locus_tag="YIL091C"
+ /product="rRNA-binding ribosome biosynthesis protein
+ UTP25"
+ /transcript_id="NM_001179439.1"
+ /db_xref="GeneID:854717"
+ CDS complement(191030..193195)
+ /gene="UTP25"
+ /locus_tag="YIL091C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14690591|PMID:14562095|PMID:20528918]"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:14690591|PMID:14562095|PMID:20528918]"
+ /experiment="EXISTENCE:direct assay:GO:0019843 rRNA
+ binding [PMID:20528918]"
+ /experiment="EXISTENCE:direct assay:GO:0032040
+ small-subunit processome [PMID:20884785]"
+ /experiment="EXISTENCE:direct assay:GO:0034511 U3 snoRNA
+ binding [PMID:20528918]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000462
+ maturation of SSU-rRNA from tricistronic rRNA transcript
+ (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
+ [PMID:20884785|PMID:20528918]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042274
+ ribosomal small subunit biogenesis
+ [PMID:20884785|PMID:20528918|PMID:19806183]"
+ /experiment="EXISTENCE:physical interaction:GO:0032040
+ small-subunit processome [PMID:20528918]"
+ /note="Nucleolar protein; required for 35S pre-RNA
+ processing and 40S ribosomal subunit biogenesis"
+ /codon_start=1
+ /product="rRNA-binding ribosome biosynthesis protein
+ UTP25"
+ /protein_id="NP_012175.1"
+ /db_xref="GeneID:854717"
+ /db_xref="SGD:S000001353"
+ /translation="MSDSSVREKNDNFRGYRKRGRQELRKIKRSSARTEGGSTETLED
+ VAEDIDHRSDEDEVSDVDSGDDFDIEDEEGKKEKVYDALLTILKSEHPEPKRRRREAD
+ ESNKAPAEVGEDEHENTEHGPVDDQLEIENGLLGNHEDDNDDDSSGDEKDIDSEDEQD
+ PFESHFNQVPEKFVDKLSNAFKTKSVKYKSVKGSLSDSESYIYAKPVVIGEEALVESP
+ YRSSSIYSYFLKQRLKVQNGLLDKKTDPLTALQKKLVDPMFQYKDILYEYDSYEKDED
+ EYRDLYALHVLNHIYKTRDRILKNNQRLQDNPDTEHLDQGFTRPKVLIVVPTREVAYR
+ VVDKIISKSGIDQVDKKGKFYDQFRDDSLPPKSKPKSFQHIFRGNTNDFFVVGLKFTR
+ KAIKLYSNFYQSDIIVCSPLGIQMILENTDKKKRQDDFLSSIELMVIDQLHSIEYQNI
+ SHIFTIFDHLNKIPDQQHEADFSRIRMWYINEQAKLFRQTMVFTKYISPAANSLINGR
+ CRNMAGRWKNHKVIGSENSSIGQSGLKIRQIFQRFDIIGNSIIEEPDYRFKFFTSVII
+ PGIVKSTGYEDGILIYIPDYTDFIRIRNYMKEKTTILFGDINEYSSQRQLNANRSLFQ
+ QGRLKVMLYTERLHHYRRYEIKGVKSVVFYKPPNNPEFYNEVVRFIGKNAFLGNTDLN
+ ISTVRCIYSKLDGLSLERIVGTKRAAVLSHAQKEIYEFK"
+ gene <193595..>195070
+ /gene="ICE2"
+ /locus_tag="YIL090W"
+ /db_xref="GeneID:854718"
+ mRNA <193595..>195070
+ /gene="ICE2"
+ /locus_tag="YIL090W"
+ /product="Ice2p"
+ /transcript_id="NM_001179438.1"
+ /db_xref="GeneID:854718"
+ CDS 193595..195070
+ /gene="ICE2"
+ /locus_tag="YIL090W"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:14562095|PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:15585575]"
+ /experiment="EXISTENCE:direct assay:GO:0032541 cortical
+ endoplasmic reticulum [PMID:15585575]"
+ /experiment="EXISTENCE:direct assay:GO:0097038 perinuclear
+ endoplasmic reticulum [PMID:15585575]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000921
+ septin ring assembly [PMID:17984322]"
+ /experiment="EXISTENCE:genetic interaction:GO:0004865
+ protein serine/threonine phosphatase inhibitor activity
+ [PMID:34617598]"
+ /experiment="EXISTENCE:genetic interaction:GO:0160031
+ endoplasmic reticulum membrane biogenesis [PMID:34617598]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000921 septin
+ ring assembly [PMID:17984322]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004865 protein
+ serine/threonine phosphatase inhibitor activity
+ [PMID:34617598]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006882
+ intracellular zinc ion homeostasis [PMID:22685415]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031204
+ post-translational protein targeting to membrane,
+ translocation [PMID:32513868]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0036228 protein
+ localization to nuclear inner membrane [PMID:15911569]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0048309
+ endoplasmic reticulum inheritance
+ [PMID:15585575|PMID:17984322]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0160031
+ endoplasmic reticulum membrane biogenesis [PMID:34617598]"
+ /note="Integral ER membrane protein with type-III
+ transmembrane domains; promotes ER membrane expansion by
+ inhibiting the Nem1-Spo7 phosphatase complex, resulting in
+ reduced dephosphorylation and activation of Pah1p;
+ maintains homeostasis during ER stress; required for
+ post-translational SRP-independent protein translocation
+ into the ER; necessary for efficient targeting of the
+ Trm1p to the inner nuclear membrane; mutations cause
+ defects in cortical ER morphology; member of the SERINC
+ superfamily"
+ /codon_start=1
+ /product="Ice2p"
+ /protein_id="NP_012176.1"
+ /db_xref="GeneID:854718"
+ /db_xref="SGD:S000001352"
+ /translation="MTSLSKSFMQSGRICAACFYLLFTLLSIPISFKVGGLECGLSFT
+ VTLFTLYFITTTLNVLARRHGGRLYIFFTNCLYYSQHFIIASLLYLFLSGFSNDELGN
+ VLKNKYNESESFLEALKNSLNSNQINYVLYYYYYRFVVQPWQFVLTKSTPFFTLSEGF
+ FTILAIQAVGETNRWLSNDLNSNTWIISSLLTSGGVITASLYYLYRIYVTPIWPLSIQ
+ TASLLGLVLSMVCGLGLYGIVSQKGSVIESSLFFAYIVRCIYEISPKLATTATDEILN
+ LFKDVWQKHQRNLPTADNLLCYFHNVILKNAEVLWGSFIPRGRKKTGDFHDKLISILS
+ FEKVSLISKPFWKFFKNFTFSVPLSINEFCQVTIKMASESVSPAIVINLCFRVLMFYS
+ ATRIIPALQRKNDKQLRKSRRIMKGLYWYSPCILIAMYTHLILQYSGELKKDLCIWGC
+ SEKWFGVDQPEIIVDSWGFWNWCNIFCTILVYATELIGSGS"
+ gene <195599..>196216
+ /locus_tag="YIL089W"
+ /db_xref="GeneID:854719"
+ mRNA <195599..>196216
+ /locus_tag="YIL089W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179437.1"
+ /db_xref="GeneID:854719"
+ CDS 195599..196216
+ /locus_tag="YIL089W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0000328 fungal-type
+ vacuole lumen [PMID:21777356]"
+ /experiment="EXISTENCE:direct assay:GO:0005777 peroxisome
+ [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:21777356]"
+ /note="hypothetical protein found in the ER and vacuole
+ lumen; overexpression of YIL089W affects endocytic protein
+ trafficking"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012177.1"
+ /db_xref="GeneID:854719"
+ /db_xref="SGD:S000001351"
+ /translation="MQRTRELESSVAIDQTEVPRSRFFIMVKKLSRVADIVYIVDTFL
+ IPPLHPLKKQHPKVAKFLKVQLVFDLISLFIFATHQLLLLEDGNFGKHYFKRKTKRCS
+ KFSCSRCNANAHHPKWFKFKHSLLCLGTFCFGVYSLVKINKFFKTDQTVDLNRLLELF
+ FWQLNAILNMKLFAFYGDHLESHSAPLDVYEDSFANKSSSGGDEV"
+ repeat_region complement(196653..197023)
+ /note="Ty4 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007015"
+ repeat_region complement(197140..197488)
+ /note="Ty1 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007011"
+ gene 197592..197663
+ /locus_tag="YNCI0004W"
+ /db_xref="GeneID:854720"
+ tRNA 197592..197663
+ /locus_tag="YNCI0004W"
+ /product="tRNA-Glu"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Glutamate tRNA (tRNA-Glu), predicted by tRNAscan-SE
+ analysis"
+ /db_xref="GeneID:854720"
+ /db_xref="SGD:S000006547"
+ gene complement(<197931..>199403)
+ /gene="AVT7"
+ /locus_tag="YIL088C"
+ /db_xref="GeneID:854721"
+ mRNA complement(<197931..>199403)
+ /gene="AVT7"
+ /locus_tag="YIL088C"
+ /product="Avt7p"
+ /transcript_id="NM_001179436.1"
+ /db_xref="GeneID:854721"
+ CDS complement(197931..199403)
+ /gene="AVT7"
+ /locus_tag="YIL088C"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762|PMID:11274162|PMID:27246536]"
+ /experiment="EXISTENCE:direct assay:GO:0005774 vacuolar
+ membrane [PMID:25266154]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:11274162]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006865 amino
+ acid transport [PMID:25266154]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043937
+ regulation of sporulation [PMID:25266154]"
+ /note="Vacuolar amino acid transporter; member of a family
+ of seven S. cerevisiae genes (AVT1-7) related to vesicular
+ GABA-glycine transporters"
+ /codon_start=1
+ /product="Avt7p"
+ /protein_id="NP_012178.1"
+ /db_xref="GeneID:854721"
+ /db_xref="SGD:S000001350"
+ /translation="MEATSSALSSTANLVKTIVGAGTLAIPYSFKSDGVLVGVILTLL
+ AAVTSGLGLFVLSKCSKTLINPRNSSFFTLCMLTYPTLAPIFDLAMIVQCFGVGLSYL
+ VLIGDLFPGLFGGERNYWIIASAVIIIPLCLVKKLDQLKYSSILGLFALAYISILVFS
+ HFVFELGKGELTNILRNDICWWKIHDFKGLLSTFSIIIFAFTGSMNLFPMINELKDNS
+ MENITFVINNSISLSTALFLIVGLSGYLTFGNETLGNLMLNYDPNSIWIVIGKFCLGS
+ MLILSFPLLFHPLRIAVNNVIIWIEITYGGANPEEDPQVSEYTRASNLRPISMTVEDP
+ AQPSDALDATSYNEQECLLPNGNFDNGSIESQENNNDERGTMAVAGDNEHHAPFVKSR
+ FYWITALLLISMYTLALSVQSFALVLSFVGATGSTSISFTLPGLLGYKLIGLDSLAIG
+ KMIPPKDRFYKRCSLLLVFYGLSVMFLSLYVTVFNRSDEA"
+ gene complement(<199646..>200119)
+ /gene="AIM19"
+ /locus_tag="YIL087C"
+ /db_xref="GeneID:854722"
+ mRNA complement(<199646..>200119)
+ /gene="AIM19"
+ /locus_tag="YIL087C"
+ /product="Aim19p"
+ /transcript_id="NM_001179435.3"
+ /db_xref="GeneID:854722"
+ CDS complement(199646..200119)
+ /gene="AIM19"
+ /locus_tag="YIL087C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:16823961|PMID:24769239|PMID:14576278|PMID:14562095]"
+ /experiment="EXISTENCE:physical interaction:GO:0005739
+ mitochondrion [PMID:25124039]"
+ /note="hypothetical protein; mitochondrial protein that
+ physically interacts with Tim23p; null mutant displays
+ reduced respiratory growth"
+ /codon_start=1
+ /product="Aim19p"
+ /protein_id="NP_012179.3"
+ /db_xref="GeneID:854722"
+ /db_xref="SGD:S000001349"
+ /translation="MSAKPATDDAKDELLSPFRRLYALTRTPYPALANAALLASTPVL
+ SPSFKVPPTQSPALSIPMSRVFSKSSTARIGITTKTALFFSTMQAIGAYMIYDNDLEN
+ GAGFIATWSALYLIVGGKKSFSALRYGRTWPLVLSSVSLANAVLYGQRFLATGFQ"
+ gene complement(<200153..>200461)
+ /locus_tag="YIL086C"
+ /db_xref="GeneID:854723"
+ mRNA complement(<200153..>200461)
+ /locus_tag="YIL086C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001348844.1"
+ /db_xref="GeneID:854723"
+ CDS complement(200153..200461)
+ /locus_tag="YIL086C"
+ /note="hypothetical protein; conserved across S.
+ cerevisiae strains"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001335784.1"
+ /db_xref="GeneID:854723"
+ /db_xref="SGD:S000001348"
+ /translation="MNKIIKESTNFSRYLRTGGVLNSLRTTSKFVYINNNSYLTHGGF
+ DGNVATIFNISEFNYINSSAKGSLLTYKSITFFCPRYFKKRPLGRHAKGKGKSDEKIL
+ "
+ gene complement(<200490..>202043)
+ /gene="KTR7"
+ /locus_tag="YIL085C"
+ /db_xref="GeneID:854724"
+ mRNA complement(<200490..>202043)
+ /gene="KTR7"
+ /locus_tag="YIL085C"
+ /product="putative mannosyltransferase"
+ /transcript_id="NM_001179433.3"
+ /db_xref="GeneID:854724"
+ CDS complement(200490..202043)
+ /gene="KTR7"
+ /locus_tag="YIL085C"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031505
+ fungal-type cell wall organization [PMID:9090056]"
+ /note="Putative mannosyltransferase involved in protein
+ glycosylation; member of the KRE2/MNT1 mannosyltransferase
+ family; KTR7 has a paralog, KTR5, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="putative mannosyltransferase"
+ /protein_id="NP_012181.3"
+ /db_xref="GeneID:854724"
+ /db_xref="SGD:S000001347"
+ /translation="MAIRLNPKVRRFLLDKCRQKRYGFLFLGCIFAILYCMGTWPFFA
+ KDIVHDPNNLPYSLQDYSTDKDEPFFRGCTDTKLYLQNPAYSKMNASFVMLTRNEEIE
+ DVLKTMRSIEGHFNKWFKYPYVFLNDDPFTDHFKDQIQAATNATVEFGTVDEIMWEFP
+ AKVRNSLQFKASLEDQNDRGIMYGNMESYHKMCRFYSGIFYKHPLVSKYEWYWRIEPD
+ VDFFCDISYDPFFEMAKHNKKYGFTVLITELYWTVPNLFRTTKSFIKKTAGLKENLGT
+ LWKLFTFNYNILDTDDEEISRWVNFPWDAKPKLTEKLMVDFLLENHGQVNNEEDLEGI
+ QYLVERARSKVPMLEDSLEGEDYNLCHFWSNFEIARVDLFDNEIYNAYFKFLEESGGF
+ WTERWGDAPIHSIGLGMTLDLEDVHYFRDIGYRHSSLQHCPKNALQSQENLNTFDEGY
+ NFGCGCRCVCPKKGEDIEDHSTPCMDIFFELLHGREYEKEFPGCYKPSIKDKDVIEEI
+ RRENFRVIE"
+ gene complement(<202276..>203259)
+ /gene="SDS3"
+ /locus_tag="YIL084C"
+ /db_xref="GeneID:854725"
+ mRNA complement(<202276..>203259)
+ /gene="SDS3"
+ /locus_tag="YIL084C"
+ /product="Sds3p"
+ /transcript_id="NM_001179432.1"
+ /db_xref="GeneID:854725"
+ CDS complement(202276..203259)
+ /gene="SDS3"
+ /locus_tag="YIL084C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0033698 Rpd3L
+ complex
+ [PMID:16286007|PMID:16286008|PMID:16314178|PMID:19040720]"
+ /experiment="EXISTENCE:direct assay:GO:0070210
+ Rpd3L-Expanded complex [PMID:19040720]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000122
+ negative regulation of transcription by RNA polymerase II
+ [PMID:24358376]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004407 histone
+ deacetylase activity [PMID:11024051]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031507
+ heterochromatin formation [PMID:11024051]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034605
+ cellular response to heat [PMID:20398213]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043709 cell
+ adhesion involved in single-species biofilm formation
+ [PMID:18202364]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:20398213|PMID:17210643]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061186
+ negative regulation of silent mating-type cassette
+ heterochromatin formation [PMID:16286008]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061188
+ negative regulation of rDNA heterochromatin formation
+ [PMID:16286008]"
+ /experiment="EXISTENCE:mutant phenotype:GO:2000217
+ regulation of invasive growth in response to glucose
+ limitation [PMID:18202364]"
+ /note="Component of the Rpd3L histone deacetylase complex;
+ required for its structural integrity and catalytic
+ activity, involved in transcriptional silencing and
+ required for sporulation; relocalizes to the cytosol in
+ response to hypoxia; cells defective in SDS3 display
+ pleiotropic phenotypes"
+ /codon_start=1
+ /product="Sds3p"
+ /protein_id="NP_012182.1"
+ /db_xref="GeneID:854725"
+ /db_xref="SGD:S000001346"
+ /translation="MAIQKVSNKDLSRKDKRRFNIESKVNKIYQNFYSERDNQYKDRL
+ TALQTDLTSLHQGDNGQYARQVRDLEEERDLELVRLRLFEEYRVSRSGIEFQEDIEKA
+ KAEHEKLIKLCKERLYSSIEQKIKKLQEERLLMDVANVHSYAMNYSRPQYQKNTRSHT
+ VSGWDSSSNEYGRDTANESATDTGAGNDRRTLRRRNASKDTRGNNNNQDESDFQTGNG
+ SGSNGHGSRQGSQFPHFNNLTYKSGMNSDSDFLQGINEGTDLYAFLFGEKNPKDNANG
+ NEKKKNRGAQRYSTKTAPPLQSLKPDEVTEDISLIRELTGQPPAPFRLRSD"
+ gene complement(<203556..>204653)
+ /gene="CAB2"
+ /locus_tag="YIL083C"
+ /db_xref="GeneID:854726"
+ mRNA complement(<203556..>204653)
+ /gene="CAB2"
+ /locus_tag="YIL083C"
+ /product="phosphopantothenate--cysteine ligase CAB2"
+ /transcript_id="NM_001179431.2"
+ /db_xref="GeneID:854726"
+ CDS complement(203556..204653)
+ /gene="CAB2"
+ /locus_tag="YIL083C"
+ /EC_number="6.3.2.5"
+ /experiment="EXISTENCE:direct assay:GO:0004632
+ phosphopantothenate--cysteine ligase activity
+ [PMID:30653991]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14690591]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14690591]"
+ /experiment="EXISTENCE:direct assay:GO:1990143
+ CoA-synthesizing protein complex [PMID:23789928]"
+ /experiment="EXISTENCE:genetic interaction:GO:0015937
+ coenzyme A biosynthetic process [PMID:19266201]"
+ /note="Phosphopantothenoylcysteine synthetase (PPCS);
+ catalyzes the second step of coenzyme A biosynthesis from
+ pantothenate; subunit of the CoA-Synthesizing Protein
+ Complex (CoA-SPC) that contains: Cab2p, Cab3p, Cab4p,
+ Cab5p, Sis2p and Vhs3p subunits; null mutant lethality is
+ complemented by human homolog PPCS and by E. coli coaBC, a
+ bifunctional enzyme with PPCS activity"
+ /codon_start=1
+ /product="phosphopantothenate--cysteine ligase CAB2"
+ /protein_id="NP_012183.2"
+ /db_xref="GeneID:854726"
+ /db_xref="SGD:S000001345"
+ /translation="MPPLPVLNRPQIHTSVTEISHAIDRTIKEELFPVAYTTEEEQYF
+ KTNPKPAYIDELIKDAKEFIDLQYSLKRNKIVLITSGGTTVPLENNTVRFIDNFSAGT
+ RGASSAEQFLANGYSVIFLHREFSLTPYNRSFSHSINTLFLDYIDSEGKIKPEFAENV
+ LKNKKLYDKYMEKEEKLLLLPFTTVNQYLWSLKSIAKLLNNSGCLFYLAAAVSDFFVP
+ YSRLPQHKIQSGDNGKMGANNDTEGTTRTTPDGKLIVNLDPVPKFLRRLVESWATQAM
+ IVSFKLETDESMLLYKCTQALDRYNHQLVIGNLLQTRNKQVIFVSPENRKGDWVRLDE
+ KHHSIEEMIIPEVIARHDKWVAHSKTKLATK"
+ mobile_element 205220..210647
+ /note="YILWTy3-1; Ty3 element, LTR retrotransposon of the
+ Gypsy (Metaviridae) group; contains co-transcribed genes
+ TYA Gag and TYB Pol, encoding proteins involved in
+ structure and function of virus-like particles, flanked by
+ two direct repeats; transposition is induced upon exposure
+ to mating pheromone"
+ /mobile_element_type="retrotransposon:YILWTy3-1"
+ /db_xref="SGD:S000007020"
+ repeat_region 205220..205559
+ /note="Ty3 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007017"
+ gene <205635..>210132
+ /locus_tag="YIL082W-A"
+ /db_xref="GeneID:854728"
+ mRNA <205635..>210132
+ /locus_tag="YIL082W-A"
+ /product="gag-pol fusion protein"
+ /transcript_id="NM_001181434.4"
+ /db_xref="GeneID:854728"
+ CDS join(205635..206485,206487..210132)
+ /locus_tag="YIL082W-A"
+ /EC_number="2.7.7.7"
+ /EC_number="2.7.7.49"
+ /EC_number="3.1.26.4"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:9448009]"
+ /ribosomal_slippage
+ /note="Retrotransposon TYA Gag and TYB Pol genes;
+ transcribed/translated as one unit; polyprotein is
+ processed to make a nucleocapsid-like protein (Gag),
+ reverse transcriptase (RT), protease (PR), and integrase
+ (IN); similar to retroviral genes"
+ /codon_start=1
+ /product="gag-pol fusion protein"
+ /protein_id="NP_012184.1"
+ /db_xref="GeneID:854728"
+ /db_xref="SGD:S000003537"
+ /translation="MSFMDQIPGGGNYPKLPVECLPNFPIQPSLTFRGRNDSHKLKNF
+ ISEIMLNMSMISWPNDASRIVYCRRHLLNPAAQWANDFVQEQGILEITFDTFIQGLYQ
+ HFYKPPDINKIFNAITQLSEAKLGIERLNQRFRKIWDRMPPDFMTEKAAIMTYTRLLT
+ KETYNIVRMHKPETLKDAMEEAYQTTALTERFFPGFELDADGDTIIGATTHLQEEYDS
+ DYDSEDNLTQNRYVHTVRTRRSYNKPMSNHRNRRNNNASREECIKNRLCFYCKKEGHR
+ LNECRARKRVLTDLELESKDQQTLFIKTLPIVHYIAIPEMDNTAEKTIKIQNTKVKTL
+ FDSGSPTSFIRRDIVELLKYEIYETPPLRFRGFVATKSAVTSEAVTIDLKINDLQITL
+ AAYILDNMDYQLLIGNPILRRYPKILHTVLNTRESPDSLKPKTYRSETVNNVRTYSAG
+ NRGNPRNIKLSFAPTILEATDPKSAGNRGNPRNTKLSLAPTILEATDPKSAGNRGDSR
+ TKTLSLATTTPAAIDPLTTLDNPGSTQSTFAQFPIPEEASILEEDGKYSNVVSTIQSV
+ EPNATDHSNKDTFCTLPVWLQQKYREIIRNDLPPRPADINNIPVKHDIEIKPGARLPR
+ LQPYHVTEKNEQEINKIVQKLLDNKFIVPSKSPCSSPVVLVPKKDGTFRLCVDYRTLN
+ KATISDPFPLPRIDNLLSRIGNAQIFTTLDLHSGYHQIPMEPKDRYKTAFVTPSGKYE
+ YTVMPFGLVNAPSTFARYMADTFRDLRFVNVYLDDILIFSESPEEHWKHLDTVLERLK
+ NENLIVKKKKCKFASEETEFLGYSIGIQKIAPLQHKCAAIRDFPTPKTVKQAQRFLGM
+ INYYRRFIPNCSKIAQPIQLFICDKSQWTEKQDKAIEKLKAALCNSPVLVPFNNKANY
+ RLTTDASKDGIGAVLEEVDNKNKLVGVVGYFSKSLESAQKNYPAGELELLGIIKALHH
+ FRYMLHGKHFTLRTDHISLLSLQNKNEPARRVQRWLDDLATYDFTLEYLAGPKNVVAD
+ AISRAIYTITPETSRPIDTESWKSYYKSDPLCSAVLIHMKELTQHNVTPEDMSAFRSY
+ QKKLELSETFRKNYSLEDEMIYYQDRLVVPIKQQNAVMRLYHDHTLFGGHFGVTVTLA
+ KISPIYYWPKLQHSIIQYIRTCVQCQLIKSHRPRLHGLLQPLPIAEGRWLDISMDFVT
+ GLPPTSNNLNMILVVVDRFSKRAHFIATRKTLDATQLIDLLFRYIFSYHGFPRTITSD
+ RDVRMTADKYQELTKRLGIKSTMSSANHPQTDGQSERTIQTLNRLLRAYVSTNIQNWH
+ VYLPQIEFVYNSTPTRTLGKSPFEIDLGYLPNTPAIKSDDEVNARSFTAVELAKHLKA
+ LTIQTKEQLEHAQIEMETNNNQRRKPLLLNIGDHVLVHRDAYFKKGAYMKVQQIYVGP
+ FRVVKKINDNAYELDLNSHKKKHRVINVQFLKSLYTVQTRTQRINQSAPLRELREHTK
+ LLHS"
+ repeat_region 210308..210647
+ /note="Ty3 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007018"
+ gene 210665..210738
+ /locus_tag="YNCI0005W"
+ /db_xref="GeneID:854730"
+ tRNA 210665..210738
+ /locus_tag="YNCI0005W"
+ /product="tRNA-Ile"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Isoleucine tRNA (tRNA-Ile), predicted by
+ tRNAscan-SE analysis"
+ /db_xref="GeneID:854730"
+ /db_xref="SGD:S000006606"
+ gene complement(<210923..>212005)
+ /gene="AIR1"
+ /locus_tag="YIL079C"
+ /db_xref="GeneID:854731"
+ mRNA complement(<210923..>212005)
+ /gene="AIR1"
+ /locus_tag="YIL079C"
+ /product="TRAMP complex RNA-binding subunit"
+ /transcript_id="NM_001179429.1"
+ /db_xref="GeneID:854731"
+ CDS complement(210923..212005)
+ /gene="AIR1"
+ /locus_tag="YIL079C"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:11489916]"
+ /experiment="EXISTENCE:direct assay:GO:0031499 TRAMP
+ complex [PMID:15828860|PMID:15935759]"
+ /experiment="EXISTENCE:direct assay:GO:0071038
+ TRAMP-dependent tRNA surveillance pathway
+ [PMID:17643380|PMID:15828860]"
+ /experiment="EXISTENCE:direct assay:GO:1990817 poly(A) RNA
+ polymerase activity [PMID:15935759]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071031
+ nuclear mRNA surveillance of mRNA 3'-end processing
+ [PMID:17410208]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071035
+ nuclear polyadenylation-dependent rRNA catabolic process
+ [PMID:15935758|PMID:18007593]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071036
+ nuclear polyadenylation-dependent snoRNA catabolic process
+ [PMID:15935758]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071037
+ nuclear polyadenylation-dependent snRNA catabolic process
+ [PMID:15935758]"
+ /experiment="EXISTENCE:genetic interaction:GO:0071039
+ nuclear polyadenylation-dependent CUT catabolic process
+ [PMID:18591258]"
+ /experiment="EXISTENCE:genetic interaction:GO:1990817
+ poly(A) RNA polymerase activity [PMID:15935759]"
+ /note="Zinc knuckle protein; involved in nuclear RNA
+ processing and degradation as a component of the TRAMP
+ complex; stimulates the poly(A) polymerase activity of
+ Pap2p in vitro; AIR1 has a paralog, AIR2, that arose from
+ the whole genome duplication; although Air1p and Air2p are
+ homologous TRAMP subunits, they have nonredundant roles in
+ regulation of substrate specificity of the exosome"
+ /codon_start=1
+ /product="TRAMP complex RNA-binding subunit"
+ /protein_id="NP_012186.1"
+ /db_xref="GeneID:854731"
+ /db_xref="SGD:S000001341"
+ /translation="MSTLLSEVESIDTLPYVKDTTPTGSDSSSFNKLLAPSIEDVDAN
+ PEELRTLRGQGRYFGITDYDSNGAIMEAEPKCNNCSQRGHLKRNCPHVICTYCGFMDD
+ HYSQHCPKAIICTNCNANGHYKSQCPHKWKKVFCTLCNSKRHSRERCPSIWRSYLLKT
+ KDANQGDFDFQTVFCYNCGNAGHFGDDCAERRSSRVPNTDGSAFCGDNLATKFKQHYF
+ NQLKDYKREASQRQHFDNEHEFNLLDYEYNDDAYDLPGSRTYRDKMKWKGKVQSTRNK
+ NSSNNRYESSNNRKKKSPFSAQNYKVTKNKRVQTHPLDFPRSSQNNRTNDYSSQFSYN
+ RDDFPKGPKNKRGRSSSNKSQRNGRY"
+ gene <212499..>214703
+ /gene="THS1"
+ /locus_tag="YIL078W"
+ /db_xref="GeneID:854732"
+ mRNA <212499..>214703
+ /gene="THS1"
+ /locus_tag="YIL078W"
+ /product="threonine--tRNA ligase THS1"
+ /transcript_id="NM_001179428.3"
+ /db_xref="GeneID:854732"
+ CDS 212499..214703
+ /gene="THS1"
+ /locus_tag="YIL078W"
+ /EC_number="6.1.1.3"
+ /experiment="EXISTENCE:direct assay:GO:0004829
+ threonine-tRNA ligase activity [PMID:8143729]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0006435
+ threonyl-tRNA aminoacylation [PMID:8143729]"
+ /experiment="EXISTENCE:direct assay:GO:1990825
+ sequence-specific mRNA binding [PMID:34039240]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005737
+ cytoplasm [PMID:2995918]"
+ /note="Threonyl-tRNA synthetase involved in tRNA
+ aminoacylation; mRNA binding protein that preferentially
+ binds to the mRNA of RNAPI subunits and DNA binding
+ proteins; binds to an anticodon stem loop-like structure
+ in RPC10 and impacts its translation; cytoplasmic protein;
+ human homolog TARS can complement a yeast null mutant"
+ /codon_start=1
+ /product="threonine--tRNA ligase THS1"
+ /protein_id="NP_116578.3"
+ /db_xref="GeneID:854732"
+ /db_xref="SGD:S000001340"
+ /translation="MSASEAGVTEQVKKLSVKDSSNDAVKPNKKENKKSKQQSLYLDP
+ EPTFIEERIEMFDRLQKEYNDKVASMPRVPLKIVLKDGAVKEATSWETTPMDIAKGIS
+ KSLADRLCISKVNGQLWDLDRPFEGEANEEIKLELLDFESDEGKKVFWHSSAHVLGES
+ CECHLGAHICLGPPTDDGFFYEMAVRDSMKDISESPERTVSQADFPGLEGVAKNVIKQ
+ KQKFERLVMSKEDLLKMFHYSKYKTYLVQTKVPDGGATTVYRCGKLIDLCVGPHIPHT
+ GRIKAFKLLKNSSCYFLGDATNDSLQRVYGISFPDKKLMDAHLKFLAEASMRDHRKIG
+ KEQELFLFNEMSPGSCFWLPHGTRIYNTLVDLLRTEYRKRGYEEVITPNMYNSKLWET
+ SGHWANYKENMFTFEVEKETFGLKPMNCPGHCLMFKSRERSYRELPWRVADFGVIHRN
+ EFSGALSGLTRVRRFQQDDAHIFCTHDQIESEIENIFNFLQYIYGVFGFEFKMELSTR
+ PEKYVGKIETWDAAESKLESALKKWGGNWEINAGDGAFYGPKIDIMISDALRRWHQCA
+ TIQLDFQLPNRFELEFKSKDQDSESYERPVMIHRAILGSVERMTAILTEHFAGKWPFW
+ LSPRQVLVVPVGVKYQGYAEDVRNKLHDAGFYADVDLTGNTLQKKVRNGQMLKYNFIF
+ IVGEQEMNEKSVNIRNRDVMEQQGKNATVSVEEVLKQLRNLKDEKRGDNVLA"
+ rep_origin 214624..214754
+ /note="ARS901; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000007644"
+ gene complement(<214991..>215953)
+ /gene="RCI37"
+ /locus_tag="YIL077C"
+ /gene_synonym="MRX17"
+ /db_xref="GeneID:854733"
+ mRNA complement(<214991..>215953)
+ /gene="RCI37"
+ /locus_tag="YIL077C"
+ /gene_synonym="MRX17"
+ /product="Rci37p"
+ /transcript_id="NM_001179427.3"
+ /db_xref="GeneID:854733"
+ CDS complement(214991..215953)
+ /gene="RCI37"
+ /locus_tag="YIL077C"
+ /gene_synonym="MRX17"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14562095]"
+ /note="Protein that associates with the large
+ mitoribosomal subunit; the authentic, non-tagged protein
+ is detected in highly purified mitochondria in
+ high-throughput studies; deletion confers sensitivity to
+ 4-(N-(S-glutathionylacetyl)amino) phenylarsenoxide (GSAO)"
+ /codon_start=1
+ /product="Rci37p"
+ /protein_id="NP_012188.3"
+ /db_xref="GeneID:854733"
+ /db_xref="SGD:S000001339"
+ /translation="MLGKEEEQQYGQNGKGMENELPFMKRPWFKKAYENAIEFHEKDE
+ LLDARDRLELSKAYRSIAKAEMWGGWLGFSAVFLTPFAYRYYKTKAIKGVKVPRNFVL
+ GVMALFFATNFAGRSMYTRQLNERDPTGVLKDNYSNKYGDNDFGAFQHDQTKEIPRNQ
+ RQYNMMRLLDSGSPSRWSMYFYITYQNPERRLPDPKVKLQQMKKGGVFNGSPFMNQRD
+ PIGLYRNKGRKSPDPIEGEQNDSPVLSSWEKIRNGDNSSSSSWENIRNTSRDQSQESD
+ ASVDHESDIFISGFSDDGNATDNSSSDDKYQRLLQSGRYGGNRS"
+ gene <216658..>217548
+ /gene="SEC28"
+ /locus_tag="YIL076W"
+ /gene_synonym="ANU2"
+ /db_xref="GeneID:854734"
+ mRNA <216658..>217548
+ /gene="SEC28"
+ /locus_tag="YIL076W"
+ /gene_synonym="ANU2"
+ /product="coatomer subunit epsilon"
+ /transcript_id="NM_001179426.1"
+ /db_xref="GeneID:854734"
+ CDS 216658..217548
+ /gene="SEC28"
+ /locus_tag="YIL076W"
+ /gene_synonym="ANU2"
+ /experiment="EXISTENCE:direct assay:GO:0030126 COPI
+ vesicle coat [PMID:21435344]"
+ /experiment="EXISTENCE:direct assay:GO:1990841
+ promoter-specific chromatin binding [PMID:27184763]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006888
+ endoplasmic reticulum to Golgi vesicle-mediated transport
+ [PMID:10532354]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006901 vesicle
+ coating [PMID:9463377]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030126 COPI
+ vesicle coat [PMID:9463377]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032511 late
+ endosome to vacuole transport via multivesicular body
+ sorting pathway [PMID:17101773]"
+ /experiment="EXISTENCE:physical interaction:GO:0032511
+ late endosome to vacuole transport via multivesicular body
+ sorting pathway [PMID:17101773]"
+ /note="Epsilon-COP subunit of the coatomer; regulates
+ retrograde Golgi-to-ER protein traffic; stabilizes Cop1p,
+ the alpha-COP and the coatomer complex; non-essential for
+ cell growth; protein abundance increases in response to
+ DNA replication stress"
+ /codon_start=1
+ /product="coatomer subunit epsilon"
+ /protein_id="NP_012189.2"
+ /db_xref="GeneID:854734"
+ /db_xref="SGD:S000001338"
+ /translation="MDYFNIKQNYYTGNFVQCLQEIEKFSKVTDNTLLFYKAKTLLAL
+ GQYQSQDPTSKLGKVLDLYVQFLDTKNIEELENLLKDKQNSPYELYLLATAQAILGDL
+ DKSLETCVEGIDNDEAEGTTELLLLAIEVALLNNNVSTASTIFDNYTNAIEDTVSGDN
+ EMILNLAESYIKFATNKETATSNFYYYEELSQTFPTWKTQLGLLNLHLQQRNIAEAQG
+ IVELLLSDYYSVEQKENAVLYKPTFLANQITLALMQGLDTEDLTNQLVKLDHEHAFIK
+ HHQEIDAKFDELVRKYDTSN"
+ gene complement(<217863..>220700)
+ /gene="RPN2"
+ /locus_tag="YIL075C"
+ /gene_synonym="SEN3"
+ /db_xref="GeneID:854735"
+ mRNA complement(<217863..>220700)
+ /gene="RPN2"
+ /locus_tag="YIL075C"
+ /gene_synonym="SEN3"
+ /product="proteasome regulatory particle base subunit
+ RPN2"
+ /transcript_id="NM_001179425.1"
+ /db_xref="GeneID:854735"
+ CDS complement(217863..220700)
+ /gene="RPN2"
+ /locus_tag="YIL075C"
+ /gene_synonym="SEN3"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22842922|PMID:15210724]"
+ /experiment="EXISTENCE:direct assay:GO:0008540 proteasome
+ regulatory particle, base subcomplex [PMID:9741626]"
+ /experiment="EXISTENCE:direct assay:GO:0034515 proteasome
+ storage granule [PMID:18504300]"
+ /experiment="EXISTENCE:genetic interaction:GO:0043248
+ proteasome assembly [PMID:17911101]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004175
+ endopeptidase activity [PMID:7565784]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006511
+ ubiquitin-dependent protein catabolic process
+ [PMID:7565784]"
+ /experiment="EXISTENCE:physical interaction:GO:0031625
+ ubiquitin protein ligase binding [PMID:10688918]"
+ /note="Subunit of the 26S proteasome; substrate of the
+ N-acetyltransferase Nat1p; protein abundance increases in
+ response to DNA replication stress"
+ /codon_start=1
+ /product="proteasome regulatory particle base subunit
+ RPN2"
+ /protein_id="NP_012190.1"
+ /db_xref="GeneID:854735"
+ /db_xref="SGD:S000001337"
+ /translation="MSLTTAAPLLALLRENQDSVKTYALESINNVVDQLWSEISNELP
+ DIEALYDDDTFSDREMAALIASKVYYNLGEYESAVKYALAAKDRFDIDEKSQFVETIV
+ SKSIEMYVQEASKQYTKDEQFYTKDIIDPKLTSIFERMIEKCLKASELKLALGIALEG
+ YRLDIIESALKSKLDQDSTSENVKIINYLLTLAITTVTNSKFRSSILRKSFDFLMNMP
+ NCDYLTLNKVVVNLNDAGLALQLFKKLKEENDEGLSAQIAFDLVSSASQQLLEILVTE
+ LTAQGYDPALLNILSGLPTCDYYNTFLLNNKNIDIGLLNKSKSSLDGKFSLFHTAVSV
+ ANGFMHAGTTDNSFIKANLPWLGKAQNWAKFTATASLGVIHKGNLLEGKKVMAPYLPG
+ SRASSRFIKGGSLYGLGLIYAGFGRDTTDYLKNIIVENSGTSGDEDVDVLLHGASLGI
+ GLAAMGSANIEVYEALKEVLYNDSATSGEAAALGMGLCMLGTGKPEAIHDMFTYSQET
+ QHGNITRGLAVGLALINYGRQELADDLITKMLASDESLLRYGGAFTIALAYAGTGNNS
+ AVKRLLHVAVSDSNDDVRRAAVIALGFVLLRDYTTVPRIVQLLSKSHNAHVRCGTAFA
+ LGIACAGKGLQSAIDVLDPLTKDPVDFVRQAAMIALSMILIQQTEKLNPQVADINKNF
+ LSVITNKHQEGLAKFGACVAQGIMNAGGRNVTIQLENADTGTLDTKSVVGLVMFSQFW
+ YWFPLAHFLSLSFTPTTVIGIRGSDQAIPKFQMNCYAKEDAFSYPRMYEEASGKEVEK
+ VATAVLSTTARAKARAKKTKKEKGPNEEEKKKEHEEKEKERETNKKGIKETKENDEEF
+ YKNKYSSKPYKVDNMTRILPQQSRYISFIKDDRFVPVRKFKGNNGVVVLRDREPKEPV
+ ALIETVRQMKDVNAPLPTPFKVDDNVDFPSA"
+ gene complement(<221081..>222490)
+ /gene="SER33"
+ /locus_tag="YIL074C"
+ /db_xref="GeneID:854736"
+ mRNA complement(<221081..>222490)
+ /gene="SER33"
+ /locus_tag="YIL074C"
+ /product="phosphoglycerate dehydrogenase SER33"
+ /transcript_id="NM_001179424.1"
+ /db_xref="GeneID:854736"
+ CDS complement(221081..222490)
+ /gene="SER33"
+ /locus_tag="YIL074C"
+ /EC_number="1.1.1.95"
+ /EC_number="1.1.1.399"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0061759
+ alpha-ketoglutarate reductase activity [PMID:26774271]"
+ /experiment="EXISTENCE:genetic interaction:GO:0061759
+ alpha-ketoglutarate reductase activity [PMID:26774271]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004617
+ phosphoglycerate dehydrogenase activity [PMID:12525494]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009070 serine
+ family amino acid biosynthetic process [PMID:12525494]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061759
+ alpha-ketoglutarate reductase activity [PMID:26774271]"
+ /note="3-phosphoglycerate dehydrogenase and
+ alpha-ketoglutarate reductase; 3PG dehydrogenase that
+ catalyzes the first step in serine and glycine
+ biosynthesis; also functions as an alpha-ketoglutarate
+ reductase, converting alpha-ketoglutarate to
+ D-2-hydroxyglutarate (D-2HG); localizes to the cytoplasm;
+ SER33 has a paralog, SER3, that arose from the whole
+ genome duplication"
+ /codon_start=1
+ /product="phosphoglycerate dehydrogenase SER33"
+ /protein_id="NP_012191.1"
+ /db_xref="GeneID:854736"
+ /db_xref="SGD:S000001336"
+ /translation="MSYSAADNLQDSFQRAMNFSGSPGAVSTSPTQSFMNTLPRRVSI
+ TKQPKALKPFSTGDMNILLLENVNATAIKIFKDQGYQVEFHKSSLPEDELIEKIKDVH
+ AIGIRSKTRLTEKILQHARNLVCIGCFCIGTNQVDLKYAASKGIAVFNSPFSNSRSVA
+ ELVIGEIISLARQLGDRSIELHTGTWNKVAARCWEVRGKTLGIIGYGHIGSQLSVLAE
+ AMGLHVLYYDIVTIMALGTARQVSTLDELLNKSDFVTLHVPATPETEKMLSAPQFAAM
+ KDGAYVINASRGTVVDIPSLIQAVKANKIAGAALDVYPHEPAKNGEGSFNDELNSWTS
+ ELVSLPNIILTPHIGGSTEEAQSSIGIEVATALSKYINEGNSVGSVNFPEVSLKSLDY
+ DQENTVRVLYIHRNVPGVLKTVNDILSDHNIEKQFSDSHGEIAYLMADISSVNQSEIK
+ DIYEKLNQTSAKVSIRLLY"
+ gene complement(<222937..>225954)
+ /gene="SPO22"
+ /locus_tag="YIL073C"
+ /gene_synonym="ZIP4"
+ /db_xref="GeneID:854737"
+ mRNA complement(join(<222937..225809,225900..>225954))
+ /gene="SPO22"
+ /locus_tag="YIL073C"
+ /gene_synonym="ZIP4"
+ /product="Spo22p"
+ /transcript_id="NM_001179423.1"
+ /db_xref="GeneID:854737"
+ CDS complement(join(222937..225809,225900..225954))
+ /gene="SPO22"
+ /locus_tag="YIL073C"
+ /gene_synonym="ZIP4"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:24390141]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0033235
+ positive regulation of protein sumoylation
+ [PMID:23326245]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0090173
+ regulation of synaptonemal complex assembly
+ [PMID:16740482]"
+ /note="Meiosis-specific protein essential for chromosome
+ synapsis; involved in completion of nuclear divisions
+ during meiosis; induced early in meiosis"
+ /codon_start=1
+ /product="Spo22p"
+ /protein_id="NP_012192.2"
+ /db_xref="GeneID:854737"
+ /db_xref="SGD:S000001335"
+ /translation="MSDHNVNSTFRKTLVELCETATWITSQVYAAKNLEKNDLITVDN
+ KISALYPIAEKYDRSFRTTTVILDEELILKLENAASSLWNSLTIAMKAEKASDKYFNE
+ VFCKCKIFATKLLSIHEALFRTNTNLLRNFKCYISSFKSASEYRFDDLITNTQQHSEK
+ YLQIINENVESFSNEEKTEFKKLTFEFYLVNFQLYLSENDLDTANIYTAKVNITDNSK
+ YMDADLLIELCRMIYNSTVMLKEINNPETQLVDVNIISFLKDVEKYLELPVENLKSHT
+ DYSNLKYSVLIFMANCLVEGHPQASELEQCDHYLSLLQNEYPNKVDPFILAINLTKRR
+ NIVNPAETIEEILMRMIMSVDVISNFQAVIASINDLSKMNTKFSIVCLDYLLINKLNS
+ KNDSKFLGKAICSRFLITTQSKTMNDSEIAESLENFSTQMERIVSEPLTKHAISCIIT
+ LLWNTGKKLEKMEKYVVSIRFYKLALKDIISQNYSDRGKIQRALQVVYNKIEDYSNTV
+ RVYQDMDEVDRQSPLCQLLMLQSFLADDKTEEALTCLQKIKSSEDEKSTDALILAVAE
+ CKRKTDLSVQGLLMIFDKLQSKSNSQTISSTSSSQTLSILRYTLQMIVKVSEEEPLET
+ FINYLPTVQKLLQKAVEFLKTVKLLNQLPPDVEKEAIYQQSVAVNEIEWFASFSYNVA
+ VKCLVDQSCESISEFPQYCIQFIDLIPVQDFTFPKMYHFTYWRFKATILQLIIAKEKA
+ KQDQHQKDWDIYEKSEELVNSINVMKKSSEFKDGSSLEDRNTLHECFLEALTIHLESA
+ LMMPDQTRILDILKKTELYQDSRVDALLIDISSNMEDLPKGVLIEILETVLKRNMGPE
+ VKERELCSWLRILLENAINLNHEVELRILDRVLKILNINQSSLQDTDGVLQTELETIA
+ TYCWNIGVNYIIKDNKSNGIVWCKHSMGFANMVNEGLQEQLYSLWESLASSANIDINS
+ IAK"
+ gene <226602..>228419
+ /gene="HOP1"
+ /locus_tag="YIL072W"
+ /db_xref="GeneID:854738"
+ mRNA <226602..>228419
+ /gene="HOP1"
+ /locus_tag="YIL072W"
+ /product="Hop1p"
+ /transcript_id="NM_001179422.3"
+ /db_xref="GeneID:854738"
+ CDS 226602..228419
+ /gene="HOP1"
+ /locus_tag="YIL072W"
+ /experiment="EXISTENCE:direct assay:GO:0000400 four-way
+ junction DNA binding [PMID:17027027]"
+ /experiment="EXISTENCE:direct assay:GO:0000794 condensed
+ nuclear chromosome [PMID:2107981]"
+ /experiment="EXISTENCE:direct assay:GO:0003682 chromatin
+ binding [PMID:38332377]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:24390141|PMID:23841450]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051598
+ meiotic recombination checkpoint signaling
+ [PMID:10848609]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003682
+ chromatin binding [PMID:38332377]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007129
+ homologous chromosome pairing at meiosis
+ [PMID:2107981|PMID:8207053]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007130
+ synaptonemal complex assembly [PMID:2653960]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0010846
+ activation of reciprocal meiotic recombination
+ [PMID:38332377]"
+ /experiment="EXISTENCE:physical interaction:GO:0000800
+ lateral element [PMID:9060462]"
+ /note="Meiosis-specific HORMAD protein required for
+ chromosome synapsis; displays Red1p-dependent localization
+ to unsynapsed axial-lateral elements of the synaptonemal
+ complex; required for chiasma formation; binds chromatin
+ to regulate initiation of meiotic recombination; promotes
+ intra- and intermolecular synapsis between dsDNA molecules
+ and folds DNA into rigid protein-DNA filaments in vitro;
+ contains a HORMA domain, and a chromatin-binding region
+ made up of PHD and winged helix-turn-helix domains"
+ /codon_start=1
+ /product="Hop1p"
+ /protein_id="NP_012193.3"
+ /db_xref="GeneID:854738"
+ /db_xref="SGD:S000001334"
+ /translation="MSNKQLVKPKTETKTEITTEQSQKLLQTMLTMSFGCLAFLRGLF
+ PDDIFVDQRFVPEKVEKNYNKQNTSQNNSIKIKTLIRGKSAQADLLLDWLEKGVFKSI
+ RLKCLKALSLGIFLEDPTDLLENYIFSFDYDEENNVNINVNLSGNKKGSKNADPENET
+ ISLLDSRRMVQQLMRRFIIITQSLEPLPQKKFLTMRLMFNDNVDEDYQPELFKDATFD
+ KRATLKVPTNLDNDAIDVGTLNTKHHKVALSVLSAATSSMEKAGNTNFIRVDPFDLIL
+ QQQEENKLEESVPTKPQNFVTSQTTNVLGNLLNSSQASIQPTQFVSNNPVTGICSCEC
+ GLEVPKAATVLKTCKSCRKTLHGICYGNFLHSSIEKCFTCIFGPSLDTKWSKFQDLMM
+ IRKVFRFLVRKKKGFPASITELIDSFINVEDQNNEVKERVAFALFVFFLDETLCLDNG
+ GKPSQTIRYVTSSVLVDVKGIVIPNTRKQLNVNHEYKWHFTTSSPKAESFYQEVLPNS
+ RKQVESWLQDITNLRKVYSEALSPSSTLQELDLNSSLPTQDPIISGQKRRRYDLDEYL
+ EEDKSSVVNDTIKAKDFDESVPAKIRKISVSKKTLKSNW"
+ gene complement(<228660..>229994)
+ /gene="PCI8"
+ /locus_tag="YIL071C"
+ /gene_synonym="CSN11; YIH1; YIL071W"
+ /db_xref="GeneID:854739"
+ mRNA complement(<228660..>229994)
+ /gene="PCI8"
+ /locus_tag="YIL071C"
+ /gene_synonym="CSN11; YIH1; YIL071W"
+ /product="Pci8p"
+ /transcript_id="NM_001179421.3"
+ /db_xref="GeneID:854739"
+ CDS complement(228660..229994)
+ /gene="PCI8"
+ /locus_tag="YIL071C"
+ /gene_synonym="CSN11; YIH1; YIL071W"
+ /experiment="EXISTENCE:direct assay:GO:0008180 COP9
+ signalosome [PMID:12446563]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000338 protein
+ deneddylation [PMID:12186635|PMID:12446563]"
+ /note="Possible shared subunit of Cop9 signalosome (CSN)
+ and eIF3; binds eIF3b subunit Prt1p, has possible dual
+ functions in transcriptional and translational control,
+ contains a PCI (Proteasome-COP9 signalosome (CSN)-eIF3)
+ domain"
+ /codon_start=1
+ /product="Pci8p"
+ /protein_id="NP_085069.3"
+ /db_xref="GeneID:854739"
+ /db_xref="SGD:S000001333"
+ /translation="MFHGAKGPLLIERIGHLLSINYGEKEERKRWAMQGISYLQEVQC
+ TSTPYLEILVEESGLRPVSLLNSQLVGKPHFSLLGGFDENIARDIISHNFQNAIFQME
+ SEEVPLTKRYQHLEKITQISLLCKNFKGIEEIEYNVKNIIQGRKNFDMLNSMEKDRIS
+ HEVVQDDSFSLLRIQMLLCVSYFLQERYFDCCTKFFTMMTSEPLTLKVLSEHLDCMNF
+ ISKEEFIMMVNISVLISIPLDNYDDFIYLSDLKQFFQMTPLLVNCLELLINTNFNKFF
+ KIWHGEINKICMESLFLEPSWSSSAAVIMRCKIYFFYLRISKKLQFSYLSSTLGIDLE
+ DIKEELTKLIISGQLNFEIDGDVIHFEDSSILQSIVNEISRNGTMINEVIDKLKNENT
+ DLKDIIQGNPLMYSGGNNTATIINNESSDDMDIDEVNDRSDISDSEGGLFEC"
+ gene complement(<230272..>231072)
+ /gene="MAM33"
+ /locus_tag="YIL070C"
+ /db_xref="GeneID:854740"
+ mRNA complement(<230272..>231072)
+ /gene="MAM33"
+ /locus_tag="YIL070C"
+ /product="Mam33p"
+ /transcript_id="NM_001179420.1"
+ /db_xref="GeneID:854740"
+ CDS complement(230272..231072)
+ /gene="MAM33"
+ /locus_tag="YIL070C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005759
+ mitochondrial matrix [PMID:9305894]"
+ /experiment="EXISTENCE:direct assay:GO:0097177
+ mitochondrial ribosome binding [PMID:31053642]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009060 aerobic
+ respiration [PMID:9305894]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061668
+ mitochondrial ribosome assembly [PMID:31053642]"
+ /note="Acidic protein of the mitochondrial matrix; binds
+ unassembled large subunit proteins during ribosome
+ assembly; subunit of a complex containing Mrx6p, Pim1p,
+ and Pet20p that may regulate mtDNA replication; related to
+ the human complement receptor gC1q-R"
+ /codon_start=1
+ /product="Mam33p"
+ /protein_id="NP_012194.1"
+ /db_xref="GeneID:854740"
+ /db_xref="SGD:S000001332"
+ /translation="MFLRSVNRAVTRSILTTPKPAVVKSSWRVFTVANSKRCFTPAAI
+ MRNQETQRVGDILQSELKIEKETLPESTSLDSFNDFLNKYKFSLVETPGKNEAEIVRR
+ TESGETVHVFFDVAQIANLPYNNAMDENTEQNEDGINEDDFDALSDNFANVNVVISKE
+ SASEPAVSFELLMNLQEGSFYVDSATPYPSVDAALNQSAEAEITRELVYHGPPFSNLD
+ EELQESLEAYLESRGVNEELASFISAYSEFKENNEYISWLEKMKKFFH"
+ gene complement(<231553..>232369)
+ /gene="RPS24B"
+ /locus_tag="YIL069C"
+ /gene_synonym="RPS24EB"
+ /db_xref="GeneID:854741"
+ mRNA complement(join(<231553..231957,232367..>232369))
+ /gene="RPS24B"
+ /locus_tag="YIL069C"
+ /gene_synonym="RPS24EB"
+ /product="ribosomal 40S subunit protein S24B"
+ /transcript_id="NM_001179419.1"
+ /db_xref="GeneID:854741"
+ CDS complement(join(231553..231957,232367..232369))
+ /gene="RPS24B"
+ /locus_tag="YIL069C"
+ /gene_synonym="RPS24EB"
+ /experiment="EXISTENCE:genetic interaction:GO:0000462
+ maturation of SSU-rRNA from tricistronic rRNA transcript
+ (SSU-rRNA, 5.8S rRNA, LSU-rRNA) [PMID:16246728]"
+ /note="Protein component of the small (40S) ribosomal
+ subunit; homologous to mammalian ribosomal protein S24, no
+ bacterial homolog; RPS24B has a paralog, RPS24A, that
+ arose from the whole genome duplication"
+ /codon_start=1
+ /product="ribosomal 40S subunit protein S24B"
+ /protein_id="NP_012195.1"
+ /db_xref="GeneID:854741"
+ /db_xref="SGD:S000001331"
+ /translation="MSDAVTIRTRKVISNPLLARKQFVVDVLHPNRANVSKDELREKL
+ AEVYKAEKDAVSVFGFRTQFGGGKSVGFGLVYNSVAEAKKFEPTYRLVRYGLAEKVEK
+ ASRQQRKQKKNRDKKIFGTGKRLAKKVARRNAD"
+ gene complement(<233057..>235474)
+ /gene="SEC6"
+ /locus_tag="YIL068C"
+ /db_xref="GeneID:854742"
+ mRNA complement(<233057..>235474)
+ /gene="SEC6"
+ /locus_tag="YIL068C"
+ /product="SNARE-binding exocyst subunit SEC6"
+ /transcript_id="NM_001179418.1"
+ /db_xref="GeneID:854742"
+ CDS complement(233057..235474)
+ /gene="SEC6"
+ /locus_tag="YIL068C"
+ /experiment="EXISTENCE:direct assay:GO:0000145 exocyst
+ [PMID:8978675|PMID:7615633]"
+ /experiment="EXISTENCE:direct assay:GO:0000149 SNARE
+ binding [PMID:22114349|PMID:15835919]"
+ /experiment="EXISTENCE:direct assay:GO:0005628 prospore
+ membrane [PMID:24390141]"
+ /experiment="EXISTENCE:direct assay:GO:0005934 cellular
+ bud tip [PMID:19073882]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:26928762|PMID:19073882]"
+ /experiment="EXISTENCE:direct assay:GO:0035544 negative
+ regulation of SNARE complex assembly
+ [PMID:15835919|PMID:22114349]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:19053807]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006893
+ Golgi to plasma membrane transport [PMID:7026045]"
+ /experiment="EXISTENCE:genetic interaction:GO:0035544
+ negative regulation of SNARE complex assembly
+ [PMID:22114349]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006887
+ exocytosis [PMID:6996832]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006893 Golgi
+ to plasma membrane transport [PMID:7026045]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051601 exocyst
+ localization [PMID:19073882]"
+ /experiment="EXISTENCE:physical interaction:GO:0000149
+ SNARE binding [PMID:22114349|PMID:15835919]"
+ /experiment="EXISTENCE:physical interaction:GO:0035544
+ negative regulation of SNARE complex assembly
+ [PMID:15835919|PMID:22114349]"
+ /note="Essential 88kDa subunit of the exocyst complex; the
+ exocyst mediates polarized targeting and tethering of
+ post-Golgi secretory vesicles to active sites of
+ exocytosis at the plasma membrane prior to SNARE-mediated
+ fusion; anchors the assembled complex to sites of
+ secretion; interacts with SM-like protein and SNARE
+ regulator Sec1p and may recruit it to sites of secretion;
+ binds to SNARE complexes binteracting with Sec9p"
+ /codon_start=1
+ /product="SNARE-binding exocyst subunit SEC6"
+ /protein_id="NP_012196.1"
+ /db_xref="GeneID:854742"
+ /db_xref="SGD:S000001330"
+ /translation="MSSDPLQQVCDLIKGDLSLERVRDIKEQLLKEKSVVEYQLNKES
+ DKYYGEVEESLKLLNLSKNSVTSIKQQINEVNKLGNDNRFAINRYDILFRATKLYETV
+ NTTSSIYDRIYNFVALMEHIERLLVAELAEDALETGCPHLLEIHFLLTSARDFQEQVV
+ VMAKEATEDAQRTVMKLFSRLSGIISKFDKLLDGLTYDIVEMARAEQISLAIRLFKIY
+ DLEEREDLRIEAIRNIIKKKEIEIEKSSIKKLPNSKNTARLQDETPKVIEYPTNKGLY
+ QEIMSGTISTRTAPRGYKHFLINGINNSISEMFGEMREKYVGDQKFDVLDNMDWIFNE
+ LIIVKEHIANCCPPHWNIFEVYFDQYYKELHSLITDLVESEPETIIILDILAFDKTFQ
+ DTLKQDFGFTKSEVKSVIGDKEKETLFKDYLNLIVVKMTEWIGNLEKAEFDVFLERST
+ PPHSDSDGLLFLDGTKTCFQMFTQQVEVAAGTNQAKILVGVVERFSDLLTKRQKNWIS
+ KISEEIKKQINYNHKYDIDPESITPEDECPGGLVEYLIAVSNDQMKAADYAVAISSKY
+ GKLVSKVYEKQITNHLEGTLDGFAEVAQCSSLGLITLMFDDLRKPYQEIFSKTWYMGS
+ QAQQIADTLDEYLLDIKPQMNSVLFVNFIDNVIGETIIKFLTALSFEHSFKNKNNKFL
+ EAMKRDFEIFYQLFVKVLDGNESKDTLITQNFTVMEFFMDLSCEPIDSILDIWQKYLE
+ VYWDSRIDLLVGILKCRKDVSSSERKKIVQQATEMLHEYRRNMEANGVDREPTLMRRF
+ VLEFEKQ"
+ gene complement(<235724..>237760)
+ /locus_tag="YIL067C"
+ /db_xref="GeneID:854743"
+ mRNA complement(<235724..>237760)
+ /locus_tag="YIL067C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179417.1"
+ /db_xref="GeneID:854743"
+ CDS complement(235724..237760)
+ /locus_tag="YIL067C"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:14562095]"
+ /note="Uncharacterized hypothetical protein"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012197.1"
+ /db_xref="GeneID:854743"
+ /db_xref="SGD:S000001329"
+ /translation="MGVHFDDNANTTWEATDPGVSSDCDGQHRVTESIQLQNFSNTDM
+ ESMLDEEGRENSKSKWLLLKRKHPIQKFIERVWNGPVEPSDEPPSFPKRWGWLKKIDD
+ FPQTTFKTKIPSKLIRLLLLIVYCCFWMRIFYSLIYPYLIKPPYFHPNDGSEKIPILS
+ LSCNSYLNWEGTNNECGLNAKNCGPLDNKEYMIRCPALCDRGGWTYSAIAVGNRRVKY
+ TGYEIGGGALFSEEDPMVVSYPYRSDSFPCASAVHAGVISPFYGGCTKVSMQGAQNSF
+ PSKKGMYNTGFSVAFNSFFPGSYSFRDIQGGILSGCYDPRAAVVALNMLFGLPIFYLY
+ DSIYGYWINTIVGYWTLVLSLDPPLLTDAHDPASVYELFSVGFQRLLPLCFVLYVVWK
+ SAVKRTLENGSPIAKVILWYPTFWLGISNNVTFDRLPVDRLTTTDLKEQAGALTAVGS
+ IAATILTCAVIQAYSLWKSGRFKKYFKIYICFIGGLIALGSLPGLNLRIHHYILGSIL
+ VPGCATRGSSAYLFQGILVGLILSGVARWDFASIVETDTALLRGEAGASLKPPILDFN
+ DDQNHSLSWHLNATDPVIDQIGNIDGFSLLLNDVEVYVGKNETVSIDVLRMENPALAQ
+ MMDDALDASNGTIDLYLRVARASVRSPTNRGDYTNAGVLQWPNGMWQKPEPGVS"
+ gene complement(<238099..>240708)
+ /gene="RNR3"
+ /locus_tag="YIL066C"
+ /gene_synonym="DIN1; RIR3"
+ /db_xref="GeneID:854744"
+ mRNA complement(<238099..>240708)
+ /gene="RNR3"
+ /locus_tag="YIL066C"
+ /gene_synonym="DIN1; RIR3"
+ /product="ribonucleotide-diphosphate reductase subunit
+ RNR3"
+ /transcript_id="NM_001179416.3"
+ /db_xref="GeneID:854744"
+ CDS complement(238099..240708)
+ /gene="RNR3"
+ /locus_tag="YIL066C"
+ /gene_synonym="DIN1; RIR3"
+ /EC_number="1.17.4.1"
+ /experiment="EXISTENCE:direct assay:GO:0004748
+ ribonucleoside-diphosphate reductase activity, thioredoxin
+ disulfide as acceptor [PMID:11893751]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:12732713]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0009263
+ deoxyribonucleotide biosynthetic process [PMID:5459124]"
+ /note="Minor isoform of large subunit of
+ ribonucleotide-diphosphate reductase; the RNR complex
+ catalyzes rate-limiting step in dNTP synthesis, regulated
+ by DNA replication and DNA damage checkpoint pathways via
+ localization of small subunits; RNR3 has a paralog, RNR1,
+ that arose from the whole genome duplication"
+ /codon_start=1
+ /product="ribonucleotide-diphosphate reductase subunit
+ RNR3"
+ /protein_id="NP_012198.3"
+ /db_xref="GeneID:854744"
+ /db_xref="SGD:S000001328"
+ /translation="MYVIKRDGRKEPVQFDKITSRITRLSYGLDPNRIDAVKVTQRII
+ SGVYSGVTTVELDNLAAETCAYMTTVHPDYATLAARIAISNLHKQTTKQFSKVIEDLH
+ DWINPATGKHAPMISDEIYNIVMENKDTLNSAIVYDRDFQYTYFGFKTLERSYLLRLN
+ GEVAERPQHLVMRVALGIHGSDIESVLKTYNLMSLRYFTHASPTLFNAGTPHPQMSSC
+ FLIAMKDDSIEGIYDTLKECAMISKTAGGVGLHINNIRSTGSYIAGTNGTSNGLIPMI
+ RVFNNTARYVDQGGNKRPGAFALFLEPWHADIFDFVDIRKTHGKEEIRARDLFPALWI
+ PDLFMKRVQEDGPWTLFSPSAAPGLDDVWGDEFEELYTRYEREGRGKTIKAQKLWYAI
+ LQAQTETGTPFMVYKDACNRKTNQQNLGTIKSSNLCCEIVEYSSPDETAVCNLASIAL
+ PAFVEVSEDGKTASYNFERLHEIAKVITHNLNRVIDRNYYPVPEARNSNMKHRPIALG
+ VQGLADTYMMLRLPFESEEAQTLNKQIFETIYHATLEASCELAQKEGKYSTFEGSPAS
+ KGILQFDMWNAKPFGMWDWETLRKDIVKHGLRNSLTMAPMPTASTSQILGYNECFEPV
+ TSNMYSRRVLSGEFQVVNPYLLRDLVDLGIWDDSMKQYLITQNGSIQGLPNVPQELKE
+ LYKTVWEISQKTIINMAADRAIYIDQSHSLNLFLQAPSMGKITSMHFYGWKKGLKTGM
+ YYLRTQAASAAIQFTIDQEVADQAATHIASVSELDRPVYVPKGTKFSEQKAASALTES
+ SDNEKDASPVPSEQSSVSSAMSNVKLEDSVAPAVPTETIKEDSDEKKCDIYNEKVIAC
+ TAPTPEACESCSG"
+ gene complement(<241308..>241775)
+ /gene="FIS1"
+ /locus_tag="YIL065C"
+ /gene_synonym="MDV2"
+ /db_xref="GeneID:854745"
+ mRNA complement(<241308..>241775)
+ /gene="FIS1"
+ /locus_tag="YIL065C"
+ /gene_synonym="MDV2"
+ /product="Fis1p"
+ /transcript_id="NM_001179415.3"
+ /db_xref="GeneID:854745"
+ CDS complement(241308..241775)
+ /gene="FIS1"
+ /locus_tag="YIL065C"
+ /gene_synonym="MDV2"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane
+ [PMID:16407407|PMID:11038183|PMID:15809300]"
+ /experiment="EXISTENCE:direct assay:GO:0005777 peroxisome
+ [PMID:16968746]"
+ /experiment="EXISTENCE:genetic interaction:GO:0016559
+ peroxisome fission [PMID:18445678]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000266
+ mitochondrial fission [PMID:11038182|PMID:11038183]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007031
+ peroxisome organization [PMID:16968746]"
+ /note="Protein involved in mitochondrial fission and
+ peroxisome abundance; may have a distinct role in
+ tethering protein aggregates to mitochondria in order to
+ retain them in the mother cell; required for localization
+ of Dnm1p and Mdv1p during mitochondrial division; mediates
+ ethanol-induced apoptosis and ethanol-induced
+ mitochondrial fragmentation"
+ /codon_start=1
+ /product="Fis1p"
+ /protein_id="NP_012199.3"
+ /db_xref="GeneID:854745"
+ /db_xref="SGD:S000001327"
+ /translation="MTKVDFWPTLKDAYEPLYPQQLEILRQQVVSEGGPTATIQSRFN
+ YAWGLIKSTDVNDERLGVKILTDIYKEAESRRRECLYYLTIGCYKLGEYSMAKRYVDT
+ LFEHERNNKQVGALKSMVEDKIQKETLKGVVVAGGVLAGAVAVASFFLRNKRR"
+ gene <242027..>242716
+ /gene="EFM4"
+ /locus_tag="YIL064W"
+ /gene_synonym="SEE1"
+ /db_xref="GeneID:854746"
+ mRNA <242027..>242716
+ /gene="EFM4"
+ /locus_tag="YIL064W"
+ /gene_synonym="SEE1"
+ /product="Efm4p"
+ /transcript_id="NM_001179414.2"
+ /db_xref="GeneID:854746"
+ CDS 242027..242716
+ /gene="EFM4"
+ /locus_tag="YIL064W"
+ /gene_synonym="SEE1"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0016279
+ protein-lysine N-methyltransferase activity
+ [PMID:20510667]"
+ /note="Lysine methyltransferase; involved in the
+ dimethylation of eEF1A (Tef1p/Tef2p) at lysine 316;
+ sequence similarity to S-adenosylmethionine-dependent
+ methyltransferases of the seven beta-strand family; role
+ in vesicular transport"
+ /codon_start=1
+ /product="Efm4p"
+ /protein_id="NP_012200.2"
+ /db_xref="GeneID:854746"
+ /db_xref="SGD:S000001326"
+ /translation="MQGTADLSTSKLGTKKYWDELYALELENFRRNPQDTGDCWFSDS
+ DAEQKMIDFLVDNIGAYRISENASVVDLGTGNGHMLFELHQTEFQGKLVGIDYSEESV
+ KLASNIAEATGVDNFISFQQADIFSGDWKPGKYDIVLDKGTLDAISLSGMKINGKLDV
+ VDVYAGVVERILKKDGIFLITSCNFTQDELVKIIETDNLKMWKTIKYPVFQFGGVQGA
+ TICSVAFVKQN"
+ gene complement(<242761..>243744)
+ /gene="YRB2"
+ /locus_tag="YIL063C"
+ /db_xref="GeneID:854747"
+ mRNA complement(<242761..>243744)
+ /gene="YRB2"
+ /locus_tag="YIL063C"
+ /product="Yrb2p"
+ /transcript_id="NM_001179413.1"
+ /db_xref="GeneID:854747"
+ CDS complement(242761..243744)
+ /gene="YRB2"
+ /locus_tag="YIL063C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:9121474|PMID:9395535|PMID:22932476|PMID:10684247]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000056
+ ribosomal small subunit export from nucleus
+ [PMID:12082158]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006611 protein
+ export from nucleus [PMID:9636166]"
+ /note="hypothetical protein; involved in nuclear processes
+ of the Ran-GTPase cycle; involved in nuclear protein
+ export; contains Ran Binding Domain and FxFG repeats;
+ interacts with Srm1p, GTP-Gsp1p, Rna1p and Crm1p;
+ relocalizes to the cytosol in response to hypoxia; not
+ essential for viability"
+ /codon_start=1
+ /product="Yrb2p"
+ /protein_id="NP_012201.1"
+ /db_xref="GeneID:854747"
+ /db_xref="SGD:S000001325"
+ /translation="MSETNGGNAARENSEVKQTAVENPIDKLDGTPKRPREKDQDEQA
+ EETSDKSEAPNKNDEEKKEEGKKDQEPSHKKIKVDDGKTVESGIVEDDKKEDKFVFGA
+ ASKFGTGFGVAKKDTKDGDATTSTESLPASDSKTKKPFAFGSGLSFGSGFNILKNKTE
+ NNSESEKKATDVDKDKVHSGSEQLANASEDTKDKPKPLKLQKQEVKSGEESEECIYQV
+ NAKLYQLSNIKEGWKERGVGIIKINKSKDDVEKTRIVMRSRGILKVILNIQLVKGFTV
+ QKGFTGSLQSEKFIRLLAVDDNGDPAQYAIKTGKKETTDELYNIIVKSVPK"
+ gene complement(<243998..>244462)
+ /gene="ARC15"
+ /locus_tag="YIL062C"
+ /db_xref="GeneID:854748"
+ mRNA complement(<243998..>244462)
+ /gene="ARC15"
+ /locus_tag="YIL062C"
+ /product="Arc15p"
+ /transcript_id="NM_001179412.1"
+ /db_xref="GeneID:854748"
+ CDS complement(243998..244462)
+ /gene="ARC15"
+ /locus_tag="YIL062C"
+ /experiment="EXISTENCE:direct assay:GO:0003729 mRNA
+ binding [PMID:20844764]"
+ /experiment="EXISTENCE:direct assay:GO:0005885 Arp2/3
+ protein complex [PMID:9210376|PMID:10377407]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000001
+ mitochondrion inheritance [PMID:11248049]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030674
+ protein-macromolecule adaptor activity [PMID:10377407]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0044396 actin
+ cortical patch organization [PMID:10377407]"
+ /note="Subunit of the ARP2/3 complex; ARP2/3 is required
+ for the motility and integrity of cortical actin patches;
+ has mRNA binding activity"
+ /codon_start=1
+ /product="Arc15p"
+ /protein_id="NP_012202.1"
+ /db_xref="GeneID:854748"
+ /db_xref="SGD:S000001324"
+ /translation="MEADWRRIDIDAFDPESGRLTAADLVPPYETTVTLQELQPRMNQ
+ LRSLATSGDSLGAVQLLTTDPPYSADAPTKEQYFKSVLEALTQVRQADIGNVIKNLSD
+ SQRDVLVKYLYKGMSVPQGQKQGGVLLAWLERITQVSGVTPIVHYISDRRTV"
+ gene complement(<244657..>245559)
+ /gene="SNP1"
+ /locus_tag="YIL061C"
+ /db_xref="GeneID:854749"
+ mRNA complement(<244657..>245559)
+ /gene="SNP1"
+ /locus_tag="YIL061C"
+ /product="U1 snRNP complex subunit SNP1"
+ /transcript_id="NM_001179411.1"
+ /db_xref="GeneID:854749"
+ CDS complement(244657..245559)
+ /gene="SNP1"
+ /locus_tag="YIL061C"
+ /experiment="EXISTENCE:direct assay:GO:0005685 U1 snRNP
+ [PMID:9630245]"
+ /experiment="EXISTENCE:direct assay:GO:0071004 U2-type
+ prespliceosome [PMID:16618970]"
+ /experiment="EXISTENCE:physical interaction:GO:0000243
+ commitment complex [PMID:10072386]"
+ /experiment="EXISTENCE:physical interaction:GO:0000398
+ mRNA splicing, via spliceosome [PMID:10072386]"
+ /experiment="EXISTENCE:physical interaction:GO:0003729
+ mRNA binding [PMID:10072386]"
+ /experiment="EXISTENCE:physical interaction:GO:0030619 U1
+ snRNA binding [PMID:1387202]"
+ /note="Component of U1 snRNP required for mRNA splicing
+ via spliceosome; substrate of arginine methyltransferase
+ Hmt1p; may interact with poly(A) polymerase to regulate
+ polyadenylation; homolog of human U1-70K, which has been
+ linked to several types of autoimmune and
+ neurodegenerative diseases"
+ /codon_start=1
+ /product="U1 snRNP complex subunit SNP1"
+ /protein_id="NP_012203.1"
+ /db_xref="GeneID:854749"
+ /db_xref="SGD:S000001323"
+ /translation="MNYNLSKYPDDVSRLFKPRPPLSYKRPTDYPYAKRQTNPNITGV
+ ANLLSTSLKHYMEEFPEGSPNNHLQRYEDIKLSKIKNAQLLDRRLQNWNPNVDPHIKD
+ TDPYRTIFIGRLPYDLDEIELQKYFVKFGEIEKIRIVKDKITQKSKGYAFIVFKDPIS
+ SKMAFKEIGVHRGIQIKDRICIVDIERGRTVKYFKPRRLGGGLGGRGYSNRDSRLPGR
+ FASASTSNPAERNYAPRLPRRETSSSAYSADRYGSSTLDARYRGNRPLLSAATPTAAV
+ TSVYKSRNSRTRESQPAPKEAPDY"
+ rep_origin 245756..245925
+ /note="ARS923; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000119034"
+ repeat_region 246220..246552
+ /note="Ty2 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007012"
+ gene <246392..>246826
+ /locus_tag="YIL060W"
+ /db_xref="GeneID:854750"
+ mRNA <246392..>246826
+ /locus_tag="YIL060W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179410.3"
+ /db_xref="GeneID:854750"
+ CDS 246392..246826
+ /locus_tag="YIL060W"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24360837]"
+ /note="Mitochondrial hypothetical protein; required for
+ respiratory growth; mutant accumulates less glycogen than
+ does wild type; null mutation results in a decrease in
+ plasma membrane electron transport; YIL060W is not an
+ essential gene"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012204.3"
+ /db_xref="GeneID:854750"
+ /db_xref="SGD:S000001322"
+ /translation="MMIIIFIELCRIADSLLWIPKSSRRTSSTFYIPNIIALLKMESQ
+ QLSQNSPTLHIHTCGSKIGTLFLRFTKVAIGTSLIVGAGVAMEVSVPLPPQPLYSRSE
+ VPSVELCGIVAICRSPPSVYPTCRPISLSKKIVSGLVRTNSS"
+ gene complement(<246550..>246915)
+ /locus_tag="YIL059C"
+ /db_xref="GeneID:854752"
+ mRNA complement(<246550..>246915)
+ /locus_tag="YIL059C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001431140.1"
+ /db_xref="GeneID:854752"
+ CDS complement(246550..246915)
+ /locus_tag="YIL059C"
+ /note="hypothetical protein; has predicted signal peptide
+ cleavage site in non-enzymatic end; located adjacent and
+ on opposite strand to a Ty2 LTR, suggesting horizontal
+ transfer; partially overlaps uncharacterized ORF YIL060W"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001418069.1"
+ /db_xref="GeneID:854752"
+ /db_xref="SGD:S000001321"
+ /translation="MNFSTVFQAIIAVLGLTTVTALAEFDFDVGYEEFVRTNPDTIFL
+ ESDIGLHVGYTEGGERQIATIPHNSTLGTSLREYSGCGGNGTETSIATPAPTMSEVPI
+ ATFVKRRKSVPILLPQVCM"
+ rep_origin 247705..247760
+ /note="ARS914; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118397"
+ gene complement(<247902..>248396)
+ /gene="RGI2"
+ /locus_tag="YIL057C"
+ /db_xref="GeneID:854753"
+ mRNA complement(<247902..>248396)
+ /gene="RGI2"
+ /locus_tag="YIL057C"
+ /product="Rgi2p"
+ /transcript_id="NM_001179407.3"
+ /db_xref="GeneID:854753"
+ CDS complement(247902..248396)
+ /gene="RGI2"
+ /locus_tag="YIL057C"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006112 energy
+ reserve metabolic process [PMID:20567505]"
+ /note="hypothetical protein; involved in energy metabolism
+ under respiratory conditions; expression induced under
+ carbon limitation and repressed under high glucose; RGI2
+ has a paralog, RGI1, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Rgi2p"
+ /protein_id="NP_012207.3"
+ /db_xref="GeneID:854753"
+ /db_xref="SGD:S000001319"
+ /translation="MTKKDKKAKGPKMSTITTKSGESLKVFEDLHDFETYLKGETEDQ
+ EFDHVHCQLKYYPPFVLHDAHDDPEKIKETANSHSKKFVRHLHQHVEKHLLKDIKTAI
+ NKPELKFHDKKKQESFDRIVWNYGEETELNAKKFKVSVEVVCKHDGAMVDVDYKTEPL
+ QPLI"
+ rep_origin 248396..248850
+ /note="ARS915; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130159"
+ gene 248850..248931
+ /gene="SUP17"
+ /locus_tag="YNCI0006W"
+ /db_xref="GeneID:854754"
+ tRNA 248850..248931
+ /gene="SUP17"
+ /locus_tag="YNCI0006W"
+ /product="tRNA-Ser"
+ /experiment="EXISTENCE:curator inference:GO:0002181
+ cytoplasmic translation [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Serine tRNA (tRNA-Ser), predicted by tRNAscan-SE
+ analysis; can mutate to suppress ochre nonsense mutations"
+ /db_xref="GeneID:854754"
+ /db_xref="SGD:S000006735"
+ gene <249991..>251913
+ /gene="VHR1"
+ /locus_tag="YIL056W"
+ /db_xref="GeneID:854755"
+ mRNA <249991..>251913
+ /gene="VHR1"
+ /locus_tag="YIL056W"
+ /product="Vhr1p"
+ /transcript_id="NM_001179406.1"
+ /db_xref="GeneID:854755"
+ CDS 249991..251913
+ /gene="VHR1"
+ /locus_tag="YIL056W"
+ /experiment="EXISTENCE:direct assay:GO:0000977 RNA
+ polymerase II transcription regulatory region
+ sequence-specific DNA binding [PMID:16533810]"
+ /experiment="EXISTENCE:direct assay:GO:0000981 DNA-binding
+ transcription factor activity, RNA polymerase II-specific
+ [PMID:21278159]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095|PMID:16533810]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000981
+ DNA-binding transcription factor activity, RNA polymerase
+ II-specific [PMID:21278159]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:21278159|PMID:16533810]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:16533810]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1990383
+ cellular response to biotin starvation
+ [PMID:16533810|PMID:21278159]"
+ /note="Transcriptional activator; required for the vitamin
+ H-responsive element (VHRE) mediated induction of VHT1
+ (Vitamin H transporter) and BIO5 (biotin biosynthesis
+ intermediate transporter) in response to low biotin
+ concentrations; VHR1 has a paralog, VHR2, that arose from
+ the whole genome duplication"
+ /codon_start=1
+ /product="Vhr1p"
+ /protein_id="NP_012208.1"
+ /db_xref="GeneID:854755"
+ /db_xref="SGD:S000001318"
+ /translation="MNGPPTFTQYRINKFSGNGATHKIRELLNFNDEKKWKQFSSRRL
+ ELIDKFQLSQYKASEQDQNIKQIATILRTEFGYPVSCSKEFEKLVTAAVQSVRRNRKR
+ SKKRYALSIANGSGGNVNNSISSNSTSDDEISPSIYQRSNSDFLPSSNYAADFQFSNK
+ FQPLMSHQSHNGTIFPTVGTQNDSSPSVTSTQQKYNDIVTMLVHDLVTNVVPLSEQAL
+ KDPYTGPNLSHFATSSLSQQPNITTNIPIDSTVPFFLREKLLLQIQRSRTCQDISQAA
+ GSIDIYANLEILGEMSIRMSIAFVIERFFSNLVSSSMKYITAKTCSPENLALLSQRLF
+ GAATRHNLSHFPAAQVQLRLLYLVIGGIVKDFGFDPTLYPLSEIIHHIVMVQYPLASS
+ CASEPPSSSPNKRVKRSPPVVSSDVMLNNNNTLSNRATLLTTLPMKPQSANKDVNRRV
+ IIRFNDREQAFTFHQLSNGPPTVSEVLENCKNLFNIINKNKNFGIFHNDNLLNDESLA
+ KLFDSFSTSEIHLVIKDISTIPLQDAKIPVPITLPKMSCIGENPSMPSIPLVPQEKDD
+ PKKSSLTAFDNILNRISKSPMNEENSNTTLNTGTSTSNTNNNDHNESVPAPYVTKNKN
+ SFQNGNLPQPVFQPLL"
+ gene complement(<252042..>253925)
+ /locus_tag="YIL055C"
+ /db_xref="GeneID:854756"
+ mRNA complement(<252042..>253925)
+ /locus_tag="YIL055C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179405.1"
+ /db_xref="GeneID:854756"
+ CDS complement(252042..253925)
+ /locus_tag="YIL055C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24390141]"
+ /note="hypothetical protein"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012209.1"
+ /db_xref="GeneID:854756"
+ /db_xref="SGD:S000001317"
+ /translation="MTLEPHYSAKSAASASAFVARSATESSTASTKAAPRKKTYSQSN
+ GIPIRIDNLPPGKTWAQVKYLIGGIIYHTNILQVKMLPPMTSMVPPFITFQSCIVILK
+ NSIDNESLENLLLTLNTYQWDYHDLFVYLLPYTNDSPSLRYPEISDSNNDVRSAPDET
+ KRSISPRYASHVSSVTPQPPSASTPPSQFFSFSPEVSLRKNENITPLPTPVPVPVGVP
+ PLAPPPHGPFSTSMLPMLGAPPGTVPNMQMPYQTTLPSPTAAATAGGPLASPTHYPRR
+ RHFYHQNQSQFQKYMHNSPRNPDTGTGPRLSQQHHLSLRNNKINPSYNEISALYNLNM
+ ASNSNNNGNIPTTSTNGDDRALQAKNGGTITPSQTQINHKRLKHIFNEKSFRKQMTNR
+ GMWQLKIINFPPYIPIEFLEKLSESDFNELMNQEKFTVIEIKEKGQLEKFGRLRWTVL
+ KDFIKLKCPKLLRLQERQFLQQQNEASLLNESMDALKISENENTNGSANNSTYTNGGP
+ RTSINNTREFYVGVYEDHEEATLLRFELPEDELEEFNRNLPTTFAQSGNVSDSEGDSK
+ AKYFKVSTIVYNAIVGFHDKELSDLTFESLQDQEYSLGYKIHVMELPPFDEDEFENQR
+ QQF"
+ gene <254543..>254860
+ /locus_tag="YIL054W"
+ /db_xref="GeneID:854757"
+ mRNA <254543..>254860
+ /locus_tag="YIL054W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001270751.1"
+ /db_xref="GeneID:854757"
+ CDS 254543..254860
+ /locus_tag="YIL054W"
+ /note="hypothetical protein; expressed at both mRNA and
+ protein levels"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001257680.1"
+ /db_xref="GeneID:854757"
+ /db_xref="SGD:S000001316"
+ /translation="MAPKAFFVCLPWVLPRHALIVRQAGNPYHFLAYTNPRAPGKLQD
+ SHCPVFFMGIIIITIITVTLAIIIINIIFLTLFDDGMCFYCSLLTFSFVSFNFDHFDH
+ FDL"
+ gene <255115..>255867
+ /gene="GPP1"
+ /locus_tag="YIL053W"
+ /gene_synonym="RHR2"
+ /db_xref="GeneID:854758"
+ mRNA <255115..>255867
+ /gene="GPP1"
+ /locus_tag="YIL053W"
+ /gene_synonym="RHR2"
+ /product="glycerol-1-phosphatase RHR2"
+ /transcript_id="NM_001179403.1"
+ /db_xref="GeneID:854758"
+ CDS 255115..255867
+ /gene="GPP1"
+ /locus_tag="YIL053W"
+ /gene_synonym="RHR2"
+ /EC_number="3.1.3.21"
+ /experiment="EXISTENCE:direct assay:GO:0000121
+ glycerol-1-phosphatase activity [PMID:8662716]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:27385335|PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006114
+ glycerol biosynthetic process [PMID:11058591]"
+ /note="Constitutively expressed DL-glycerol-3-phosphate
+ phosphatase; also known as glycerol-1-phosphatase;
+ involved in glycerol biosynthesis, induced in response to
+ both anaerobic and osmotic stress; GPP1 has a paralog,
+ GPP2, that arose from the whole genome duplication"
+ /codon_start=1
+ /product="glycerol-1-phosphatase RHR2"
+ /protein_id="NP_012211.2"
+ /db_xref="GeneID:854758"
+ /db_xref="SGD:S000001315"
+ /translation="MPLTTKPLSLKINAALFDVDGTIIISQPAIAAFWRDFGKDKPYF
+ DAEHVIHISHGWRTYDAIAKFAPDFADEEYVNKLEGEIPEKYGEHSIEVPGAVKLCNA
+ LNALPKEKWAVATSGTRDMAKKWFDILKIKRPEYFITANDVKQGKPHPEPYLKGRNGL
+ GFPINEQDPSKSKVVVFEDAPAGIAAGKAAGCKIVGIATTFDLDFLKEKGCDIIVKNH
+ ESIRVGEYNAETDEVELIFDDYLYAKDDLLKW"
+ gene complement(<256226..>257063)
+ /gene="RPL34B"
+ /locus_tag="YIL052C"
+ /db_xref="GeneID:854759"
+ mRNA complement(join(<256226..256554,257027..>257063))
+ /gene="RPL34B"
+ /locus_tag="YIL052C"
+ /product="ribosomal 60S subunit protein L34B"
+ /transcript_id="NM_001179402.1"
+ /db_xref="GeneID:854759"
+ CDS complement(join(256226..256554,257027..257063))
+ /gene="RPL34B"
+ /locus_tag="YIL052C"
+ /experiment="EXISTENCE:curator inference:GO:0002181
+ cytoplasmic translation [PMID:11983894]"
+ /experiment="EXISTENCE:curator inference:GO:0003735
+ structural constituent of ribosome [PMID:11983894]"
+ /experiment="EXISTENCE:direct assay:GO:0022625 cytosolic
+ large ribosomal subunit [PMID:11983894]"
+ /note="Ribosomal 60S subunit protein L34B; homologous to
+ mammalian ribosomal protein L34, no bacterial homolog;
+ RPL34B has a paralog, RPL34A, that arose from the whole
+ genome duplication"
+ /codon_start=1
+ /product="ribosomal 60S subunit protein L34B"
+ /protein_id="NP_012212.1"
+ /db_xref="GeneID:854759"
+ /db_xref="SGD:S000001314"
+ /translation="MAQRVTFRRRNPYNTRSNKIKVVKTPGGILRAQHVKKLATRPKC
+ GDCGSALQGISTLRPRQYATVSKTHKTVSRAYGGSRCANCVKERIVRAFLIEEQKIVK
+ KVVKEQTEAAKKSEKKSKK"
+ gene complement(<257843..>258280)
+ /gene="MMF1"
+ /locus_tag="YIL051C"
+ /gene_synonym="IBM1"
+ /db_xref="GeneID:854760"
+ mRNA complement(<257843..>258280)
+ /gene="MMF1"
+ /locus_tag="YIL051C"
+ /gene_synonym="IBM1"
+ /product="isoleucine biosynthesis protein MMF1"
+ /transcript_id="NM_001179401.3"
+ /db_xref="GeneID:854760"
+ CDS complement(257843..258280)
+ /gene="MMF1"
+ /locus_tag="YIL051C"
+ /gene_synonym="IBM1"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:0005759
+ mitochondrial matrix [PMID:11003673]"
+ /experiment="EXISTENCE:genetic interaction:GO:0032543
+ mitochondrial translation [PMID:15164357]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009097
+ isoleucine biosynthetic process [PMID:11442631]"
+ /note="Mitochondrial protein required for transamination
+ of isoleucine; but not of valine or leucine; may regulate
+ specificity of branched-chain transaminases Bat1p and
+ Bat2p; induction of expression in response to stress is
+ mediated by a Hog1p-regulated antisense RNA and gene
+ looping; interacts genetically with mitochondrial
+ ribosomal protein genes; MMF1 has a paralog, HMF1, that
+ arose from the whole genome duplication"
+ /codon_start=1
+ /product="isoleucine biosynthesis protein MMF1"
+ /protein_id="NP_012213.3"
+ /db_xref="GeneID:854760"
+ /db_xref="SGD:S000001313"
+ /translation="MFLRNSVLRTAPVLRRGITTLTPVSTKLAPPAAASYSQAMKANN
+ FVYVSGQIPYTPDNKPVQGSISEKAEQVFQNVKNILAESNSSLDNIVKVNVFLADMKN
+ FAEFNSVYAKHFHTHKPARSCVGVASLPLNVDLEMEVIAVEKN"
+ gene <258913..>259770
+ /gene="PCL7"
+ /locus_tag="YIL050W"
+ /db_xref="GeneID:854761"
+ mRNA <258913..>259770
+ /gene="PCL7"
+ /locus_tag="YIL050W"
+ /product="Pcl7p"
+ /transcript_id="NM_001179400.3"
+ /db_xref="GeneID:854761"
+ CDS 258913..259770
+ /gene="PCL7"
+ /locus_tag="YIL050W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:16611745]"
+ /experiment="EXISTENCE:direct assay:GO:0016538
+ cyclin-dependent protein serine/threonine kinase regulator
+ activity [PMID:12407105]"
+ /experiment="EXISTENCE:genetic interaction:GO:0005979
+ regulation of glycogen biosynthetic process
+ [PMID:11602261]"
+ /experiment="EXISTENCE:genetic interaction:GO:0005981
+ regulation of glycogen catabolic process [PMID:11602261]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031647
+ regulation of protein stability [PMID:16611745]"
+ /experiment="EXISTENCE:physical interaction:GO:0000307
+ cyclin-dependent protein kinase holoenzyme complex
+ [PMID:16611745]"
+ /note="Pho85p cyclin of the Pho80p subfamily; forms a
+ functional kinase complex with Pho85p which phosphorylates
+ Mmr1p and is regulated by Pho81p; involved in glycogen
+ metabolism, expression is cell-cycle regulated; PCL7 has a
+ paralog, PCL6, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Pcl7p"
+ /protein_id="NP_012214.3"
+ /db_xref="GeneID:854761"
+ /db_xref="SGD:S000001312"
+ /translation="MELSSPSKKTTTSPINIPGGNRDNLIIGPHSHSFKTDPFSSNNS
+ SLLSKISTNPSLESPFSSKSLLDCSPVQAVKKSLESEAKTHSLDEETNEQTDVKILNI
+ ADFPTDELILMISALLNRIITANDETTDVSQQVSDETEDELLTPILAFYGKNVPEIAV
+ VQYLERIQKYCPTTNDIFLSLLVYFDRISKNYGHSSERNGCAKQLFVMDSGNIHRLLI
+ TGVTICTKFLSDFFYSNSRYAKVGGISLQELNHLELQFLILCDFKLLVSVEEMQKYAN
+ LLYKFWNDQ"
+ gene <260158..>260919
+ /gene="DFG10"
+ /locus_tag="YIL049W"
+ /db_xref="GeneID:854762"
+ mRNA <260158..>260919
+ /gene="DFG10"
+ /locus_tag="YIL049W"
+ /product="putative polyprenol reductase"
+ /transcript_id="NM_001179399.1"
+ /db_xref="GeneID:854762"
+ CDS 260158..260919
+ /gene="DFG10"
+ /locus_tag="YIL049W"
+ /EC_number="1.3.1.94"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006488
+ dolichol-linked oligosaccharide biosynthetic process
+ [PMID:20637498]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007124
+ pseudohyphal growth [PMID:9055077]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0019408
+ dolichol biosynthetic process [PMID:20637498]"
+ /note="Probable polyprenol reductase; catalyzes conversion
+ of polyprenol to dolichol, the precursor for
+ N-glycosylation; involved in filamentous growth; mutations
+ in human homolog SRD5A3 confer CDG (Congenital Disorders
+ of Glycosylation); human SRD5A3 can complement yeast null
+ mutant"
+ /codon_start=1
+ /product="putative polyprenol reductase"
+ /protein_id="NP_012215.1"
+ /db_xref="GeneID:854762"
+ /db_xref="SGD:S000001311"
+ /translation="MYFDEEQLLKYTIYAYRLSFFVGICSLFIAKSCLPEFLQYGKTY
+ RPKENSKYSSILERIKKFTVPKAYFSHFYYLATFLSLVTLYFYPKFPIVWIIFGHSLR
+ RLYETLYVLHYTSNSRMNWSHYLVGIWFYSVLLLILNISLYKNSIPNTLNMNAFIIFC
+ IASWDQYKNHVILANLVKYSLPTGRLFRLVCCPHYLDEIIIYSTLLPYEQEFYLTLVW
+ VITSLTISALETKNYYRHKFKDNHVAPYAIIPFII"
+ gene <261437..>264892
+ /gene="NEO1"
+ /locus_tag="YIL048W"
+ /db_xref="GeneID:854763"
+ mRNA <261437..>264892
+ /gene="NEO1"
+ /locus_tag="YIL048W"
+ /product="putative aminophospholipid-translocating P4-type
+ ATPase NEO1"
+ /transcript_id="NM_001179398.1"
+ /db_xref="GeneID:854763"
+ CDS 261437..264892
+ /gene="NEO1"
+ /locus_tag="YIL048W"
+ /EC_number="7.6.2.1"
+ /experiment="EXISTENCE:direct assay:GO:0000139 Golgi
+ membrane [PMID:12960419]"
+ /experiment="EXISTENCE:direct assay:GO:0005768 endosome
+ [PMID:15314152]"
+ /experiment="EXISTENCE:direct assay:GO:0005770 late
+ endosome [PMID:30981741]"
+ /experiment="EXISTENCE:direct assay:GO:0005794 Golgi
+ apparatus [PMID:15314152]"
+ /experiment="EXISTENCE:direct assay:GO:0005802 trans-Golgi
+ network [PMID:30981741]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005886 plasma
+ membrane [PMID:27235400]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006890
+ retrograde vesicle-mediated transport, Golgi to
+ endoplasmic reticulum [PMID:12960419]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006897
+ endocytosis [PMID:15314152]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007033 vacuole
+ organization [PMID:15314152]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045332
+ phospholipid translocation [PMID:27235400]"
+ /note="Phospholipid translocase (flippase); involved in
+ generating phospholipid asymmetry in plasma membrane;
+ involved in endocytosis, vacuolar biogenesis and
+ Golgi-to-ER vesicle-mediated transport; localizes to
+ endosomes and Golgi apparatus; targeted to vacuole via
+ AP-3 pathway"
+ /codon_start=1
+ /product="putative aminophospholipid-translocating P4-type
+ ATPase NEO1"
+ /protein_id="NP_012216.1"
+ /db_xref="GeneID:854763"
+ /db_xref="SGD:S000001310"
+ /translation="MPNPPSFKSHKQNLFNSNNNQHANSVDSFDLHLDDSFDAALDSL
+ QINNNPEPLSKHNTVGDRESFEMRTVDDLDNFSNHSSDSHRKSSNTDTHPLMYDNRLS
+ QDDNFKFTNIASSPPSSSNNIFSKALSYLKVSNTKNWSKFGSPIELSDQHIEREIHPD
+ TTPVYDRNRYVSNELSNAKYNAVTFVPTLLYEQFKFFYNLYFLVVALSQAVPALRIGY
+ LSSYIVPLAFVLTVTMAKEAIDDIQRRRRDRESNNELYHVITRNRSIPSKDLKVGDLI
+ KVHKGDRIPADLVLLQSSEPSGESFIKTDQLDGETDWKLRVACPLTQNLSENDLINRI
+ SITASAPEKSIHKFLGKVTYKDSTSNPLSVDNTLWANTVLASSGFCIACVVYTGRDTR
+ QAMNTTTAKVKTGLLELEINSISKILCACVFALSILLVAFAGFHNDDWYIDILRYLIL
+ FSTIIPVSLRVNLDLAKSVYAHQIEHDKTIPETIVRTSTIPEDLGRIEYLLSDKTGTL
+ TQNDMQLKKIHLGTVSYTSETLDIVSDYVQSLVSSKNDSLNNSKVALSTTRKDMSFRV
+ RDMILTLAICHNVTPTFEDDELTYQAASPDEIAIVKFTESVGLSLFKRDRHSISLLHE
+ HSGKTLNYEILQVFPFNSDSKRMGIIVRDEQLDEYWFMQKGADTVMSKIVESNDWLEE
+ ETGNMAREGLRTLVIGRKKLNKKIYEQFQKEYNDASLSMLNRDQQMSQVITKYLEHDL
+ ELLGLTGVEDKLQKDVKSSIELLRNAGIKIWMLTGDKVETARCVSISAKLISRGQYVH
+ TITKVTRPEGAFNQLEYLKINRNACLLIDGESLGMFLKHYEQEFFDVVVHLPTVIACR
+ CTPQQKADVALVIRKMTGKRVCCIGDGGNDVSMIQCADVGVGIVGKEGKQASLAADFS
+ ITQFCHLTELLLWHGRNSYKRSAKLAQFVMHRGLIIAICQAVYSICSLFEPIALYQGW
+ LMVGYATCYTMAPVFSLTLDHDIEESLTKIYPELYKELTEGKSLSYKTFFVWVLLSLF
+ QGSVIQLFSQAFTSLLDTDFTRMVAISFTALVVNELIMVALEIYTWNKTMLVTEIATL
+ LFYIVSVPFLGDYFDLGYMTTVNYYAGLLVILLISIFPVWTAKAIYRRLHPPSYAKVQ
+ EFATP"
+ gene complement(<265115..>267823)
+ /gene="SYG1"
+ /locus_tag="YIL047C"
+ /db_xref="GeneID:854764"
+ mRNA complement(<265115..>267823)
+ /gene="SYG1"
+ /locus_tag="YIL047C"
+ /product="Syg1p"
+ /transcript_id="NM_001179397.3"
+ /db_xref="GeneID:854764"
+ CDS complement(265115..267823)
+ /gene="SYG1"
+ /locus_tag="YIL047C"
+ /experiment="EXISTENCE:direct assay:GO:0000329 fungal-type
+ vacuole membrane [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:7592711]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007165 signal
+ transduction [PMID:7592711]"
+ /note="Plasma membrane hypothetical protein; targeted to
+ vacuole via AP-3 pathway; truncation and overexpression
+ suppresses lethality of G-alpha protein deficiency"
+ /codon_start=1
+ /product="Syg1p"
+ /protein_id="NP_012217.3"
+ /db_xref="GeneID:854764"
+ /db_xref="SGD:S000001309"
+ /translation="MKFADHLTESAIPEWRDKYIDYKVGKKKLRRYKEKLDAEEEQSS
+ SYRSWMPSVSVYQTAFQQREPGKSRSDGDYRSGPAFKKDYSALQREFVADFIEDWLIS
+ FQLSKCNEFYLWLLKECDKKFEVLQSQLHYYSLQKNYERDNLNRSSSNVDMSTSLYAA
+ GLAGRSDSRVNSIDSDSRSVMYGSMPCTKEAKKPRLSLLAYCQKVLKDNRLLPSWPKR
+ GFSLLQDLRQDASSRGRETFAFGASFLETMTTTQARNLLSNAIIEYYLYLQLVKSFRD
+ INVTGFRKMVKKFDKTCHTRELTTFMSYARTHYTLFKHADANVQLVAQKMQQITSSQP
+ TPTSELSSAQRDKEPITWLETQITEWFTTALTNSPKDRKHNTHKLKKLTIQYSISEQM
+ VHRNNRSIVQMLVVGLGIGVSMTLITYTLYLGISSEETSFTHKILFPLWGGWYMVLLI
+ AFLFLVNCFIWHRTGINYRFIMLGEIQSKNGTQFFNNDFATSKIPLKLYFLTFFIVPC
+ AVCSMLSFALEKLTPLGFLYIGIVSFLFLCPSGLIPYWDKVVHTRKWLVVTLIRLMMS
+ GFFPVEFGDFFLGDIICSLTYSIADIAMFFCVYSHTPNNLCGSSHSRAMGVLSCLPSY
+ WRFMQCLRRFADSGDWFPHLLNAAKYTLGIAYNATLCAYRLSDRSEQRRTPFIVCATL
+ NSILTSAWDLVMDWSFAHNTTSYNWLLRDDLYLAGKKNWENGSYSFSRKLVYYFAMIW
+ DILIRFEWIVYAIAPQTIQQSAVTSFILALLEVLRRFVWIIFRVENEHVANVHLFRVT
+ GDAPLPYPIAQVGDDSMDSSDLGSKAFSSLNDIPITPSHDNNPHSFAEPMPAYRGTFR
+ RRSSVFENISRSIPWAHATDFQRPTVNTVDDRSPETDSESEVESIM"
+ gene <268309..>268473
+ /locus_tag="YIL046W-A"
+ /db_xref="GeneID:1466492"
+ mRNA <268309..>268473
+ /locus_tag="YIL046W-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001184654.1"
+ /db_xref="GeneID:1466492"
+ CDS 268309..268473
+ /locus_tag="YIL046W-A"
+ /note="hypothetical protein; identified by expression
+ profiling and mass spectrometry"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_878097.1"
+ /db_xref="GeneID:1466492"
+ /db_xref="SGD:S000028836"
+ /translation="MMCVCIPKKKLMDWRVYYIYSYVVCLYMCGSDCACICVLACVVQ
+ CVCFNVEMRL"
+ gene <268651..>270573
+ /gene="MET30"
+ /locus_tag="YIL046W"
+ /gene_synonym="ZRG11"
+ /db_xref="GeneID:854765"
+ mRNA <268651..>270573
+ /gene="MET30"
+ /locus_tag="YIL046W"
+ /gene_synonym="ZRG11"
+ /product="ubiquitin-binding SDF ubiquitin ligase complex
+ subunit MET30"
+ /transcript_id="NM_001179396.1"
+ /db_xref="GeneID:854765"
+ CDS 268651..270573
+ /gene="MET30"
+ /locus_tag="YIL046W"
+ /gene_synonym="ZRG11"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:10637232]"
+ /experiment="EXISTENCE:direct assay:GO:0019005 SCF
+ ubiquitin ligase complex [PMID:9716410]"
+ /experiment="EXISTENCE:direct assay:GO:0043130 ubiquitin
+ binding [PMID:21070969]"
+ /experiment="EXISTENCE:direct assay:GO:0043224 nuclear SCF
+ ubiquitin ligase complex [PMID:10637232]"
+ /experiment="EXISTENCE:direct assay:GO:0046685 response to
+ arsenic-containing substance [PMID:15689486]"
+ /experiment="EXISTENCE:direct assay:GO:0046686 response to
+ cadmium ion [PMID:15689486]"
+ /experiment="EXISTENCE:genetic interaction:GO:0031146
+ SCF-dependent proteasomal ubiquitin-dependent protein
+ catabolic process [PMID:9716410]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000082 G1/S
+ transition of mitotic cell cycle [PMID:15870262]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000209 protein
+ polyubiquitination [PMID:9716410]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:15870262]"
+ /note="F-box protein containing five copies of the WD40
+ motif; controls cell cycle function, sulfur metabolism,
+ and methionine biosynthesis as part of the ubiquitin
+ ligase complex; interacts with and regulates Met4p,
+ localizes within the nucleus; dissociation of Met30p from
+ SCF complex in response to cadmium stress is regulated by
+ Cdc48p"
+ /codon_start=1
+ /product="ubiquitin-binding SDF ubiquitin ligase complex
+ subunit MET30"
+ /protein_id="NP_012218.1"
+ /db_xref="GeneID:854765"
+ /db_xref="SGD:S000001308"
+ /translation="MRRERQRMMSFEDKDKDDLDNSNSNNSSEMTDTAMMPPLKRLLI
+ TGSSDDLAQGSSGKKKMTMATRSPSSSPDLATNDSGTRVQPLPEYNFTKFCYRHNPDI
+ QFSPTHTACYKQDLKRTQEINANIAKLPLQEQSDIHHIISKYSNSNDKIRKLILDGIL
+ STSCFPQLSYISSLVTHMIKIDFISILPQELSLKILSYLDCQSLCNATRVCRKWQKLA
+ DDDRVWYHMCEQHIDRKCPNCGWGLPLLHMKRARIQQNSTGSSSNADIQTQTTRPWKV
+ IYRERFKVESNWRKGHCRIQEFKGHMDGVLTLQFNYRLLFTGSYDSTIGIWDLFTGKL
+ IRRLSGHSDGVKTLYFDDRKLITGSLDKTIRVWNYITGECISTYRGHSDSVLSVDSYQ
+ KVIVSGSADKTVKVWHVESRTCYTLRGHTEWVNCVKLHPKSFSCFSCSDDTTIRMWDI
+ RTNSCLKVFRGHVGQVQKIIPLTIKDVENLATDNTSDGSSPQDDPTMTDGADESDTPS
+ NEQETVLDENIPYPTHLLSCGLDNTIKLWDVKTGKCIRTQFGHVEGVWDIAADNFRII
+ SGSHDGSIKVWDLQSGKCMHTFNGRRLQRETQHTQTQSLGDKVAPIACVCIGDSECFS
+ GDEFGCVKMYKFDLND"
+ gene <271161..>272777
+ /gene="PIG2"
+ /locus_tag="YIL045W"
+ /db_xref="GeneID:854766"
+ mRNA <271161..>272777
+ /gene="PIG2"
+ /locus_tag="YIL045W"
+ /product="putative protein phosphatase regulator PIG2"
+ /transcript_id="NM_001179395.3"
+ /db_xref="GeneID:854766"
+ CDS 271161..272777
+ /gene="PIG2"
+ /locus_tag="YIL045W"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:physical interaction:GO:0005979
+ regulation of glycogen biosynthetic process
+ [PMID:9046081]"
+ /note="Putative type-1 protein phosphatase targeting
+ subunit; tethers Glc7p type-1 protein phosphatase to Gsy2p
+ glycogen synthase; PIG2 has a paralog, GIP2, that arose
+ from the whole genome duplication"
+ /codon_start=1
+ /product="putative protein phosphatase regulator PIG2"
+ /protein_id="NP_012219.3"
+ /db_xref="GeneID:854766"
+ /db_xref="SGD:S000001307"
+ /translation="MATTTQPQNILMDEPLNLPNNSAHNNNYGNINANIRTFAGMSMH
+ MHPARLNSLEFLHKPRRLSNVKLHRLPQDELQRNTDMNKGMYFNGKQVHAHHPFINSG
+ ANFNAHHQDVSKLGEEEDEISPLSHDNFQYESEENGNPSPPIYKKSGELVKSSLKRRS
+ KSLPITPKSIFNKTGSKSKHVNLDHVDTRLLQRSKSVHFDRVLPIKLFNENEKPIDVG
+ KQMVQQDVLNFKHKPLTRLSALNGGSDSVPIEDLLSENNQNEYGDTWLQNPKGVFLFG
+ TNSNNRRNKKKKFKLSDDDSDIENDNDSDDAINRLVRQQDKDQAHLAHGLKNLLINDD
+ DDYLETRTNSAKSGANLFIGNSKRIVGLYNKNFPILSDRNRKSLKLNIFLNLSRGRPV
+ FLQEITLLTGFHNMVIIGKVFVKNIYFDKKIIVRYTWDAWRTFHESECVYFSNANGIL
+ PGSNMDIFKFSIDDIHNPNDKDSNISQLEFCIQYLTWGVDRSRKEYWDNNDSANYKID
+ VVTNETRTGPTTDVNDNYEMKHSLFRNPFH"
+ gene complement(<272950..>273846)
+ /gene="AGE2"
+ /locus_tag="YIL044C"
+ /gene_synonym="SAT2"
+ /db_xref="GeneID:854767"
+ mRNA complement(<272950..>273846)
+ /gene="AGE2"
+ /locus_tag="YIL044C"
+ /gene_synonym="SAT2"
+ /product="GTPase-activating protein AGE2"
+ /transcript_id="NM_001179394.3"
+ /db_xref="GeneID:854767"
+ CDS complement(272950..273846)
+ /gene="AGE2"
+ /locus_tag="YIL044C"
+ /gene_synonym="SAT2"
+ /experiment="EXISTENCE:direct assay:GO:0005096 GTPase
+ activator activity [PMID:12627398]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006888
+ endoplasmic reticulum to Golgi vesicle-mediated transport
+ [PMID:9677411]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006891
+ intra-Golgi vesicle-mediated transport [PMID:9677411]"
+ /note="ADP-ribosylation factor (ARF) GTPase activating
+ protein (GAP) effector; involved in Trans-Golgi-Network
+ (TGN) transport; contains C2C2H2 cysteine/histidine motif"
+ /codon_start=1
+ /product="GTPase-activating protein AGE2"
+ /protein_id="NP_012220.3"
+ /db_xref="GeneID:854767"
+ /db_xref="SGD:S000001306"
+ /translation="MSTSVPVKKALSALLRDPGNSHCADCKAQLHPRWASWSLGVFIC
+ IKCAGIHRSLGTHISKVKSVDLDTWKEEHLVKLIQFKNNLRANSYYEATLADELKQRK
+ ITDTSSLQNFIKNKYEYKKWIGDLSSIEGLNDSTEPVLHKPSANHSLPASNARLDQSS
+ NSLQKTQTQPPSHLLSTSRSNTSLLNLQVSSLSKTTSNTSVTSSATSIGAANTKTGNR
+ VGEFGQRNDLKKSILSLYSKPSAQTQSQNSFFTSTTPQPCNTPSPFVNTGITATNNNS
+ MNSNSSSNISLDDNELFKNVWS"
+ gene complement(<274072..>274926)
+ /gene="CBR1"
+ /locus_tag="YIL043C"
+ /gene_synonym="CBR5"
+ /db_xref="GeneID:854768"
+ mRNA complement(<274072..>274926)
+ /gene="CBR1"
+ /locus_tag="YIL043C"
+ /gene_synonym="CBR5"
+ /product="cytochrome-b5 reductase"
+ /transcript_id="NM_001179393.1"
+ /db_xref="GeneID:854768"
+ CDS complement(274072..274926)
+ /gene="CBR1"
+ /locus_tag="YIL043C"
+ /gene_synonym="CBR5"
+ /EC_number="1.6.2.2"
+ /experiment="EXISTENCE:direct assay:GO:0004128
+ cytochrome-b5 reductase activity, acting on NAD(P)H
+ [PMID:14930|PMID:27694803]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239|PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane [PMID:16407407]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0002098 tRNA
+ wobble uridine modification [PMID:27694803]"
+ /note="Cytochrome b reductase and NADH-dependent reductase
+ for Dph3p; required for diphthamide synthesis and tRNA
+ wobble uridine modification; also detected in
+ mitochondria; mutation in conserved NADH binding domain of
+ the human ortholog results in type I methemoglobinemia"
+ /codon_start=1
+ /product="cytochrome-b5 reductase"
+ /protein_id="NP_012221.2"
+ /db_xref="GeneID:854768"
+ /db_xref="SGD:S000001305"
+ /translation="MAIDAQKLVVVIVIVVVPLLFKFIIGPKTKPVLDPKRNDFQSFP
+ LVEKTILTHNTSMYKFGLPHADDVLGLPIGQHIVIKANINGKDITRSYTPTSLDGDTK
+ GNFELLVKSYPTGNVSKMIGELKIGDSIQIKGPRGNYHYERNCRSHLGMIAGGTGIAP
+ MYQIMKAIAMDPHDTTKVSLVFGNVHEEDILLKKELEALVAMKPSQFKIVYYLDSPDR
+ EDWTGGVGYITKDVIKEHLPAATMDNVQILICGPPAMVASVRRSTVDLGFRRSKPLSK
+ MEDQVFVF"
+ gene complement(<275108..>276292)
+ /gene="PKP1"
+ /locus_tag="YIL042C"
+ /db_xref="GeneID:854769"
+ mRNA complement(<275108..>276292)
+ /gene="PKP1"
+ /locus_tag="YIL042C"
+ /product="protein kinase PKP1"
+ /transcript_id="NM_001179392.1"
+ /db_xref="GeneID:854769"
+ CDS complement(275108..276292)
+ /gene="PKP1"
+ /locus_tag="YIL042C"
+ /EC_number="2.7.11.2"
+ /experiment="EXISTENCE:direct assay:GO:0004672 protein
+ kinase activity [PMID:17918780]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:16823961|PMID:24769239|PMID:16643908|PMID:17918780]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004740
+ pyruvate dehydrogenase (acetyl-transferring) kinase
+ activity [PMID:18180296|PMID:16643908]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0065003
+ protein-containing complex assembly [PMID:16643908]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1901524
+ regulation of mitophagy [PMID:31548421]"
+ /note="Mitochondrial protein kinase; involved in negative
+ regulation of pyruvate dehydrogenase complex activity by
+ phosphorylating the ser-133 residue of the Pda1p subunit;
+ acts in concert with kinase Pkp2p and phosphatases Ptc5p
+ and Ptc6p"
+ /codon_start=1
+ /product="protein kinase PKP1"
+ /protein_id="NP_012222.1"
+ /db_xref="GeneID:854769"
+ /db_xref="SGD:S000001304"
+ /translation="MWKIMRSWKCGGMRWAHRQRPSHELLSQLSFDQHYKIRSNIELL
+ IQDYASKPIAPLNYEYFLQYRPPLTKKEEYMLTIKTINLLLSLTCKRLNAIQRLPYNA
+ VINPHIERTNSLYLKSLQTLLSIAYPYELHNPPKIQAKFTELLDDHEDAIVVLAKGLQ
+ EIQSCYPKFQISQFLNFHLKERITMKLLVTHYLSLMAQNKGDTNKRMIGILHRDLPIA
+ QLIKHVSDYVNDICFVKFNTQRTPVLIHPPSQDITFTCIPPILEYIMTEVFKNAFEAQ
+ IALGKEHMPIEINLLKPDDDELYLRIRDHGGGITPEVEALMFNYSYSTHTQQSADSES
+ TDLPGEQINNVSGMGFGLPMCKTYLELFGGKIDVQSLLGWGTDVYIKLKGPSKTALLS
+ KK"
+ gene <276525..>277505
+ /gene="GVP36"
+ /locus_tag="YIL041W"
+ /db_xref="GeneID:854770"
+ mRNA <276525..>277505
+ /gene="GVP36"
+ /locus_tag="YIL041W"
+ /product="Gvp36p"
+ /transcript_id="NM_001179391.3"
+ /db_xref="GeneID:854770"
+ CDS 276525..277505
+ /gene="GVP36"
+ /locus_tag="YIL041W"
+ /experiment="EXISTENCE:direct assay:GO:0000139 Golgi
+ membrane [PMID:16107716]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006897
+ endocytosis [PMID:18156177]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007033 vacuole
+ organization [PMID:18156177]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030950
+ establishment or maintenance of actin cytoskeleton
+ polarity [PMID:18156177]"
+ /note="BAR domain protein that localizes to early and late
+ Golgi vesicles; required for adaptation to varying
+ nutrient concentrations, fluid-phase endocytosis,
+ polarization of the actin cytoskeleton, and vacuole
+ biogenesis"
+ /codon_start=1
+ /product="Gvp36p"
+ /protein_id="NP_012223.3"
+ /db_xref="GeneID:854770"
+ /db_xref="SGD:S000001303"
+ /translation="MSFNAFASSLSKKLQEISTSVSEKTQELPSLAQSTQRMVQERLG
+ QVTDISQLPREYTELEDKVDTIKLIYNHFLGVTAIYENGSYDYPKYINESVNEFSRSV
+ ASKLTELTHATSASEAQNILVAPGPIKEPKTLNYALSKVALNSSECLNKMFPTEEQPL
+ ASALLQFSDVQAKIAQARIQQDTLIQTKFNKNLRERLSFEIGKADKCRKDVHSMRLRY
+ DVARTNLANNKKPEKEASLRVQMETLEDQFAQVTEDATVCLQEVISHANFSEDLKELA
+ KAQAEYFETSAGLMKEFLSNSFAEEPEAKPEVAEEEKPQTAISMNDEDDA"
+ gene <277723..>278139
+ /gene="APQ12"
+ /locus_tag="YIL040W"
+ /db_xref="GeneID:854771"
+ mRNA <277723..>278139
+ /gene="APQ12"
+ /locus_tag="YIL040W"
+ /product="Apq12p"
+ /transcript_id="NM_001179390.1"
+ /db_xref="GeneID:854771"
+ CDS 277723..278139
+ /gene="APQ12"
+ /locus_tag="YIL040W"
+ /experiment="EXISTENCE:direct assay:GO:0005635 nuclear
+ envelope [PMID:15273328|PMID:26432634]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum
+ [PMID:26928762|PMID:14562095|PMID:26432634|PMID:15273328]"
+ /experiment="EXISTENCE:direct assay:GO:0016020 membrane
+ [PMID:17724120]"
+ /experiment="EXISTENCE:genetic interaction:GO:0055088
+ lipid homeostasis [PMID:26432634|PMID:20016074]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006998 nuclear
+ envelope organization [PMID:17724120|PMID:20016074]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0055088 lipid
+ homeostasis [PMID:26432634|PMID:20016074]"
+ /note="Nuclear envelope/ER integral membrane protein;
+ interacts and functions with Brr6p and Brl1p in lipid
+ homeostasis; mutants are defective in nuclear pore complex
+ biogenesis, nuclear envelope morphology, mRNA export from
+ the nucleus and are sensitive to sterol biosynthesis
+ inhibitors and membrane fluidizing agents; exhibits
+ synthetic lethal genetic interactions with genes involved
+ in lipid metabolism"
+ /codon_start=1
+ /product="Apq12p"
+ /protein_id="NP_012224.1"
+ /db_xref="GeneID:854771"
+ /db_xref="SGD:S000001302"
+ /translation="MDATQPQYELSVVTQCLKSAIDVIQWLIPTITKFSQSHPLVFQL
+ LFIFFTFYVFYKLLMNFITLVKRFLYLTLVVTCIGIYMRGSQQFLTVDLLNFYNFVMS
+ NRYYAFKIYTLFINALEREINTVYHLAQMKMEQLLK"
+ gene <278426..>279847
+ /gene="TED1"
+ /locus_tag="YIL039W"
+ /db_xref="GeneID:854772"
+ mRNA <278426..>279847
+ /gene="TED1"
+ /locus_tag="YIL039W"
+ /product="Ted1p"
+ /transcript_id="NM_001179389.1"
+ /db_xref="GeneID:854772"
+ CDS 278426..279847
+ /gene="TED1"
+ /locus_tag="YIL039W"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:14562095|PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006888
+ endoplasmic reticulum to Golgi vesicle-mediated transport
+ [PMID:17989219]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006888
+ endoplasmic reticulum to Golgi vesicle-mediated transport
+ [PMID:17989219]"
+ /note="GPI-glycan remodelase; conserved phosphoesterase
+ domain-containing protein; acts together with
+ Emp24p/Erv25p in cargo exit from the ER; functional
+ ortholog of mammalian GPI-glycan remodelase PGAP5;
+ deletion confers sensitivity to
+ 4-(N-(S-glutathionylacetyl)amino) phenylarsenoxide (GSAO)"
+ /codon_start=1
+ /product="Ted1p"
+ /protein_id="NP_012225.1"
+ /db_xref="GeneID:854772"
+ /db_xref="SGD:S000001301"
+ /translation="MLRCAVKKFAYFATFLTIVANIYIYTYPSFHPEQCSWNCSNKNA
+ PLQKDLTFVDKVKNYFSDVREQWHGSHASAGNDEDIHILAFGDPQIKGIWPKTPYVSR
+ LDTYGNDYYLGHIYDMMQQRLKPQVVTVMGDLFSSQWIGDSEFHNRTKRYISRIFKRD
+ PTSIENIKQQNLDEKGQYKANWPEWGDRFNEILDNVKENEADNQELSFGFGYENIHSW
+ NPDLEDFLIINITGNHDVGYSGDATYQHMTRFHDLFGKDNYWIEYETNTTHPWRIVVL
+ NDLLLEGPALQPEFVEATWIFLNQLNERKFNGSTVLLTHVPFYKREGLCVDGPDTRYY
+ PDAHAPESYKSGLLRSQNHLSESVSNQVLNMIFENGKPGIILTGHDHEGCETVYNKKS
+ TSTWEATKNIESDVFVKEITVKSMMGEFNGNTGLVTGHFNTDSMTWEWTFSLCPFAIQ
+ HVWWFAKVSLLVTIFTWSSLLFV"
+ gene complement(<280142..>282652)
+ /gene="NOT3"
+ /locus_tag="YIL038C"
+ /db_xref="GeneID:854773"
+ mRNA complement(<280142..>282652)
+ /gene="NOT3"
+ /locus_tag="YIL038C"
+ /product="CCR4-NOT core subunit NOT3"
+ /transcript_id="NM_001179388.3"
+ /db_xref="GeneID:854773"
+ CDS complement(280142..282652)
+ /gene="NOT3"
+ /locus_tag="YIL038C"
+ /experiment="EXISTENCE:direct assay:GO:0000289
+ nuclear-transcribed mRNA poly(A) tail shortening
+ [PMID:11889048]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11889048]"
+ /experiment="EXISTENCE:direct assay:GO:0032968 positive
+ regulation of transcription elongation by RNA polymerase
+ II [PMID:21406554]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000290
+ deadenylation-dependent decapping of nuclear-transcribed
+ mRNA [PMID:26952104]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016567 protein
+ ubiquitination [PMID:16926149]"
+ /experiment="EXISTENCE:physical interaction:GO:0005515
+ protein binding [PMID:26952104]"
+ /experiment="EXISTENCE:physical interaction:GO:0030015
+ CCR4-NOT core complex [PMID:10490603]"
+ /note="Component of the CCR4-NOT core complex, involved in
+ mRNA decapping; involved in transcription initiation and
+ elongation and in mRNA degradation; conserved lysine in
+ human homolog of Not3p and Not5p is mutated in cancers"
+ /codon_start=1
+ /product="CCR4-NOT core subunit NOT3"
+ /protein_id="NP_012226.3"
+ /db_xref="GeneID:854773"
+ /db_xref="SGD:S000001300"
+ /translation="MAHRKLQQEVDRVFKKINEGLEIFNSYYERHESCTNNPSQKDKL
+ ESDLKREVKKLQRLREQIKSWQSSPDIKDKDSLLDYRRSVEIAMEKYKAVEKASKEKA
+ YSNISLKKSETLDPQERERRDISEYLSQMIDELERQYDSLQVEIDKLLLLNKKKKTSS
+ TTNDEKKEQYKRFQARYRWHQQQMELALRLLANEELDPQDVKNVQDDINYFVESNQDP
+ DFVEDETIYDGLNLQSNEAIAHEVAQYFASQNAEDNNTSDANESLQDISKLSKKEQRK
+ LEREAKKAAKLAAKNATGAAIPVAGPSSTPSPVIPVADASKETERSPSSSPIHNATKP
+ EEAVKTSIKSPRSSADNLLPSLQKSPSSATPETPTNVHTHIHQTPNGITGATTLKPAT
+ LPAKPAGELKWAVAASQAVEKDRKVTSASSTISNTSTKTPTTAAATTTSSNANSRIGS
+ ALNTPKLSTSSLSLQPDNTGASSSAATAAAVLAAGAAAVHQNNQAFYRNMSSSHHPLV
+ SLATNPKSEHEVATTVNQNGPENTTKKVMEQKEEESPEERNKLQVPTFGVFDDDFESD
+ RDSETEPEEEEQPSTPKYLSLEQREAKTNEIKKEFVSDFETLLLPSGVQEFIMSSELY
+ NSQIESKITYKRSRDMCEISRLVEVPQGVNPPSPLDAFRSTQQWDVMRCSLRDIIIGS
+ ERLKEDSSSIYAKILENFRTLEMFSLFYNYYFAITPLEREIAYKILNERDWKVSKDGT
+ MWFLRQGEVKFFNEICEVGDYKIFKLDDWTVIDKINFRLDYSFLQPPVDTASEVRDVS
+ VDNNNVNDQSNVTLEQQKQEISHGKQLLKQLKQGKISV"
+ gene complement(<283029..>284999)
+ /gene="PRM2"
+ /locus_tag="YIL037C"
+ /db_xref="GeneID:854774"
+ mRNA complement(<283029..>284999)
+ /gene="PRM2"
+ /locus_tag="YIL037C"
+ /product="pheromone-regulated protein PRM2"
+ /transcript_id="NM_001179387.3"
+ /db_xref="GeneID:854774"
+ CDS complement(283029..284999)
+ /gene="PRM2"
+ /locus_tag="YIL037C"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000742
+ karyogamy involved in conjugation with cellular fusion
+ [PMID:17101777]"
+ /note="Pheromone-regulated protein; predicted to have 4
+ transmembrane segments and a coiled coil domain; regulated
+ by Ste12p; required for efficient nuclear fusion"
+ /codon_start=1
+ /product="pheromone-regulated protein PRM2"
+ /protein_id="NP_012227.3"
+ /db_xref="GeneID:854774"
+ /db_xref="SGD:S000001299"
+ /translation="MNNVHIIKPLSLPQRFFSCIFHPLLLIFFTSVILTIWGSFSVID
+ ITMAKMSHAQVKRNDTVSTFASISTATATATTTATTTATMTAVTTQHAIYSANSYSLN
+ KTFIDNTIDQYFESKLRSIESTVGTDMQEKFKSYTDDILDNKQKLINDQISLETELIK
+ EVLEVNNTIFNELLTKSQLINDTWNEISEDAMTIDKDSISQMASNLLLNYSMFDSIFG
+ NYSRKLKSLQNFNGTITDFSTQLDTSSTLSLNFLRNSTDWLQLKRNFTANLQNEISIL
+ SGGSTEVTSSTSIIKRSLKTNSEENSVLSAVKNHVFRKCKRMTIIFTVMYFAFVILLM
+ AIERILFQLENQQVNLVMSQINGLTGQTNFTKYNKVLKSLITTLNLSTLYPIPYQLTK
+ LINQKIFKREPEKIDDKKVKKSKLFYCNWWIISNGAHLWLFGFLMLLIHWQIVTRLTN
+ FEVPSLPTFHKRAGPSLYKREVWTDGNITTTIEGFINDSVSLLCENFQMEVNEKFITA
+ NLSLQTDPNLKVQSTDILNLWVNDTNTQFEKYLNESSQNWQGIDLQVEPLLGSDSINE
+ FLGQYFLPTYEVTNTNSSFALDIQKYGIINRGINITNASVAALSSLSKRQIKDKEQKQ
+ TYFLHTVYKWGLLAVCLTILFHHMLIFIILKL"
+ gene <285666..>287429
+ /gene="CST6"
+ /locus_tag="YIL036W"
+ /gene_synonym="ACA2; SHF1"
+ /db_xref="GeneID:854775"
+ mRNA <285666..>287429
+ /gene="CST6"
+ /locus_tag="YIL036W"
+ /gene_synonym="ACA2; SHF1"
+ /product="Cst6p"
+ /transcript_id="NM_001179386.1"
+ /db_xref="GeneID:854775"
+ CDS 285666..287429
+ /gene="CST6"
+ /locus_tag="YIL036W"
+ /gene_synonym="ACA2; SHF1"
+ /experiment="EXISTENCE:direct assay:GO:0001228 DNA-binding
+ transcription activator activity, RNA polymerase
+ II-specific [PMID:10825197]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0006357 regulation
+ of transcription by RNA polymerase II [PMID:27143390]"
+ /experiment="EXISTENCE:direct assay:GO:0033554 cellular
+ response to stress [PMID:27143390]"
+ /experiment="EXISTENCE:direct assay:GO:0043565
+ sequence-specific DNA binding
+ [PMID:10825197|PMID:27143390]"
+ /experiment="EXISTENCE:direct assay:GO:0045944 positive
+ regulation of transcription by RNA polymerase II
+ [PMID:10825197]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0061429
+ positive regulation of transcription from RNA polymerase
+ II promoter by oleic acid [PMID:20395639]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071244
+ cellular response to carbon dioxide [PMID:22253597]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071400
+ cellular response to oleic acid [PMID:20395639]"
+ /note="Basic leucine zipper (bZIP) transcription factor
+ from ATF/CREB family involved in stress-responsive
+ regulatory network; mediates transcriptional activation of
+ NCE103 in response to low CO2 levels; proposed to be a
+ regulator of oleate responsive genes; involved in
+ utilization of non-optimal carbon sources and chromosome
+ stability; relocalizes to the cytosol in response to
+ hypoxia; CST6 has a paralog, ACA1, that arose from the
+ whole genome duplication"
+ /codon_start=1
+ /product="Cst6p"
+ /protein_id="NP_012228.1"
+ /db_xref="GeneID:854775"
+ /db_xref="SGD:S000001298"
+ /translation="MFTGQEYHSVDSNSNKQKDNNKRGIDDTSKILNNKIPHSVSDTS
+ AAATTTSTMNNSALSRSLDPTDINYSTNMAGVVDQIHDYTTSNRNSLTPQYSIAAGNV
+ NSHDRVVKPSANSNYQQAAYLRQQQQQDQRQQSPSMKTEEESQLYGDILMNSGVVQDM
+ HQNLATHTNLSQLSSTRKSAPNDSTTAPTNASNIANTASVNKQMYFMNMNMNNNPHAL
+ NDPSILETLSPFFQPFGVDVAHLPMTNPPIFQSSLPGCDEPIRRRRISISNGQISQLG
+ EDIETLENLHNTQPPPMPNFHNYNGLSQTRNVSNKPVFNQAVPVSSIPQYNAKKVINP
+ TKDSALGDQSVIYSKSQQRNFVNAPSKNTPAESISDLEGMTTFAPTTGGENRGKSALR
+ ESHSNPSFTPKSQGSHLNLAANTQGNPIPGTTAWKRARLLERNRIAASKCRQRKKVAQ
+ LQLQKEFNEIKDENRILLKKLNYYEKLISKFKKFSKIHLREHEKLNKDSDNNVNGTNS
+ SNKNESMTVDSLKIIEELLMIDSDVTEVDKDTGKIIAIKHEPYSQRFGSDTDDDDIDL
+ KPVEGGKDPDNQSLPNSEKIK"
+ gene complement(<287790..>288908)
+ /gene="CKA1"
+ /locus_tag="YIL035C"
+ /db_xref="GeneID:854776"
+ mRNA complement(<287790..>288908)
+ /gene="CKA1"
+ /locus_tag="YIL035C"
+ /product="casein kinase 2 catalytic subunit CKA1"
+ /transcript_id="NM_001179385.1"
+ /db_xref="GeneID:854776"
+ CDS complement(287790..288908)
+ /gene="CKA1"
+ /locus_tag="YIL035C"
+ /EC_number="2.7.11.1"
+ /experiment="EXISTENCE:direct assay:GO:0004672 protein
+ kinase activity [PMID:16319894]"
+ /experiment="EXISTENCE:direct assay:GO:0004674 protein
+ serine/threonine kinase activity [PMID:8226802]"
+ /experiment="EXISTENCE:direct assay:GO:0005956 protein
+ kinase CK2 complex [PMID:8135547]"
+ /experiment="EXISTENCE:direct assay:GO:0006356 regulation
+ of transcription by RNA polymerase I [PMID:11551505]"
+ /experiment="EXISTENCE:direct assay:GO:0006359 regulation
+ of transcription by RNA polymerase III [PMID:11551505]"
+ /experiment="EXISTENCE:direct assay:GO:0006974 DNA damage
+ response [PMID:11551505]"
+ /experiment="EXISTENCE:direct assay:GO:0034456 UTP-C
+ complex [PMID:17515605]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007535
+ donor selection [PMID:22496671]"
+ /note="Alpha catalytic subunit of casein kinase 2 (CK2); a
+ Ser/Thr protein kinase with roles in cell growth and
+ proliferation; CK2, comprised of CKA1, CKA2, CKB1 and
+ CKB2, has many substrates including transcription factors
+ and all RNA polymerases; regulates Fkh1p-mediated donor
+ preference during mating-type switching"
+ /codon_start=1
+ /product="casein kinase 2 catalytic subunit CKA1"
+ /protein_id="NP_012229.1"
+ /db_xref="GeneID:854776"
+ /db_xref="SGD:S000001297"
+ /translation="MKCRVWSEARVYTNINKQRTEEYWDYENTVIDWSTNTKDYEIEN
+ KVGRGKYSEVFQGVKLDSKVKIVIKMLKPVKKKKIKREIKILTDLSNEKVPPTTLPFQ
+ KDQYYTNQKEDVLKFIRPYIFDQPHNGHANIIHLFDIIKDPISKTPALVFEYVDNVDF
+ RILYPKLTDLEIRFYMFELLKALDYCHSMGIMHRDVKPHNVMIDHKNKKLRLIDWGLA
+ EFYHVNMEYNVRVASRFFKGPELLVDYRMYDYSLDLWSFGTMLASMIFKREPFFHGTS
+ NTDQLVKIVKVLGTSDFEKYLLKYEITLPREFYDMDQYIRKPWHRFINDGNKHLSGND
+ EIIDLIDNLLRYDHQERLTAKEAMGHPWFAPIREQIEK"
+ gene complement(<289226..>290089)
+ /gene="CAP2"
+ /locus_tag="YIL034C"
+ /db_xref="GeneID:854777"
+ mRNA complement(<289226..>290089)
+ /gene="CAP2"
+ /locus_tag="YIL034C"
+ /product="F-actin-capping protein subunit beta"
+ /transcript_id="NM_001179384.3"
+ /db_xref="GeneID:854777"
+ CDS complement(289226..290089)
+ /gene="CAP2"
+ /locus_tag="YIL034C"
+ /experiment="EXISTENCE:direct assay:GO:0000131 incipient
+ cellular bud site [PMID:1315784]"
+ /experiment="EXISTENCE:direct assay:GO:0000142 cellular
+ bud neck contractile ring [PMID:36729023]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:22842922]"
+ /experiment="EXISTENCE:direct assay:GO:0005934 cellular
+ bud tip [PMID:22842922|PMID:1315784]"
+ /experiment="EXISTENCE:direct assay:GO:0008290 F-actin
+ capping protein complex [PMID:14769858]"
+ /experiment="EXISTENCE:direct assay:GO:0030479 actin
+ cortical patch [PMID:14769858]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:19053807]"
+ /experiment="EXISTENCE:direct assay:GO:0051016 barbed-end
+ actin filament capping [PMID:14769858|PMID:1315784]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008290 F-actin
+ capping protein complex [PMID:14769858]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030447
+ filamentous growth [PMID:15645503]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051015 actin
+ filament binding [PMID:14769858]"
+ /note="Beta subunit of the capping protein heterodimer
+ (Cap1p and Cap2p); capping protein (CP) binds to the
+ barbed ends of actin filaments preventing further
+ polymerization; localized predominantly to cortical actin
+ patches and the cytokinetic contractile ring; protein
+ increases in abundance and relocalizes from bud neck to
+ plasma membrane upon DNA replication stress"
+ /codon_start=1
+ /product="F-actin-capping protein subunit beta"
+ /protein_id="NP_012230.3"
+ /db_xref="GeneID:854777"
+ /db_xref="SGD:S000001296"
+ /translation="MSDAQFDAALDLLRRLNPTTLQENLNNLIELQPNLAQDLLSSVD
+ VPLSTQKDSADSNREYLCCDYNRDIDSFRSPWSNTYYPELSPKDLQDSPFPSAPLRKL
+ EILANDSFDVYRDLYYEGGISSVYLWDLNEEDFNGHDFAGVVLFKKNQSDHSNWDSIH
+ VFEVTTSPSSPDSFNYRVTTTIILHLDKTKTDQNSHMMLSGNLTRQTEKDIAIDMSRP
+ LDVIFTSHVANLGSLIEDIESQMRNLLETVYFEKTRDIFHQTKNAAIASSAEEANKDA
+ QAEVIRGLQSL"
+ gene complement(<290419..>291669)
+ /gene="BCY1"
+ /locus_tag="YIL033C"
+ /gene_synonym="SRA1"
+ /db_xref="GeneID:854778"
+ mRNA complement(<290419..>291669)
+ /gene="BCY1"
+ /locus_tag="YIL033C"
+ /gene_synonym="SRA1"
+ /product="cAMP-dependent protein kinase regulatory subunit
+ BCY1"
+ /transcript_id="NM_001179383.1"
+ /db_xref="GeneID:854778"
+ CDS complement(290419..291669)
+ /gene="BCY1"
+ /locus_tag="YIL033C"
+ /gene_synonym="SRA1"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:26403272]"
+ /experiment="EXISTENCE:direct assay:GO:0004862
+ cAMP-dependent protein kinase inhibitor activity
+ [PMID:3037314]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22842922|PMID:11914276|PMID:11134339|PMID:18417610|P
+ MID:3288487]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:22842922|PMID:11914276|PMID:11134339]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:2831892]"
+ /experiment="EXISTENCE:direct assay:GO:0046580 negative
+ regulation of Ras protein signal transduction
+ [PMID:3037314]"
+ /experiment="EXISTENCE:genetic interaction:GO:0097271
+ protein localization to bud neck [PMID:12782684]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004862
+ cAMP-dependent protein kinase inhibitor activity
+ [PMID:6292221]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006995
+ cellular response to nitrogen starvation [PMID:26947009]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0010603
+ regulation of cytoplasmic mRNA processing body assembly
+ [PMID:21925385]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042149
+ cellular response to glucose starvation
+ [PMID:21925385|PMID:26947009]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:26947009]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0046580
+ negative regulation of Ras protein signal transduction
+ [PMID:3037314]"
+ /note="Regulatory subunit of the cyclic AMP-dependent
+ protein kinase (PKA); PKA is a component of a signaling
+ pathway that controls a variety of cellular processes,
+ including metabolism, cell cycle, stress response,
+ stationary phase, and sporulation; regulator of meiotic
+ commitment"
+ /codon_start=1
+ /product="cAMP-dependent protein kinase regulatory subunit
+ BCY1"
+ /protein_id="NP_012231.1"
+ /db_xref="GeneID:854778"
+ /db_xref="SGD:S000001295"
+ /translation="MVSSLPKESQAELQLFQNEINAANPSDFLQFSANYFNKRLEQQR
+ AFLKAREPEFKAKNIVLFPEPEESFSRPQSAQSQSRSRSSVMFKSPFVNEDPHSNVFK
+ SGFNLDPHEQDTHQQAQEEQQHTREKTSTPPLPMHFNAQRRTSVSGETLQPNNFDDWT
+ PDHYKEKSEQQLQRLEKSIRNNFLFNKLDSDSKRLVINCLEEKSVPKGATIIKQGDQG
+ DYFYVVEKGTVDFYVNDNKVNSSGPGSSFGELALMYNSPRAATVVATSDCLLWALDRL
+ TFRKILLGSSFKKRLMYDDLLKSMPVLKSLTTYDRAKLADALDTKIYQPGETIIREGD
+ QGENFYLIEYGAVDVSKKGQGVINKLKDHDYFGEVALLNDLPRQATVTATKRTKVATL
+ GKSGFQRLLGPAVDVLKLNDPTRH"
+ gene <292633..>295737
+ /gene="ULP2"
+ /locus_tag="YIL031W"
+ /gene_synonym="SMT4"
+ /db_xref="GeneID:854780"
+ mRNA <292633..>295737
+ /gene="ULP2"
+ /locus_tag="YIL031W"
+ /gene_synonym="SMT4"
+ /product="SUMO protease ULP2"
+ /transcript_id="NM_001179381.1"
+ /db_xref="GeneID:854780"
+ CDS 292633..295737
+ /gene="ULP2"
+ /locus_tag="YIL031W"
+ /gene_synonym="SMT4"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:30575729]"
+ /experiment="EXISTENCE:direct assay:GO:0003711
+ transcription elongation factor activity [PMID:31313851]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:10713161]"
+ /experiment="EXISTENCE:direct assay:GO:0008234
+ cysteine-type peptidase activity [PMID:10713161]"
+ /experiment="EXISTENCE:direct assay:GO:0016926 protein
+ desumoylation [PMID:10713161]"
+ /experiment="EXISTENCE:direct assay:GO:0016929 deSUMOylase
+ activity [PMID:10713161]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006276 plasmid
+ maintenance [PMID:11333221|PMID:10713161]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007094 mitotic
+ spindle assembly checkpoint signaling [PMID:10713162]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016926 protein
+ desumoylation [PMID:10713161]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016929
+ deSUMOylase activity [PMID:10713161]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030261
+ chromosome condensation [PMID:11333221]"
+ /note="Peptidase that deconjugates Smt3/SUMO-1 peptides
+ from proteins; plays a role in chromosome cohesion at
+ centromeric regions and recovery from checkpoint arrest
+ induced by DNA damage or DNA replication defects;
+ potential Cdc28p substrate; human homolog PML implicated
+ in promyelocytic leukemia can partially complement yeast
+ null mutant"
+ /codon_start=1
+ /product="SUMO protease ULP2"
+ /protein_id="NP_012233.1"
+ /db_xref="GeneID:854780"
+ /db_xref="SGD:S000001293"
+ /translation="MSARKRKFNSLKPLDTLNSSRASSPRSSASLPPKRYNTFRKDPK
+ IVDHLNNASTKDFLPVLSMNSESKRQIELSDNDVDNNDEGEGVNSGCSDQDFEPLQSS
+ PLKRHSSLKSTSNGLLFQMSNNLGNGSPEPAVASTSPNGSIISTKLNLNGQFSCVDSK
+ TLRIYRHKAPCIMTFVSDHNHPKFSLYFQQSVIYNSQVNLLDDVELIILDKKNSFMAI
+ ILKDLKKVKMILDVNNSSININTNILIWSTASSASNKKIKSIKRFLLMSYSSSIKVEI
+ LDHKEQILERLKHLIHPISSSSPSLNMERAINSTKNAFDSLRLKKTKLSTNDDESPQI
+ HTHFLSNKPHGLQSLTKRTRIASLGKKEHSISVPKSNISPSDFYNTNGTETLQSHAVS
+ QLRRSNRFKDVSDPANSNSNSEFDDATTEFETPELFKPSLCYKFNDGSSYTITNQDFK
+ CLFNKDWVNDSILDFFTKFYIESSIEKSIIKREQVHLMSSFFYTKLISNPADYYSNVK
+ KWVNNTDLFSKKYVVIPINISYHWFSCIITNLDAILDFHQNKDKNDAINSDEISINNP
+ LVNILTFDSLRQTHSREIDPIKEFLISYALDKYSIQLDKTQIKMKTCPVPQQPNMSDC
+ GVHVILNIRKFFENPVETIDVWKNSKIKSKHFTAKMINKYFDKNERNSARKNLRHTLK
+ LLQLNYISYLKKENLYEEVMQMEEKKSTNINNNENYDDDDEEIQIIENIDQSSKDNNA
+ QLTSEPPCSRSSSISTTEREPTELHNSVVRQPTGEIITDNEDPVRAASPETASVSPPI
+ RHNILKSSSPFISESANETEQEEFTSPYFGRPSLKTRAKQFEGVSSPIKNDQALSSTH
+ DIMMPSPKPKRIYPSKKIPQLSSHVQSLSTDSMERQSSPNNTNIVISDTEQDSRLGVN
+ SESKNTSGIVNRDDSDVNLIGSSLPNVAEKNHDNTQESNGNNDSLGKILQNVDKELNE
+ KLVDIDDVAFSSPTRGIPRTSATSKGSNAQLLSNYGDENNQSQDSVWDEGRDNPILLE
+ DEDP"
+ gene complement(<296050..>300009)
+ /gene="SSM4"
+ /locus_tag="YIL030C"
+ /gene_synonym="DOA10; KIS3"
+ /db_xref="GeneID:854781"
+ mRNA complement(<296050..>300009)
+ /gene="SSM4"
+ /locus_tag="YIL030C"
+ /gene_synonym="DOA10; KIS3"
+ /product="E3 ubiquitin-protein ligase SSM4"
+ /transcript_id="NM_001179380.3"
+ /db_xref="GeneID:854781"
+ CDS complement(296050..300009)
+ /gene="SSM4"
+ /locus_tag="YIL030C"
+ /gene_synonym="DOA10; KIS3"
+ /EC_number="2.3.2.27"
+ /experiment="EXISTENCE:direct assay:GO:0000837 Doa10p
+ ubiquitin ligase complex [PMID:16873066]"
+ /experiment="EXISTENCE:direct assay:GO:0004842
+ ubiquitin-protein transferase activity [PMID:11641273]"
+ /experiment="EXISTENCE:direct assay:GO:0005635 nuclear
+ envelope [PMID:11641273]"
+ /experiment="EXISTENCE:direct assay:GO:0005637 nuclear
+ inner membrane [PMID:17051211]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:11641273]"
+ /experiment="EXISTENCE:direct assay:GO:0030970 retrograde
+ protein transport, ER to cytosol [PMID:32588820]"
+ /experiment="EXISTENCE:direct assay:GO:0061630 ubiquitin
+ protein ligase activity [PMID:25918226]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030970
+ retrograde protein transport, ER to cytosol
+ [PMID:32588820]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0036503 ERAD
+ pathway [PMID:11641273|PMID:15252059]"
+ /note="Membrane-embedded ubiquitin-protein ligase and
+ retrotranslocase; ER and inner nuclear membrane localized
+ RING-CH domain E3 ligase involved in ER-associated protein
+ degradation (ERAD); aids Cdc48p in the extraction of
+ faulty membrane proteins; targets misfolded
+ cytosolic/nucleoplasmic domains of soluble and membrane
+ embedded proteins (ERAD-C) and Sbh2p, a transmembrane
+ domain-containing substrate (ERAD-M); C-terminal element,
+ conserved in human ortholog MARCH6, determines substrate
+ selectivity"
+ /codon_start=1
+ /product="E3 ubiquitin-protein ligase SSM4"
+ /protein_id="NP_012234.3"
+ /db_xref="GeneID:854781"
+ /db_xref="SGD:S000001292"
+ /translation="MDVDSDVNVSRLRDELHKVANEETDTATFNDDAPSGATCRICRG
+ EATEDNPLFHPCKCRGSIKYMHESCLLEWVASKNIDISKPGADVKCDICHYPIQFKTI
+ YAENMPEKIPFSLLLSKSILTFFEKARLALTIGLAAVLYIIGVPLVWNMFGKLYTMML
+ DGSSPYPGDFLKSLIYGYDQSATPELTTRAIFYQLLQNHSFTSLQFIMIVILHIALYF
+ QYDMIVREDVFSKMVFHKIGPRLSPKDLKSRLKERFPMMDDRMVEYLAREMRAHDENR
+ QEQGHDRLNMPAAAADNNNNVINPRNDNVPPQDPNDHRNFENLRHVDELDHDEATEEH
+ ENNDSDNSLPSGDDSSRILPGSSSDNEEDEEAEGQQQQQQPEEEADYRDHIEPNPIDM
+ WANRRAQNEFDDLIAAQQNAINRPNAPVFIPPPAQNRAGNVDQDEQDFGAAVGVPPAQ
+ ANPDDQGQGPLVINLKLKLLNVIAYFIIAVVFTAIYLAISYLFPTFIGFGLLKIYFGI
+ FKVILRGLCHLYYLSGAHIAYNGLTKLVPKVDVAMSWISDHLIHDIIYLYNGYTENTM
+ KHSIFIRALPALTTYLTSVSIVCASSNLVSRGYGRENGMSNPTRRLIFQILFALKCTF
+ KVFTLFFIELAGFPILAGVMLDFSLFCPILASNSRMLWVPSICAIWPPFSLFVYWTIG
+ TLYMYWFAKYIGMIRKNIIRPGVLFFIRSPEDPNIKILHDSLIHPMSIQLSRLCLSMF
+ IYAIFIVLGFGFHTRIFFPFMLKSNLLSVPEAYKPTSIISWKFNTILLTLYFTKRILE
+ SSSYVKPLLERYWKTIFKLCSRKLRLSSFILGKDTPTERGHIVYRNLFYKYIAAKNAE
+ WSNQELFTKPKTLEQAEELFGQVRDVHAYFVPDGVLMRVPSSDIVSRNYVQTMFVPVT
+ KDDKLLKPLDLERIKERNKRAAGEFGYLDEQNTEYDQYYIVYVPPDFRLRYMTLLGLV
+ WLFASILMLGVTFISQALINFVCSFGFLPVVKLLLGERNKVYVAWKELSDISYSYLNI
+ YYVCVGSVCLSKIAKDILHFTEGQNTLDEHAVDENEVEEVEHDIPERDINNAPVNNIN
+ NVEEGQGIFMAIFNSIFDSMLVKYNLMVFIAIMIAVIRTMVSWVVLTDGILACYNYLT
+ IRVFGNSSYTIGNSKWFKYDESLLFVVWIISSMVNFGTGYKSLKLFFRNRNTSKLNFL
+ KTMALELFKQGFLHMVIYVLPIIILSLVFLRDVSTKQIIDISHGSRSFTLSLNESFPT
+ WTRMQDIYFGLLIALESFTFFFQATVLFIQWFKSTVQNVKDEVYTKGRALENLPDES"
+ gene complement(300228..300300)
+ /locus_tag="YNCI0007C"
+ /db_xref="GeneID:854782"
+ tRNA complement(300228..300300)
+ /locus_tag="YNCI0007C"
+ /product="tRNA-Lys"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:8915539]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:8915539]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030533 triplet
+ codon-amino acid adaptor activity [PMID:17560369]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032543
+ mitochondrial translation [PMID:17560369]"
+ /note="Lysine tRNA (tRNA-Lys), predicted by tRNAscan-SE
+ analysis; a small portion is imported into mitochondria
+ via interaction with mt lysyl-tRNA synthetase Msk1p and is
+ necessary to decode AAG codons at high temperature, when
+ base modification of mt-encoded tRNA-Lys is reduced"
+ /db_xref="GeneID:854782"
+ /db_xref="SGD:S000006624"
+ repeat_region complement(300409..300727)
+ /note="Ty1 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007013"
+ gene complement(<300829..>301257)
+ /gene="EMA17"
+ /locus_tag="YIL029C"
+ /db_xref="GeneID:854783"
+ mRNA complement(<300829..>301257)
+ /gene="EMA17"
+ /locus_tag="YIL029C"
+ /product="Ema17p"
+ /transcript_id="NM_001179379.3"
+ /db_xref="GeneID:854783"
+ CDS complement(300829..301257)
+ /gene="EMA17"
+ /locus_tag="YIL029C"
+ /note="Protein involved in targeting mitochondrial
+ membrane protein precursors to mitochondrial translocation
+ system; deletion confers sensitivity to
+ 4-(N-(S-glutathionylacetyl)amino) phenylarsenoxide (GSAO);
+ YIL029C has a paralog, YPR071W, that arose from a
+ single-locus duplication"
+ /codon_start=1
+ /product="Ema17p"
+ /protein_id="NP_012235.3"
+ /db_xref="GeneID:854783"
+ /db_xref="SGD:S000001291"
+ /translation="MRLIFIAKMLQYSFLPFSPFNLLNFDNSISVSWFITYSVIVSIW
+ GFAVWIEGAYRNKINLQLPRCTKIKCSRYNTRIKSPKWFNCKNWMHFFLLYLFLTASN
+ LIVQLAYFSKEMCSQGINVPGTKKPGNRVYLSVIILMGNG"
+ gene complement(<303679..>304104)
+ /gene="EMC5"
+ /locus_tag="YIL027C"
+ /gene_synonym="KRE27"
+ /db_xref="GeneID:854785"
+ mRNA complement(<303679..>304104)
+ /gene="EMC5"
+ /locus_tag="YIL027C"
+ /gene_synonym="KRE27"
+ /product="Emc5p"
+ /transcript_id="NM_001179377.1"
+ /db_xref="GeneID:854785"
+ CDS complement(303679..304104)
+ /gene="EMC5"
+ /locus_tag="YIL027C"
+ /gene_synonym="KRE27"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:14562095|PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0072546 EMC complex
+ [PMID:19325107]"
+ /experiment="EXISTENCE:genetic interaction:GO:0034975
+ protein folding in endoplasmic reticulum [PMID:19325107]"
+ /note="Member of conserved ER transmembrane complex;
+ required for efficient folding of proteins in the ER; null
+ mutant displays induction of the unfolded protein
+ response, and also shows K1 killer toxin resistance;
+ homologous to worm B0334.15/EMC-5, fly CG15168, human
+ MMGT"
+ /codon_start=1
+ /product="Emc5p"
+ /protein_id="NP_012237.1"
+ /db_xref="GeneID:854785"
+ /db_xref="SGD:S000001289"
+ /translation="MSFVSKLLYTVSALVLFHSGFSSYEFHHLLKLNSLNNAQGAISK
+ LPKDIMYETYAGLILFVLAVFTSFEKLQYLPIESNDGKIISQGNYLKEIALNKATNVD
+ NLIGSNPNGEIIFTPSFVDVHMKRKICREWASNTVKKEK"
+ gene complement(<304477..>307929)
+ /gene="IRR1"
+ /locus_tag="YIL026C"
+ /gene_synonym="SCC3"
+ /db_xref="GeneID:854786"
+ mRNA complement(<304477..>307929)
+ /gene="IRR1"
+ /locus_tag="YIL026C"
+ /gene_synonym="SCC3"
+ /product="Irr1p"
+ /transcript_id="NM_001179376.1"
+ /db_xref="GeneID:854786"
+ CDS complement(304477..307929)
+ /gene="IRR1"
+ /locus_tag="YIL026C"
+ /gene_synonym="SCC3"
+ /experiment="EXISTENCE:direct assay:GO:0003682 chromatin
+ binding [PMID:9990856]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:22932476|PMID:10028182|PMID:28077952]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:28077952]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:22932476]"
+ /experiment="EXISTENCE:direct assay:GO:0030892 mitotic
+ cohesin complex [PMID:9990856]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007064 mitotic
+ sister chromatid cohesion [PMID:9990856]"
+ /note="Subunit of the cohesin complex; which is required
+ for sister chromatid cohesion during mitosis and meiosis
+ and interacts with centromeres and chromosome arms;
+ relocalizes to the cytosol in response to hypoxia;
+ essential for viability"
+ /codon_start=1
+ /product="Irr1p"
+ /protein_id="NP_012238.1"
+ /db_xref="GeneID:854786"
+ /db_xref="SGD:S000001288"
+ /translation="MTAVRRSTRIRTKSQVIEEDYDDEQNTSAQHVESDKITAKTQHE
+ EEEEQDTGESEESSSEDDYEDQDDDDYVDTATAKRKSRKRKPKSASNTSSKRQKKKPT
+ SAQKSAVSHAPAYHRSKKDQDQYLEIAKDFQPTELFDILSTSEDVSIEELLREWLETY
+ SENRDKFLQEFINLLLNCCGSVARVEDHDVHSNESSNETIGEIQLLFQRQKLHEFYLL
+ ISKENKKRKNFKMGPLYQNFAEFMTKLLEVANDLQLLYVESDEDDTQIVTGNLVLDLL
+ TWLSSFSVCKIRCFRYISTLTLYLFQDYLTQQAVNLEKNYLAKLSKQLSLEEKKKRPN
+ NKTLEKLESTIAETQGSKVVIDSIIDNIVKLCFVHRYKDVSDLIRSESMLHLSIWIKN
+ YPEYFLKVTFLKYFGWLLSDNSVSVRLQVTKILPHLIIQNHNSKSTDNSAIRQVFERF
+ KTKILEVAIRDVNLDVRIHSIQVLTEASSLGYLDDSEILIISSLMFDEEFDPFKTSSF
+ NKRSKFLSTVAKFLARVIKEKFDEFIKTHEDLPKEVDGLEVGPVVQVGIFIKILNDSL
+ IYHLKDCAEVDSRTKIRMLTQAAEFLSPYISTHLKTICNLLISDTESNELIQKLQNSA
+ NNNSDDEDVDDEELDITPLFPIDRNSTILYLNVFHGLCAGANNPKIQTKDSVKEIVLP
+ LFYDLLNAASIESADILCPLLESFITFSLDDWISIGYETELKKITDKTIKAFMDSTIG
+ NSKVDMKYDIFAKFIHHIHHFEKKELQEKFLNQIATLKIHLKKFLQEKMDPNNSRDDY
+ KDLTCSLYELYINKLTILGRDYPIEVDEELLQLFLNNFVSRIPIMFQDFDDSTAQEIN
+ FKMLVLLATWNLEKWREIIEKVRDYENSISKDLRSVWKPIAAIIGRLNTLVISLAATN
+ ETFENINSLFYLKWSACTSLMDIIVAIKIFELKLPADATTWRYSMSEQFPFYLHDNAS
+ KVLLKIFLYLESLFAKQVDVQLERVADEDANLNDLPETGFFENIETEFLLFTVKLKGL
+ MKLNILDERFASRVALNKEKLGPLFKKIVDDTIMENPEPNKKNIQKAKSNQTQREKAP
+ LQPNSERETDHANTENNDPDIPMTIDLEPIEESSQNNSELAPIEEHPTVVDAIDNSDE
+ ITQD"
+ gene complement(<308602..>309171)
+ /locus_tag="YIL024C"
+ /db_xref="GeneID:854788"
+ mRNA complement(<308602..>309171)
+ /locus_tag="YIL024C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179374.3"
+ /db_xref="GeneID:854788"
+ CDS complement(308602..309171)
+ /locus_tag="YIL024C"
+ /note="hypothetical protein; non-essential gene;
+ expression directly regulated by the metabolic and meiotic
+ transcriptional regulator Ume6p"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012240.3"
+ /db_xref="GeneID:854788"
+ /db_xref="SGD:S000001286"
+ /translation="MSNFLLVIPEDVIKGCSKADKLVVTGEFDNWRHSDYVLQYDGST
+ QNYRVQIPRRKGQRSTMFKVVINDKKWVTLNYFDTVTDKSGYTNNILHFKDNEASQLM
+ DIPLSPHTRSNTAKGKPEDDSLNDYVNLSSHSDLSSTEEIVCWNSDMEDENMDATIQC
+ DFHQAFNSRKESLNGLMCIAKKVKTYWNK"
+ gene complement(<309386..>310426)
+ /gene="YKE4"
+ /locus_tag="YIL023C"
+ /db_xref="GeneID:854789"
+ mRNA complement(<309386..>310426)
+ /gene="YKE4"
+ /locus_tag="YIL023C"
+ /product="Zn(2+) transporter YKE4"
+ /transcript_id="NM_001179373.1"
+ /db_xref="GeneID:854789"
+ CDS complement(309386..310426)
+ /gene="YKE4"
+ /locus_tag="YIL023C"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762|PMID:16760462]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005385 zinc
+ ion transmembrane transporter activity [PMID:16760462]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006829 zinc
+ ion transport [PMID:16760462]"
+ /note="Zinc transporter; localizes to the ER; null mutant
+ is sensitive to calcofluor white, leads to zinc
+ accumulation in cytosol; ortholog of the mouse KE4 and
+ member of the ZIP (ZRT, IRT-like Protein) family"
+ /codon_start=1
+ /product="Zn(2+) transporter YKE4"
+ /protein_id="NP_012241.1"
+ /db_xref="GeneID:854789"
+ /db_xref="SGD:S000001285"
+ /translation="MKASHICSYLLSIAPLVVSHGVHHNRDHGHEANHESKQSFLILK
+ QESIFYSLVCFLQNHLFVLGPRYNAIVAILIIQLMPCLFVLFVPGLRKNDRASLTLSL
+ LVSFSLGTLLGDILLHVIPESLSGVTDVTMVGGAIFLGFISFLTLDKTMRILSGTSND
+ DGSIHSHSHSHTPQQTAEKKAGFNMSAYLNVISGIAHHITDGIALATSFYSSTQVGIM
+ TSIAVTFHEIPHELGDFAILLSSGFTFPQAIRAQAVTAFGAVVGTSIGCWMNEIGNNS
+ HKATSSSANASELMLPFTAGGLIYIATTSVVPQILHSSAPDSKLREFKKWALQLVFIF
+ VGFAVMALMDEH"
+ gene <311165..>312460
+ /gene="TIM44"
+ /locus_tag="YIL022W"
+ /gene_synonym="ISP45; MIM44; MPI1"
+ /db_xref="GeneID:854790"
+ mRNA <311165..>312460
+ /gene="TIM44"
+ /locus_tag="YIL022W"
+ /gene_synonym="ISP45; MIM44; MPI1"
+ /product="protein translocase subunit TIM44"
+ /transcript_id="NM_001179372.1"
+ /db_xref="GeneID:854790"
+ CDS 311165..312460
+ /gene="TIM44"
+ /locus_tag="YIL022W"
+ /gene_synonym="ISP45; MIM44; MPI1"
+ /experiment="EXISTENCE:direct assay:GO:0001405 PAM
+ complex, Tim23 associated import motor
+ [PMID:14517234|PMID:14638855]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:24769239]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030150 protein
+ import into mitochondrial matrix
+ [PMID:1396562|PMID:18400944]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030674
+ protein-macromolecule adaptor activity
+ [PMID:10824101|PMID:11344168]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051087
+ protein-folding chaperone binding [PMID:11344168]"
+ /experiment="EXISTENCE:physical interaction:GO:0051087
+ protein-folding chaperone binding [PMID:7809127]"
+ /note="Essential component of the TIM23 complex; tethers
+ the import motor and regulatory factors (PAM complex) to
+ the translocation channel (Tim23p-Tim17p core complex);
+ TIM23 complex is short for the translocase of the inner
+ mitochondrial membrane"
+ /codon_start=1
+ /product="protein translocase subunit TIM44"
+ /protein_id="NP_012242.1"
+ /db_xref="GeneID:854790"
+ /db_xref="SGD:S000001284"
+ /translation="MHRSTFIRTSGTSSRTLTARYRSQYTGLLVARVLFSTSTTRAQG
+ GNPRSPLQIFRDTFKKEWEKSQELQENIKTLQDASGKLGESEAYKKAREAYLKAQRGS
+ TIVGKTLKKTGETMEHIATKAWESELGKNTRKAAAATAKKLDESFEPVRQTKIYKEVS
+ EVIDDGESSRYGGFITKEQRRLKRERDLASGKRHRAVKSNEDAGTAVVATNIESKESF
+ GKKVEDFKEKTVVGRSIQSLKNKLWDESENPLIVVMRKITNKVGGFFAETESSRVYSQ
+ FKLMDPTFSNESFTRHLREYIVPEILEAYVKGDVKVLKKWFSEAPFNVYAAQQKIFKE
+ QDVYADGRILDIRGVEIVSAKLLAPQDIPVLVVGCRAQEINLYRKKKTGEIAAGDEAN
+ ILMSSYAMVFTRDPEQIDDDETEGWKILEFVRGGSRQFT"
+ gene <312905..>313861
+ /gene="RPB3"
+ /locus_tag="YIL021W"
+ /db_xref="GeneID:854791"
+ mRNA <312905..>313861
+ /gene="RPB3"
+ /locus_tag="YIL021W"
+ /product="DNA-directed RNA polymerase II core subunit
+ RPB3"
+ /transcript_id="NM_001179371.3"
+ /db_xref="GeneID:854791"
+ CDS 312905..313861
+ /gene="RPB3"
+ /locus_tag="YIL021W"
+ /experiment="EXISTENCE:direct assay:GO:0001055 RNA
+ polymerase II activity [PMID:8288647]"
+ /experiment="EXISTENCE:direct assay:GO:0003968
+ RNA-dependent RNA polymerase activity [PMID:18004386]"
+ /experiment="EXISTENCE:direct assay:GO:0005654 nucleoplasm
+ [PMID:15520468]"
+ /experiment="EXISTENCE:direct assay:GO:0005665 RNA
+ polymerase II, core complex
+ [PMID:2186966|PMID:1331084|PMID:2183013]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006366
+ transcription by RNA polymerase II [PMID:2685562]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006369
+ termination of RNA polymerase II transcription
+ [PMID:16537912]"
+ /note="RNA polymerase II third largest subunit B44; part
+ of central core; similar to prokaryotic alpha subunit"
+ /codon_start=1
+ /product="DNA-directed RNA polymerase II core subunit
+ RPB3"
+ /protein_id="NP_012243.3"
+ /db_xref="GeneID:854791"
+ /db_xref="SGD:S000001283"
+ /translation="MSEEGPQVKIREASKDNVDFILSNVDLAMANSLRRVMIAEIPTL
+ AIDSVEVETNTTVLADEFIAHRLGLIPLQSMDIEQLEYSRDCFCEDHCDKCSVVLTLQ
+ AFGESESTTNVYSKDLVIVSNLMGRNIGHPIIQDKEGNGVLICKLRKGQELKLTCVAK
+ KGIAKEHAKWGPAAAIEFEYDPWNKLKHTDYWYEQDSAKEWPQSKNCEYEDPPNEGDP
+ FDYKAQADTFYMNVESVGSIPVDQVVVRGIDTLQKKVASILLALTQMDQDKVNFASGD
+ NNTASNMLGSNEDVMMTGAEQDPYSNASQMGNTGSGGYDNAW"
+ rep_origin 313861..314035
+ /note="ARS918; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130160"
+ gene complement(<314035..>314820)
+ /gene="HIS6"
+ /locus_tag="YIL020C"
+ /db_xref="GeneID:854792"
+ mRNA complement(<314035..>314820)
+ /gene="HIS6"
+ /locus_tag="YIL020C"
+ /product="1-(5-phosphoribosyl)-5-
+ ((5-phosphoribosylamino)methylideneamino)imidazole-4-
+ carboxamide isomerase HIS6"
+ /transcript_id="NM_001179370.3"
+ /db_xref="GeneID:854792"
+ CDS complement(314035..314820)
+ /gene="HIS6"
+ /locus_tag="YIL020C"
+ /EC_number="5.3.1.16"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000105
+ histidine biosynthetic process [PMID:9332345]"
+ /experiment="EXISTENCE:genetic interaction:GO:0003949
+ 1-(5-phosphoribosyl)-5-[(5-
+ phosphoribosylamino)methylideneamino]imidazole-4-
+ carboxamide isomerase activity [PMID:9332345]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000105
+ histidine biosynthetic process
+ [PMID:14190241|PMID:9332345]"
+ /note="Enzyme that catalyzes the fourth step in the
+ histidine pathway;
+ Phosphoribosylformimino-5-aminoimidazole carboxamide
+ ribotide isomerase; mutations cause histidine auxotrophy
+ and sensitivity to Cu, Co, and Ni salts"
+ /codon_start=1
+ /product="1-(5-phosphoribosyl)-5-
+ ((5-phosphoribosylamino)methylideneamino)imidazole-4-
+ carboxamide isomerase HIS6"
+ /protein_id="NP_012244.3"
+ /db_xref="GeneID:854792"
+ /db_xref="SGD:S000001282"
+ /translation="MTKFIGCIDLHNGEVKQIVGGTLTSKKEDVPKTNFVSQHPSSYY
+ AKLYKDRDVQGCHVIKLGPNNDDAAREALQESPQFLQVGGGINDTNCLEWLKWASKVI
+ VTSWLFTKEGHFQLKRLERLTELCGKDRIVVDLSCRKTQDGRWIVAMNKWQTLTDLEL
+ NADTFRELRKYTNEFLIHAADVEGLCGGIDELLVSKLFEWTKDYDDLKIVYAGGAKSV
+ DDLKLVDELSHGKVDLTFGSSLDIFGGNLVKFEDCCRWNEKQG"
+ gene <315093..>316133
+ /gene="FAF1"
+ /locus_tag="YIL019W"
+ /db_xref="GeneID:854793"
+ mRNA <315093..>316133
+ /gene="FAF1"
+ /locus_tag="YIL019W"
+ /product="Faf1p"
+ /transcript_id="NM_001179369.1"
+ /db_xref="GeneID:854793"
+ CDS 315093..316133
+ /gene="FAF1"
+ /locus_tag="YIL019W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005730 nucleolus
+ [PMID:15078877|PMID:15178413]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000462
+ maturation of SSU-rRNA from tricistronic rRNA transcript
+ (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
+ [PMID:15078877|PMID:15178413]"
+ /note="Protein required for pre-rRNA processing; also
+ required for 40S ribosomal subunit assembly"
+ /codon_start=1
+ /product="Faf1p"
+ /protein_id="NP_012245.1"
+ /db_xref="GeneID:854793"
+ /db_xref="SGD:S000001281"
+ /translation="MTLDDDDYIKQMELQRKAFESQFGSLESMGFEDKTKNIRTEVDT
+ RDSSGDEIDNSDHGSDFKDGTIESSNSSDEDSGNETAEENNQDSKPKTQPKVIRFNGP
+ SDVYVPPSKKTQKLLRSGKTLTQINKKLESTEAKEEKEDETLEAENLQNDLELQQFLR
+ ESHLLSAFNNGGSGSTNSGVSLTLQSMGGGNDDGIVYQDDQVIGKARSRTLEMRLNRL
+ SRVNGHQDKINKLEKVPMHIRRGMIDKHVKRIKKYEQEAAEGGIVLSKVKKGQFRKIE
+ STYKKDIERRIGGSIKARDKEKATKRERGLKISSVGRSTRNGLIVSKRDIARISGGER
+ SGKFNGKKKSRR"
+ gene <316768..>317932
+ /gene="RPL2B"
+ /locus_tag="YIL018W"
+ /gene_synonym="LOT2; RPL5A"
+ /db_xref="GeneID:854794"
+ mRNA join(<316768..316771,317172..>317932)
+ /gene="RPL2B"
+ /locus_tag="YIL018W"
+ /gene_synonym="LOT2; RPL5A"
+ /product="ribosomal 60S subunit protein L2B"
+ /transcript_id="NM_001179368.1"
+ /db_xref="GeneID:854794"
+ CDS join(316768..316771,317172..317932)
+ /gene="RPL2B"
+ /locus_tag="YIL018W"
+ /gene_synonym="LOT2; RPL5A"
+ /experiment="EXISTENCE:direct assay:GO:0002181 cytoplasmic
+ translation [PMID:18782943]"
+ /experiment="EXISTENCE:direct assay:GO:0003735 structural
+ constituent of ribosome [PMID:18782943]"
+ /experiment="EXISTENCE:direct assay:GO:0022625 cytosolic
+ large ribosomal subunit [PMID:18782943]"
+ /note="Ribosomal 60S subunit protein L2B; homologous to
+ mammalian ribosomal protein L2 and bacterial L2; RPL2B has
+ a paralog, RPL2A, that arose from the whole genome
+ duplication; expression is upregulated at low
+ temperatures"
+ /codon_start=1
+ /product="ribosomal 60S subunit protein L2B"
+ /protein_id="NP_012246.1"
+ /db_xref="GeneID:854794"
+ /db_xref="SGD:S000001280"
+ /translation="MGRVIRNQRKGAGSIFTSHTRLRQGAAKLRTLDYAERHGYIRGI
+ VKQIVHDSGRGAPLAKVVFRDPYKYRLREEIFIANEGVHTGQFIYAGKKASLNVGNVL
+ PLGSVPEGTIVSNVEEKPGDRGALARASGNYVIIIGHNPDENKTRVRLPSGAKKVISS
+ DARGVIGVIAGGGRVDKPLLKAGRAFHKYRLKRNSWPKTRGVAMNPVDHPHGGGNHQH
+ IGKASTISRGAVSGQKAGLIAARRTGLLRGSQKTQD"
+ gene complement(<318200..>320965)
+ /gene="VID28"
+ /locus_tag="YIL017C"
+ /gene_synonym="GID5; YIL017W"
+ /db_xref="GeneID:854795"
+ mRNA complement(<318200..>320965)
+ /gene="VID28"
+ /locus_tag="YIL017C"
+ /gene_synonym="GID5; YIL017W"
+ /product="glucose-induced degradation complex subunit
+ VID28"
+ /transcript_id="NM_001179367.3"
+ /db_xref="GeneID:854795"
+ CDS complement(318200..320965)
+ /gene="VID28"
+ /locus_tag="YIL017C"
+ /gene_synonym="GID5; YIL017W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0034657 GID complex
+ [PMID:16872538|PMID:18508925]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007039 protein
+ catabolic process in the vacuole [PMID:15358789]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030437
+ ascospore formation [PMID:12586695]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043161
+ proteasome-mediated ubiquitin-dependent protein catabolic
+ process [PMID:12686616|PMID:15358789]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045721
+ negative regulation of gluconeogenesis [PMID:12686616]"
+ /note="GID Complex subunit, serves as adaptor for
+ regulatory subunit Vid24p; protein involved in
+ proteasome-dependent catabolite degradation of
+ fructose-1,6-bisphosphatase (FBPase); localized to the
+ nucleus and the cytoplasm"
+ /codon_start=1
+ /product="glucose-induced degradation complex subunit
+ VID28"
+ /protein_id="NP_012247.3"
+ /db_xref="GeneID:854795"
+ /db_xref="SGD:S000001279"
+ /translation="MTVAYSLENLKKISNSLVGDQLAKVDYFLAPKCQIFQCLLSIEQ
+ SDGVELKNAKLDLLYTLLHLEPQQRDIVGTYYFDIVSAIYKSMSLASSFTKNNSSTNY
+ KYIKLLNLCAGVYPNCGFPDLQYLQNGFIQLVNHKFLRSKCKIDEVVTIIELLKLFLL
+ VDEKNCSDFNKSKFMEEEREVTETSHYQDFKMAESLEHIIVKISSKYLDQISLKYIVR
+ LKVSRPASPSSVKNDPFDNKGVDCTRAIPKKINISNMYDSSLLSLALLLYLRYHYMIP
+ GDRKLRNDATFKMFVLGLLKSNDVNIRCVALKFLLQPYFTEDKKWEDTRTLEKILPYL
+ VKSFNYDPLPWWFDPFDMLDSLIVLYNEITPMNNPVLTTLAHTNVIFCILSRFAQCLS
+ LPQHNEATLKTTTKFIKICASFAASDEKYRLLLLNDTLLLNHLEYGLESHITLIQDFI
+ SLKDEIKETTTESHSMCLPPIYDHDFVAAWLLLLKSFSRSVSALRTTLKRNKIAQLLL
+ QILSKTYTLTKECYFAGQDFMKPEIMIMGITLGSICNFVVEFSNLQSFMLRNGIIDII
+ EKMLTDPLFNSKKAWDDNEDERRIALQGIPVHEVKANSLWVLRHLMYNCQNEEKFQLL
+ AKIPMNLILDFINDPCWAVQAQCFQLLRNLTCNSRKIVNILLEKFKDVEYKIDPQTGN
+ KISIGSTYLFEFLAKKMRLLNPLDTQQKKAMEGILYIIVNLAAVNENKKQLVIEQDEI
+ LNIMSEILVETTTDSSSNGNDSNLKLACLWVLNNLLWNSSVSHYTQYAIENGLEPGHS
+ PSDSENPQSTVTIGYNESVAGGYSRGKYYDEPDGDDSSSNANDDEDDDNDEGDDEGDE
+ FVRTPAAKGSTSNVQVTRATVERCRKLVEVGLYDLVRKNITDESLSVREKARTLLYHM
+ DLLLKVK"
+ gene <321454..>321933
+ /gene="SNL1"
+ /locus_tag="YIL016W"
+ /db_xref="GeneID:854796"
+ mRNA <321454..>321933
+ /gene="SNL1"
+ /locus_tag="YIL016W"
+ /product="Snl1p"
+ /transcript_id="NM_001179366.1"
+ /db_xref="GeneID:854796"
+ CDS 321454..321933
+ /gene="SNL1"
+ /locus_tag="YIL016W"
+ /experiment="EXISTENCE:direct assay:GO:0005635 nuclear
+ envelope [PMID:9450961]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:9450961]"
+ /experiment="EXISTENCE:direct assay:GO:0043022 ribosome
+ binding [PMID:22635919]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006999
+ nuclear pore organization [PMID:9450961]"
+ /experiment="EXISTENCE:physical interaction:GO:0006457
+ protein folding [PMID:12105220]"
+ /note="Ribosome-associated protein; proposed to act in
+ protein synthesis, nuclear pore complex biogenesis and
+ maintenance as well as protein folding and prion
+ maintenance; has similarity to the mammalian BAG-1
+ protein"
+ /codon_start=1
+ /product="Snl1p"
+ /protein_id="NP_012248.1"
+ /db_xref="GeneID:854796"
+ /db_xref="SGD:S000001278"
+ /translation="MSHNAMEHWKSKLSKTSTSTYVLLAVIAVVFLVTIRRPNGSKGK
+ SSKKRASKKNKKGKNQFEKAPVPLTLEEQIDNVSLRYGNELEGRSKDLINRFDVEDEK
+ DIYERNYCNEMLLKLLIELDSIDLINVDESLRRPLKEKRKGVIKEIQAMLKSLDSLK"
+ gene <322342..>324105
+ /gene="BAR1"
+ /locus_tag="YIL015W"
+ /gene_synonym="SST1"
+ /db_xref="GeneID:854797"
+ mRNA <322342..>324105
+ /gene="BAR1"
+ /locus_tag="YIL015W"
+ /gene_synonym="SST1"
+ /product="aspartyl protease BAR1"
+ /transcript_id="NM_001179365.1"
+ /db_xref="GeneID:854797"
+ CDS 322342..324105
+ /gene="BAR1"
+ /locus_tag="YIL015W"
+ /gene_synonym="SST1"
+ /EC_number="3.4.23.35"
+ /experiment="EXISTENCE:direct assay:GO:0000747 conjugation
+ with cellular fusion [PMID:6345506]"
+ /experiment="EXISTENCE:direct assay:GO:0004190
+ aspartic-type endopeptidase activity [PMID:9249020]"
+ /experiment="EXISTENCE:direct assay:GO:0005576
+ extracellular region [PMID:3124102]"
+ /experiment="EXISTENCE:direct assay:GO:0009277 fungal-type
+ cell wall [PMID:10438739]"
+ /experiment="EXISTENCE:direct assay:GO:0043171 peptide
+ catabolic process [PMID:9249020]"
+ /experiment="EXISTENCE:direct assay:GO:0071444 cellular
+ response to pheromone [PMID:6345506]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000747
+ conjugation with cellular fusion [PMID:7050666]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071444
+ cellular response to pheromone [PMID:7050666]"
+ /note="Aspartyl protease secreted to periplasmic space of
+ mating type a cell; helps cells monitor availability of
+ mating partners; cleaves and inactivates alpha factor
+ allowing cells to recover from alpha-factor-induced cell
+ cycle arrest"
+ /codon_start=1
+ /product="aspartyl protease BAR1"
+ /protein_id="NP_012249.1"
+ /db_xref="GeneID:854797"
+ /db_xref="SGD:S000001277"
+ /translation="MSAINHLCLKLILASFAIINTITALTNDGTGHLEFLLQHEEEMY
+ YATTLDIGTPSQSLTVLFDTGSADFWVMDSSNPFCLPNSNTSSYSNATYNGEEVKPSI
+ DCRSMSTYNEHRSSTYQYLENGRFYITYADGTFADGSWGTETVSINGIDIPNIQFGVA
+ KYATTPVSGVLGIGFPRRESVKGYEGAPNEYYPNFPQILKSEKIIDVVAYSLFLNSPD
+ SGTGSIVFGAIDESKFSGDLFTFPMVNEYPTIVDAPATLAMTIQGLGAQNKSSCEHET
+ FTTTKYPVLLDSGTSLLNAPKVIADKMASFVNASYSEEEGIYILDCPVSVGDVEYNFD
+ FGDLQISVPLSSLILSPETEGSYCGFAVQPTNDSMVLGDVFLSSAYVVFDLDNYKISL
+ AQANWNASEVSKKLVNIQTDGSISGAKIATAEPWSTNEPFTVTSDIYSSTGCKSRPFL
+ QSSTASSLIAETNVQSRNCSTKMPGTRSTTVLSKPTQNSAMHQSTGAVTQTSNETKLE
+ LSSTMANSGSVSLPTSNSIDKEFEHSKSQTTSDPSVAEHSTFNQTFVHETKYRPTHKT
+ VITETVTKYSTVLINVCKPTY"
+ gene complement(324303..324374)
+ /locus_tag="YNCI0008C"
+ /db_xref="GeneID:854798"
+ tRNA complement(324303..324374)
+ /locus_tag="YNCI0008C"
+ /product="tRNA-Asp"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Aspartate tRNA (tRNA-Asp), predicted by tRNAscan-SE
+ analysis"
+ /db_xref="GeneID:854798"
+ /db_xref="SGD:S000006534"
+ repeat_region 324391..324734
+ /note="Ty3 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007019"
+ repeat_region 324831..325164
+ /note="Ty1 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007016"
+ gene complement(<325212..>325526)
+ /locus_tag="YIL014C-A"
+ /gene_synonym="YIL015C-A"
+ /db_xref="GeneID:854799"
+ mRNA complement(<325212..>325526)
+ /locus_tag="YIL014C-A"
+ /gene_synonym="YIL015C-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001181433.3"
+ /db_xref="GeneID:854799"
+ CDS complement(325212..325526)
+ /locus_tag="YIL014C-A"
+ /gene_synonym="YIL015C-A"
+ /note="hypothetical protein; null mutant shows improved
+ incorporation efficiency of noncanonical amino acids in
+ place of amber stop codon"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012250.3"
+ /db_xref="GeneID:854799"
+ /db_xref="SGD:S000003536"
+ /translation="MDIDMNYPSITTLMSNESANLLIIWGNATPDISYLSYTTNPMLG
+ DYVLNVSAINGCTEELIATHLVPTLENATQWVYDAGEYWDNYSFTDESTPLPGLSWPF
+ NE"
+ gene 325748..325820
+ /locus_tag="YNCI0009W"
+ /db_xref="GeneID:854800"
+ tRNA 325748..325820
+ /locus_tag="YNCI0009W"
+ /product="tRNA-Thr"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Threonine tRNA (tRNA-Thr), predicted by tRNAscan-SE
+ analysis"
+ /db_xref="GeneID:854800"
+ /db_xref="SGD:S000006742"
+ gene <326103..>327995
+ /gene="MNT3"
+ /locus_tag="YIL014W"
+ /db_xref="GeneID:854801"
+ mRNA <326103..>327995
+ /gene="MNT3"
+ /locus_tag="YIL014W"
+ /product="alpha-1,3-mannosyltransferase MNT3"
+ /transcript_id="NM_001179364.3"
+ /db_xref="GeneID:854801"
+ CDS 326103..327995
+ /gene="MNT3"
+ /locus_tag="YIL014W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005794 Golgi
+ apparatus [PMID:30700649]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000033
+ alpha-1,3-mannosyltransferase activity [PMID:10521541]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006493
+ protein O-linked glycosylation [PMID:10521541]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000033
+ alpha-1,3-mannosyltransferase activity [PMID:10521541]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006493 protein
+ O-linked glycosylation [PMID:10521541]"
+ /note="Alpha-1,3-mannosyltransferase; adds the fourth and
+ fifth alpha-1,3-linked mannose residues to O-linked
+ glycans during protein O-glycosylation"
+ /codon_start=1
+ /product="alpha-1,3-mannosyltransferase MNT3"
+ /protein_id="NP_012251.3"
+ /db_xref="GeneID:854801"
+ /db_xref="SGD:S000001276"
+ /translation="MLKSLKSRRLILKRLVTLLLSLFFSYLIFSASRNVTSSNKLNNH
+ ASERTAVESSAFNWIEKRQHQVRSENLMNRLSAYFLPFLSRSSHKERVLLRQLGNNEI
+ AKSDKCRYIFEVLYKIDPDWDNAQTAKFYNVDGVDNTLASLLGERLRSYDYCFLSGQL
+ DPTAIFANSTVNPHDLQNRMFPFLKKINEESKTVMWPIITDMTTGEAVPAPEVDMESS
+ NFNGNFWSNWNRLSKGRGFVLTIAEKDVPLFLKQLKVMEFSKNELPFQIVSTGNELSA
+ ESIAKISETAKETEQRVYLVDCSTVLDTNFANTYISFFQNKWVATLFNTFEEYILLDA
+ DVVPFVGSDYFFDSPSYRESGILLFKDRVMENEQTFQYCIEMLNEVEPSAQERRFIGS
+ RLVFDSSLPFSSETSEEASVYYNFFKKLRLHHVDSGLVVVNKLEKLNGLLMSFMLNLD
+ GKLQRCVYGDKEIFWLGQLYAGQDYSINPVDGSIIGPVNEEPENDDGHKSGMYYICST
+ QIAHSDSKNRLLWVNGGLKTCKISNSAEDDFGREPEYFKSRYGDISKLKRIYDASLNV
+ EGLIVPDVSVHPWMQIKECSNYMYCAYATGDGHTNSELDEGRLITFTEKELRYINDIS
+ RTWNAN"
+ gene complement(<328207..>332442)
+ /gene="PDR11"
+ /locus_tag="YIL013C"
+ /db_xref="GeneID:854802"
+ mRNA complement(<328207..>332442)
+ /gene="PDR11"
+ /locus_tag="YIL013C"
+ /product="ATP-binding cassette multidrug transporter
+ PDR11"
+ /transcript_id="NM_001179363.1"
+ /db_xref="GeneID:854802"
+ CDS complement(328207..332442)
+ /gene="PDR11"
+ /locus_tag="YIL013C"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:33375075]"
+ /experiment="EXISTENCE:direct assay:GO:0042626
+ ATPase-coupled transmembrane transporter activity
+ [PMID:28922409]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:genetic interaction:GO:0035376
+ sterol import [PMID:12077145|PMID:21110944]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0035376 sterol
+ import [PMID:12077145]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042626
+ ATPase-coupled transmembrane transporter activity
+ [PMID:28922409]"
+ /note="ATP-binding cassette (ABC) transporter; multidrug
+ transporter involved in multiple drug resistance; mediates
+ sterol uptake when sterol biosynthesis is compromised;
+ regulated by Pdr1p; required for anaerobic growth; PDR11
+ has a paralog, AUS1, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="ATP-binding cassette multidrug transporter
+ PDR11"
+ /protein_id="NP_012252.1"
+ /db_xref="GeneID:854802"
+ /db_xref="SGD:S000001275"
+ /translation="MSLSKYFNPIPDASVTFDGATVQLEESLGAVQNDEESASEFKNV
+ GHLEISDITFRANEGEVVLVLGNPTSALFKGLFHGHKHLKYSPEGSIRFKDNEYKQFA
+ SKCPHQIIYNNEQDIHFPYLTVEQTIDFALSCKFHIPKQERIEMRDELLKEFGLSHVK
+ KTYVGNDYVRGVSGGERKRISIIETFIANGSVYLWDNSTKGLDSATALEFLSITQKMA
+ KATRSVNFVKISQASDKIVSKFDKILMLGDSFQVFYGTMEECLTHFHDTLQIKKNPND
+ CIIEYLTSILNFKFKETSNSIVGLDTPSVVSEENQALNINNETDLHTLWIQSPYYKHW
+ KAITSKTVQECTRKDVNPDDISPIFSIPLKTQLKTCTVRAFERIIGDRNYLISQFVSV
+ VVQSLVIGSLFYNIPLTTIGSFSRGSLTFFSILFFTFLSLADMPASFQRQPVVRKHVQ
+ LHFYYNWVETLATNFFDCCSKFILVVIFTIILYFLAHLQYNAARFFIFLLFLSVYNFC
+ MVSLFALTALIAPTLSMANLLAGILLLAIAMYASYVIYMKDMHPWFIWIAYLNPAMFA
+ MEAILSNELFNLKLDCHESIIPRGEYYDNISFSHKACAWQGATLGNDYVRGRDYLKSG
+ LKYTYHHVWRNFGIIIGFLCFFLFCSLLAAEYITPLFTRENLLRWNNYLKRYCPFLNS
+ QKKNNKSAITNNDGVCTPKTPIANFSTSSSSVPSVSHQYDTDYNIKHPDETVNNHTKE
+ SVAMETQKHVISWKNINYTIGDKKLINDASGYISSGLTALMGESGAGKTTLLNVLSQR
+ TESGVVTGELLIDGQPLTNIDAFRRSIGFVQQQDVHLELLTVRESLEISCVLRGDGDR
+ DYLGVVSNLLRLPSEKLVADLSPTQRKLLSIGVELVTKPSLLLFLDEPTSGLDAEAAL
+ TIVQFLKKLSMQGQAILCTIHQPSKSVISYFDNIYLLKRGGECVYFGSLPNACDYFVA
+ HDRRLTFDREMDNPADFVIDVVGSGSTNIPMDDAEKPTSSKIDEPVSYHKQSDSINWA
+ ELWQSSPEKVRVADDLLLLEEEARKSGVDFTTSVWSPPSYMEQIKLITKRQYICTKRD
+ MTYVFAKYALNAGAGLFIGFSFWRTKHNINGLQDAIFLCFMMLCVSSPLINQVQDKAL
+ QSKEVYIAREARSNTYHWTVLLIAQTIVELPLAISSSTLFFLCCYFCCGFETSARVAG
+ VFYLNYILFSMYYLSFGLWLLYSAPDLQTAAVFVAFLYSFTASFCGVMQPYSLFPRFW
+ TFMYRVSPYTYFIETFVSLLLHDREVNCSTSEMVPSQPVMGQTCGQFMKPFIDEFGGK
+ LHINNTYTVCAYCMYTVGDDFLAQENMSYHHRWRNFGFEWVFVCFNIAAMFVGFYLTY
+ IKKIWPSVIDGIKKCIPSMRRSKTSHNPNEQSV"
+ gene <333011..>333352
+ /locus_tag="YIL012W"
+ /db_xref="GeneID:854803"
+ mRNA <333011..>333352
+ /locus_tag="YIL012W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001348845.1"
+ /db_xref="GeneID:854803"
+ CDS 333011..333352
+ /locus_tag="YIL012W"
+ /note="hypothetical protein; conserved across S.
+ cerevisiae strains"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001335785.1"
+ /db_xref="GeneID:854803"
+ /db_xref="SGD:S000001274"
+ /translation="MCLGKLYFEQLILVRCIKGRQSGNITTGESRCVSWNVYCTTSMI
+ GLFSWRKMLLFQHFSYTQRENRQIGGKTWSLISSLFHLNETLASALHHPYETLALYEA
+ YFALREKKFLL"
+ gene <333727..>334536
+ /gene="TIR3"
+ /locus_tag="YIL011W"
+ /gene_synonym="YIB1"
+ /db_xref="GeneID:854804"
+ mRNA <333727..>334536
+ /gene="TIR3"
+ /locus_tag="YIL011W"
+ /gene_synonym="YIB1"
+ /product="Tir3p"
+ /transcript_id="NM_001179361.1"
+ /db_xref="GeneID:854804"
+ CDS 333727..334536
+ /gene="TIR3"
+ /locus_tag="YIL011W"
+ /gene_synonym="YIB1"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0009277 fungal-type
+ cell wall [PMID:10383953]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /note="Cell wall mannoprotein; member of Srp1p/Tip1p
+ family of serine-alanine-rich proteins; expressed under
+ anaerobic conditions and required for anaerobic growth;
+ TIR3 has a paralog, TIR2, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Tir3p"
+ /protein_id="NP_012254.1"
+ /db_xref="GeneID:854804"
+ /db_xref="SGD:S000001273"
+ /translation="MSFTKIAALLAVAAASTQLVSAEVGQYEIVEFDAILADVKANLE
+ QYMSLAMNNPDFTLPSGVLDVYQHMTTATDDSYTSYFTEMDFAQITTAMVQVPWYSSR
+ LEPEIIAALQSAGISITSLGQTVSESGSESATASSDASSASESSSAASSSASESSSAA
+ SSSASESSSAASSSASESSSAASSSASEAAKSSSSAKSSGSSAASSAASSASSKASSA
+ ASSSAKASSSAEKSTNSSSSATSKNAGAAMDMGFFSAGVGAAIAGAAAMLL"
+ gene <334882..>335529
+ /gene="DOT5"
+ /locus_tag="YIL010W"
+ /db_xref="GeneID:854805"
+ mRNA <334882..>335529
+ /gene="DOT5"
+ /locus_tag="YIL010W"
+ /product="thioredoxin peroxidase DOT5"
+ /transcript_id="NM_001179360.3"
+ /db_xref="GeneID:854805"
+ CDS 334882..335529
+ /gene="DOT5"
+ /locus_tag="YIL010W"
+ /EC_number="1.11.1.24"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:2408019|PMID:10681558]"
+ /experiment="EXISTENCE:direct assay:GO:0008379 thioredoxin
+ peroxidase activity [PMID:10681558]"
+ /experiment="EXISTENCE:direct assay:GO:0045454 cell redox
+ homeostasis [PMID:10681558]"
+ /experiment="EXISTENCE:genetic interaction:GO:0034599
+ cellular response to oxidative stress [PMID:15051715]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008379
+ thioredoxin peroxidase activity [PMID:10681558]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045454 cell
+ redox homeostasis [PMID:10681558]"
+ /note="Nuclear thiol peroxidase; functions as an
+ alkyl-hydroperoxide reductase during post-diauxic growth"
+ /codon_start=1
+ /product="thioredoxin peroxidase DOT5"
+ /protein_id="NP_012255.3"
+ /db_xref="GeneID:854805"
+ /db_xref="SGD:S000001272"
+ /translation="MGEALRRSTRIAISKRMLEEEESKLAPISTPEVPKKKIKTGPKH
+ NANQAVVQEANRSSDVNELEIGDPIPDLSLLNEDNDSISLKKITENNRVVVFFVYPRA
+ STPGCTRQACGFRDNYQELKKYAAVFGLSADSVTSQKKFQSKQNLPYHLLSDPKREFI
+ GLLGAKKTPLSGSIRSHFIFVDGKLKFKRVKISPEVSVNDAKKEVLEVAEKFKEE"
+ gene complement(<335666..>336212)
+ /gene="EST3"
+ /locus_tag="YIL009C-A"
+ /db_xref="GeneID:854806"
+ mRNA complement(<335666..>336212)
+ /gene="EST3"
+ /locus_tag="YIL009C-A"
+ /product="telomerase subunit EST3"
+ /transcript_id="NM_001184306.2"
+ /db_xref="GeneID:854806"
+ CDS complement(join(335666..335935,335937..336212))
+ /gene="EST3"
+ /locus_tag="YIL009C-A"
+ /experiment="EXISTENCE:direct assay:GO:0003924 GTPase
+ activity [PMID:20884318]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:12101098]"
+ /experiment="EXISTENCE:direct assay:GO:0005697 telomerase
+ holoenzyme complex [PMID:10898986]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0007004 telomere
+ maintenance via telomerase [PMID:10898986]"
+ /experiment="EXISTENCE:direct assay:GO:0033677 DNA/RNA
+ helicase activity [PMID:16884717]"
+ /experiment="EXISTENCE:direct assay:GO:0042162 telomeric
+ DNA binding [PMID:16884717]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003924 GTPase
+ activity [PMID:20884318]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007004
+ telomere maintenance via telomerase [PMID:8978029]"
+ /ribosomal_slippage
+ /note="Component of the telomerase holoenzyme; involved in
+ telomere replication; synthesis of the full-length protein
+ results from a programmed +1 ribosomal frameshift"
+ /codon_start=1
+ /product="telomerase subunit EST3"
+ /protein_id="NP_012256.1"
+ /db_xref="GeneID:854806"
+ /db_xref="SGD:S000006432"
+ /translation="MPKVILESHSKPTDSVFLQPWIKALIEDNSEHDQYHPSGHVIPS
+ LTKQDLALPHMSPTILTNPCHFAKITKFYNVCDYKVYASIRDSSHQILVEFSQECVSN
+ FERTHNCRITSETTNCLMIIGDADLVYVTNSRAMSHFKICLSNISSKEIVPVLNVNQA
+ TIFDIDQVGSLSTFPFVYKYL"
+ gene complement(336349..336420)
+ /locus_tag="YNCI0010C"
+ /db_xref="GeneID:854807"
+ tRNA complement(336349..336420)
+ /locus_tag="YNCI0010C"
+ /product="tRNA-Asp"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Aspartate tRNA (tRNA-Asp), predicted by tRNAscan-SE
+ analysis"
+ /db_xref="GeneID:854807"
+ /db_xref="SGD:S000006535"
+ repeat_region complement(336708..336897)
+ /note="Ty1 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007014"
+ gene <339344..>341428
+ /gene="FAA3"
+ /locus_tag="YIL009W"
+ /db_xref="GeneID:854808"
+ mRNA <339344..>341428
+ /gene="FAA3"
+ /locus_tag="YIL009W"
+ /product="long-chain fatty acid-CoA ligase FAA3"
+ /transcript_id="NM_001179359.1"
+ /db_xref="GeneID:854808"
+ CDS 339344..341428
+ /gene="FAA3"
+ /locus_tag="YIL009W"
+ /EC_number="6.2.1.3"
+ /experiment="EXISTENCE:direct assay:GO:0004467 long-chain
+ fatty acid-CoA ligase activity [PMID:8206942]"
+ /experiment="EXISTENCE:direct assay:GO:0031957 very
+ long-chain fatty acid-CoA ligase activity [PMID:8206942]"
+ /experiment="EXISTENCE:genetic interaction:GO:0001676
+ long-chain fatty acid metabolic process [PMID:7962057]"
+ /note="Long chain fatty acyl-CoA synthetase; activates
+ imported fatty acids with a preference for C16:0-C18:0
+ chain lengths; green fluorescent protein (GFP)-fusion
+ protein localizes to the cell periphery"
+ /codon_start=1
+ /product="long-chain fatty acid-CoA ligase FAA3"
+ /protein_id="NP_012257.1"
+ /db_xref="GeneID:854808"
+ /db_xref="SGD:S000001271"
+ /translation="MSEQHSVAVGKAANEHETAPRRNVRVKKRPLIRPLNSSASTLYE
+ FALECFNKGGKRDGMAWRDVIEIHETKKTIVRKVDGKDKSIEKTWLYYEMSPYKMMTY
+ QELIWVMHDMGRGLAKIGIKPNGEHKFHIFASTSHKWMKIFLGCISQGIPVVTAYDTL
+ GESGLIHSMVETESAAIFTDNQLLAKMIVPLQSAKDIKFLIHNEPIDPNDRRQNGKLY
+ KAAKDAINKIREVRPDIKIYSFEEVVKIGKKSKDEVKLHPPEPKDLACIMYTSGSISA
+ PKGVVLTHYNIVSGIAGVGHNVFGWIGSTDRVLSFLPLAHIFELVFEFEAFYWNGILG
+ YGSVKTLTNTSTRNCKGDLVEFKPTIMIGVAAVWETVRKAILEKISDLTPVLQKIFWS
+ AYSMKEKSVPCTGFLSRMVFKKVRQATGGHLKYIMNGGSAISIDAQKFFSIVLCPMII
+ GYGLTETVANACVLEPDHFEYGIVGDLVGSVTAKLVDVKDLGYYAKNNQGELLLKGAP
+ VCSEYYKNPIETAVSFTYDGWFRTGDIVEWTPKGQLKIIDRRKNLVKTLNGEYIALEK
+ LESVYRSNSYVKNICVYADESRVKPVGIVVPNPGPLSKFAVKLRIMKKGEDIENYIHD
+ KALRNAVFKEMIATAKSQGLVGIELLCGIVFFDEEWTPENGFVTSAQKLKRREILAAV
+ KSEVERVYKENS"
+ rep_origin 341976..342045
+ /note="ARS919; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118398"
+ gene <342536..>342835
+ /gene="URM1"
+ /locus_tag="YIL008W"
+ /db_xref="GeneID:854809"
+ mRNA <342536..>342835
+ /gene="URM1"
+ /locus_tag="YIL008W"
+ /product="ubiquitin-related modifier URM1"
+ /transcript_id="NM_001179358.3"
+ /db_xref="GeneID:854809"
+ CDS 342536..342835
+ /gene="URM1"
+ /locus_tag="YIL008W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095|PMID:14551258]"
+ /experiment="EXISTENCE:direct assay:GO:0031386 protein tag
+ activity [PMID:14551258|PMID:10713047]"
+ /experiment="EXISTENCE:direct assay:GO:0032447 protein
+ urmylation [PMID:14555475|PMID:10713047]"
+ /experiment="EXISTENCE:direct assay:GO:0034599 cellular
+ response to oxidative stress [PMID:21209336]"
+ /experiment="EXISTENCE:genetic interaction:GO:0001403
+ invasive growth in response to glucose limitation
+ [PMID:14551258]"
+ /experiment="EXISTENCE:genetic interaction:GO:0007114 cell
+ budding [PMID:14551258]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001403
+ invasive growth in response to glucose limitation
+ [PMID:14551258]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0002098 tRNA
+ wobble uridine modification [PMID:18755837]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0002143 tRNA
+ wobble position uridine thiolation
+ [PMID:19145231|PMID:19151091|PMID:18664566]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031386 protein
+ tag activity [PMID:14551258]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0032447 protein
+ urmylation [PMID:14555475]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034599
+ cellular response to oxidative stress [PMID:14555475]"
+ /note="Ubiquitin-like protein involved in thiolation of
+ cytoplasmic tRNAs; receives sulfur from the E1-like enzyme
+ Uba4p and transfers it to tRNA; also functions as a
+ protein tag with roles in nutrient sensing and oxidative
+ stress response"
+ /codon_start=1
+ /product="ubiquitin-related modifier URM1"
+ /protein_id="NP_012258.3"
+ /db_xref="GeneID:854809"
+ /db_xref="SGD:S000001270"
+ /translation="MVNVKVEFLGGLDAIFGKQRVHKIKMDKEDPVTVGDLIDHIVST
+ MINNPNDVSIFIEDDSIRPGIITLINDTDWELEGEKDYILEDGDIISFTSTLHGG"
+ gene complement(<342994..>343656)
+ /gene="NAS2"
+ /locus_tag="YIL007C"
+ /db_xref="GeneID:854810"
+ mRNA complement(<342994..>343656)
+ /gene="NAS2"
+ /locus_tag="YIL007C"
+ /product="Nas2p"
+ /transcript_id="NM_001179357.3"
+ /db_xref="GeneID:854810"
+ CDS complement(342994..343656)
+ /gene="NAS2"
+ /locus_tag="YIL007C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:19446323]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11489916]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:19446323]"
+ /experiment="EXISTENCE:direct assay:GO:0070682 proteasome
+ regulatory particle assembly [PMID:19446322]"
+ /experiment="EXISTENCE:genetic interaction:GO:0070682
+ proteasome regulatory particle assembly
+ [PMID:19446322|PMID:19446323]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0070682
+ proteasome regulatory particle assembly
+ [PMID:19446323|PMID:19446322]"
+ /note="Evolutionarily conserved 19S regulatory particle
+ assembly-chaperone; involved in assembly of the base
+ subcomplex of the 19S proteasomal regulatory particle
+ (RP); non-essential gene; interacts with Rpn4p; protein
+ abundance increases in response to DNA replication stress;
+ ortholog of human p27"
+ /codon_start=1
+ /product="Nas2p"
+ /protein_id="NP_012259.3"
+ /db_xref="GeneID:854810"
+ /db_xref="SGD:S000001269"
+ /translation="MEEEELSKLLANVKIDPSLTSRISQIDSFKLSELMVLKTDIETQ
+ LEAYFSVLEQQGIGMDSALVTPDGYPRSDVDVLQVTMIRKNVNMLKNDLNHLLQRSHV
+ LLNQHFDNMNVKSNQDARRNNDDQAIQYTIPFAFISEVVPGSPSDKADIKVDDKLISI
+ GNVHAANHSKLQNIQMVVMKNEDRPLPVLLLREGQILKTSLTPSRNWNGRGLLGCRIQ
+ EL"
+ gene <344062..>345183
+ /gene="YIA6"
+ /locus_tag="YIL006W"
+ /gene_synonym="NDT1"
+ /db_xref="GeneID:854811"
+ mRNA <344062..>345183
+ /gene="YIA6"
+ /locus_tag="YIL006W"
+ /gene_synonym="NDT1"
+ /product="NAD+ transporter"
+ /transcript_id="NM_001179356.1"
+ /db_xref="GeneID:854811"
+ CDS 344062..345183
+ /gene="YIA6"
+ /locus_tag="YIL006W"
+ /gene_synonym="NDT1"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239|PMID:16291748]"
+ /experiment="EXISTENCE:direct assay:GO:0035352 NAD
+ transmembrane transport [PMID:16291748]"
+ /experiment="EXISTENCE:direct assay:GO:0051724 NAD
+ transmembrane transporter activity [PMID:16291748]"
+ /experiment="EXISTENCE:genetic interaction:GO:0035352 NAD
+ transmembrane transport [PMID:16291748]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051724 NAD
+ transmembrane transporter activity [PMID:16291748]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006850
+ mitochondrial pyruvate transmembrane transport
+ [PMID:12887330]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0035352 NAD
+ transmembrane transport [PMID:16291748]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051724 NAD
+ transmembrane transporter activity [PMID:16291748]"
+ /note="Mitochondrial NAD+ transporter; involved in the
+ transport of NAD+ into the mitochondria (see also YEA6);
+ member of the mitochondrial carrier subfamily; disputed
+ role as a pyruvate transporter; has putative mouse and
+ human orthologs; YIA6 has a paralog, YEA6, that arose from
+ the whole genome duplication; human NAD+ transporter
+ MCART1/SLC25A51 functionally complements the yia6 yea6
+ double null mutant, and yeast YIA6 reciprocally
+ complements defects in MCART1/SLC25A51 null cells"
+ /codon_start=1
+ /product="NAD+ transporter"
+ /protein_id="NP_012260.1"
+ /db_xref="GeneID:854811"
+ /db_xref="SGD:S000001268"
+ /translation="MTQTDNPVPNCGLLPEQQYCSADHEEPLLLHEEQLIFPDHSSQL
+ SSADIIEPIKMNSSTESIIGTTLRKKWVPLSSTQITALSGAFAGFLSGVAVCPLDVAK
+ TRLQAQGLQTRFENPYYRGIMGTLSTIVRDEGPRGLYKGLVPIVLGYFPTWMIYFSVY
+ EFSKKFFHGIFPQFDFVAQSCAAITAGAASTTLTNPIWVVKTRLMLQSNLGEHPTHYK
+ GTFDAFRKLFYQEGFKALYAGLVPSLLGLFHVAIHFPIYEDLKVRFHCYSRENNTNSI
+ NLQRLIMASSVSKMIASAVTYPHEILRTRMQLKSDIPDSIQRRLFPLIKATYAQEGLK
+ GFYSGFTTNLVRTIPASAITLVSFEYFRNRLENISTMVI"
+ gene <345692..>347797
+ /gene="EPS1"
+ /locus_tag="YIL005W"
+ /db_xref="GeneID:854812"
+ mRNA <345692..>347797
+ /gene="EPS1"
+ /locus_tag="YIL005W"
+ /product="protein disulfide isomerase EPS1"
+ /transcript_id="NM_001179355.1"
+ /db_xref="GeneID:854812"
+ CDS 345692..347797
+ /gene="EPS1"
+ /locus_tag="YIL005W"
+ /EC_number="5.3.4.1"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:10545109]"
+ /experiment="EXISTENCE:direct assay:GO:0019153
+ protein-disulfide reductase (glutathione) activity
+ [PMID:16002399]"
+ /experiment="EXISTENCE:direct assay:GO:0051082 unfolded
+ protein binding [PMID:16002399]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003756 protein
+ disulfide isomerase activity [PMID:11157982]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006621 protein
+ retention in ER lumen [PMID:10545109]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0036503 ERAD
+ pathway [PMID:12881414]"
+ /experiment="EXISTENCE:physical interaction:GO:0005515
+ protein binding [PMID:16002399]"
+ /note="ER protein with chaperone and co-chaperone
+ activity; involved in retention of resident ER proteins;
+ has a role in recognizing proteins targeted for
+ ER-associated degradation (ERAD), member of the protein
+ disulfide isomerase family"
+ /codon_start=1
+ /product="protein disulfide isomerase EPS1"
+ /protein_id="NP_012261.1"
+ /db_xref="GeneID:854812"
+ /db_xref="SGD:S000001267"
+ /translation="MKMNLKRLVVTFFSCITFLLKFTIAAAEPPEGFPEPLNPTNFKE
+ ELSKGLHIIDFYSPYCPHCKHLAPVWMETWEEFKEESKTLNITFSQVNCIESADLCGD
+ ENIEYFPEIRLYNPSGYIKSFTETPRTKESLIAFARRESMDPNNLDTDLDSAKSESQY
+ LEGFDFLELIAGKATRPHLVSFWPTKDMKNSDDSLEFKNCDKCHEFQRTWKIISRQLA
+ VDDINTGHVNCESNPTICEELGFGDLVKITNHRADREPKVALVLPNKTSNNLFDYPNG
+ YSAKSDGYVDFARRTFTNSKFPNITEGELEKKANRDIDFLQERGRVTNNDIHLVFSYD
+ PETVVIEDFDILEYLIEPLSKIPNIYLHQIDKNLINLSRNLFGRMYEKINYDASQTQK
+ VFNKEYFTMNTVTQLPTFFMFKDGDPISYVFPGYSTTEMRNIDAIMDWVKKYSNPLVT
+ EVDSSNLKKLISFQTKSYSDLAIQLISSTDHKHIKGSNKLIKNLLLASWEYEHIRMEN
+ NFEEINERRARKADGIKKIKEKKAPANKIVDKMREEIPHMDQKKLLLGYLDISKEKNF
+ FRKYGITGEYKIGDVIIIDKSNNYYYNKDNFGNSLTSNNPQLLREAFVSLNIPSKALY
+ SSKLKGRLINSPFHNVLSFLDIIHGNGMPGYLIVIVLFIAILKGPSIYRRYKVRKHYR
+ AKRNAVGILGNMEKKKNQD"
+ gene complement(<347946..>348505)
+ /gene="BET1"
+ /locus_tag="YIL004C"
+ /gene_synonym="SLY12"
+ /db_xref="GeneID:854813"
+ mRNA complement(join(<347946..348363,348495..>348505))
+ /gene="BET1"
+ /locus_tag="YIL004C"
+ /gene_synonym="SLY12"
+ /product="Bet1p"
+ /transcript_id="NM_001179354.3"
+ /db_xref="GeneID:854813"
+ CDS complement(join(347946..348363,348495..348505))
+ /gene="BET1"
+ /locus_tag="YIL004C"
+ /gene_synonym="SLY12"
+ /experiment="EXISTENCE:direct assay:GO:0005484 SNAP
+ receptor activity [PMID:11001058]"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:1396561]"
+ /experiment="EXISTENCE:direct assay:GO:0006890 retrograde
+ vesicle-mediated transport, Golgi to endoplasmic reticulum
+ [PMID:9813082]"
+ /experiment="EXISTENCE:direct assay:GO:0006906 vesicle
+ fusion [PMID:11001046]"
+ /experiment="EXISTENCE:direct assay:GO:0016020 membrane
+ [PMID:1396561]"
+ /experiment="EXISTENCE:direct assay:GO:0030134
+ COPII-coated ER to Golgi transport vesicle
+ [PMID:11157978]"
+ /experiment="EXISTENCE:direct assay:GO:0031201 SNARE
+ complex [PMID:11001058]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005484 SNAP
+ receptor activity [PMID:3312234]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006888
+ endoplasmic reticulum to Golgi vesicle-mediated transport
+ [PMID:3312234]"
+ /note="Type II membrane protein required for vesicular
+ transport; required for vesicular transport between the
+ endoplasmic reticulum and Golgi complex; v-SNARE with
+ similarity to synaptobrevins"
+ /codon_start=1
+ /product="Bet1p"
+ /protein_id="NP_012262.3"
+ /db_xref="GeneID:854813"
+ /db_xref="SGD:S000001266"
+ /translation="MSSRFAGGNAYQRDTGRTQLFGPADGSNSLDDNVSSALGSTDKL
+ DYSQSTLASLESQSEEQMGAMGQRIKALKSLSLKMGDEIRGSNQTIDQLGDTFHNTSV
+ KLKRTFGNMMEMARRSGISIKTWLIIFFMVGVLFFWVWIT"
+ gene <349122..>350003
+ /gene="CFD1"
+ /locus_tag="YIL003W"
+ /gene_synonym="DRE3"
+ /db_xref="GeneID:854814"
+ mRNA <349122..>350003
+ /gene="CFD1"
+ /locus_tag="YIL003W"
+ /gene_synonym="DRE3"
+ /product="iron-sulfur cluster assembly protein CFD1"
+ /transcript_id="NM_001179353.1"
+ /db_xref="GeneID:854814"
+ CDS 349122..350003
+ /gene="CFD1"
+ /locus_tag="YIL003W"
+ /gene_synonym="DRE3"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:12970194]"
+ /experiment="EXISTENCE:direct assay:GO:0016226 iron-sulfur
+ cluster assembly [PMID:17401378]"
+ /experiment="EXISTENCE:direct assay:GO:0016887 ATP
+ hydrolysis activity [PMID:26195633]"
+ /experiment="EXISTENCE:direct assay:GO:0051539 4 iron, 4
+ sulfur cluster binding [PMID:17401378]"
+ /experiment="EXISTENCE:direct assay:GO:1904564 Nbp35-Cfd1
+ ATPase complex [PMID:26195633]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0002098 tRNA
+ wobble uridine modification [PMID:18755837]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016226
+ iron-sulfur cluster assembly [PMID:12970194]"
+ /note="Highly conserved iron-sulfur cluster binding
+ protein; localized in the cytoplasm; forms a complex with
+ Nbp35p that is involved in iron-sulfur protein assembly in
+ the cytosol"
+ /codon_start=1
+ /product="iron-sulfur cluster assembly protein CFD1"
+ /protein_id="NP_012263.1"
+ /db_xref="GeneID:854814"
+ /db_xref="SGD:S000001265"
+ /translation="MEEQEIGVPAASLAGIKHIILILSGKGGVGKSSVTTQTALTLCS
+ MGFKVGVLDIDLTGPSLPRMFGLENESIYQGPEGWQPVKVETNSTGSLSVISLGFLLG
+ DRGNSVIWRGPKKTSMIKQFISDVAWGELDYLLIDTPPGTSDEHISIAEELRYSKPDG
+ GIVVTTPQSVATADVKKEINFCKKVDLKILGIIENMSGFVCPHCAECTNIFSSGGGKR
+ LSEQFSVPYLGNVPIDPKFVEMIENQVSSKKTLVEMYRESSLCPIFEEIMKKLRKQDT
+ TTPVVDKHEQPQIESPK"
+ gene <350301..>350510
+ /gene="CMI7"
+ /locus_tag="YIL002W-A"
+ /db_xref="GeneID:1466493"
+ mRNA <350301..>350510
+ /gene="CMI7"
+ /locus_tag="YIL002W-A"
+ /product="Cmi7p"
+ /transcript_id="NM_001184653.1"
+ /db_xref="GeneID:1466493"
+ CDS 350301..350510
+ /gene="CMI7"
+ /locus_tag="YIL002W-A"
+ /note="Putative mitochondrial hypothetical protein;
+ identified by expression profiling and mass spectrometry"
+ /codon_start=1
+ /product="Cmi7p"
+ /protein_id="NP_878098.1"
+ /db_xref="GeneID:1466493"
+ /db_xref="SGD:S000028835"
+ /translation="MTRDTPEDVSTAGAKDILDVLNLLKGGEEKISEVELKLDEMEKK
+ MDSLLVQLEDLHRDNNDLAKSSSQK"
+ gene complement(<350591..>353431)
+ /gene="INP51"
+ /locus_tag="YIL002C"
+ /gene_synonym="SJL1"
+ /db_xref="GeneID:854815"
+ mRNA complement(<350591..>353431)
+ /gene="INP51"
+ /locus_tag="YIL002C"
+ /gene_synonym="SJL1"
+ /product="phosphoinositide 5-phosphatase INP51"
+ /transcript_id="NM_001179352.3"
+ /db_xref="GeneID:854815"
+ CDS complement(350591..353431)
+ /gene="INP51"
+ /locus_tag="YIL002C"
+ /gene_synonym="SJL1"
+ /EC_number="3.1.3.36"
+ /experiment="EXISTENCE:direct assay:GO:0004439
+ phosphatidylinositol-4,5-bisphosphate 5-phosphatase
+ activity [PMID:9565610|PMID:10224048]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:17452534]"
+ /experiment="EXISTENCE:direct assay:GO:0016020 membrane
+ [PMID:9560389]"
+ /experiment="EXISTENCE:direct assay:GO:0046856
+ phosphatidylinositol dephosphorylation [PMID:10224048]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004439
+ phosphatidylinositol-4,5-bisphosphate 5-phosphatase
+ activity [PMID:9438131|PMID:9565610]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0046856
+ phosphatidylinositol dephosphorylation
+ [PMID:9438131|PMID:9560389]"
+ /note="Phosphatidylinositol 4,5-bisphosphate
+ 5-phosphatase; synaptojanin-like protein with an
+ N-terminal Sac1 domain, plays a role in
+ phosphatidylinositol 4,5-bisphosphate homeostasis and in
+ endocytosis; null mutation confers cold-tolerant growth"
+ /codon_start=1
+ /product="phosphoinositide 5-phosphatase INP51"
+ /protein_id="NP_012264.3"
+ /db_xref="GeneID:854815"
+ /db_xref="SGD:S000001264"
+ /translation="MRLFIGRRSRSIVISSNNYCLSFQRLRSIPGASSQQRQLSKTPS
+ VTIKSYPDTDLSSDSNYLEVKSCIFNGLLGLVCLNGDIYVAVISGVQNVGFPRWKLID
+ HQVRPSESIYKVLDVDFYSLENDVFDYLLCERSEQNYDKLIHEHPCGPLKKLFSDGTF
+ YYSRDFDISNIVKNHGLSHNLEYTVDNQDLSFIWNANLASEVINWRSKISNEEKQLFA
+ NAGFLTFVIRGYCKTALIEDGPNTASITIISRISTESKQDTLELEGISEDGRVSLFVE
+ TEIVVTTEKFIFSYTQVNGSIPLFWESVESQLLYGKKIKVTKDSIEAQGAFDRHFDNL
+ TSKYGVVSIVNIIKPKSESQEKLALTYKDCAESKGIKITNIEYSSSVLTKSPHKLLYL
+ LKQDIYEFGAFAYDISRGIYFAKQTGVLRISAFDSIEKPNTVERLVSKEVLELTTNEI
+ DVFELTSPFLDAHDKLWSENYYWLDRTYTKHTKNSGKYTKVYSKLFGSRVRLYDPLHI
+ YISQYLKQLRSKYTFEKDISIFAGTFNISGKIPKDDIKDWIFPKSMSKEDEMADLYVI
+ GLEEVVELTPGHMLATDPYVRQFWEKKILTLLNGPGRKKKYIRLWSTQLGGILLLLFM
+ NETEYSKVKHIEGDVKKTGFGGMASNKGAVAVSFKYSATRFCVLVSHLAAGLENVEQR
+ HNDYKTIAKSIRFSKGLRIKDHDAIIWMGDFNYRILMSNEDVRRKIVSKEYASLFEKD
+ QLNQQMIAGESFPYFHEMAIDFPPTYKFDPGTKNYDTSEKMRIPAWTDRILSRGEVLE
+ QLEYKCCEDILFSDHRPVYAIFRARVTVVDEQKKTTLGTQIYEKIMERLEGLDDDEKI
+ AVLSDDAFVIESFEGSDSIAGPTHSPTPIPEPKRGRKLPPPSSDLKKWWIGSGKQVKV
+ VLDVDPAVYMINPKRDPNPFVENEDEPLFIER"
+ gene <353940..>355481
+ /locus_tag="YIL001W"
+ /db_xref="GeneID:854816"
+ mRNA <353940..>355481
+ /locus_tag="YIL001W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179351.3"
+ /db_xref="GeneID:854816"
+ CDS 353940..355481
+ /locus_tag="YIL001W"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /note="hypothetical protein; contains a BTB/POZ domain
+ which generally function in protein interactions; deletion
+ slightly improved competitive fitness in rich media;
+ GFP-tagged protein is localized to the cytoplasm"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012265.3"
+ /db_xref="GeneID:854816"
+ /db_xref="SGD:S000001263"
+ /translation="MADKLMDKNFEELCYSCRTGDMDNVDRLISTGVNVNSVDKFDNS
+ PLFLASLCGHEAVVKLLLQRGAVCDRDRYEGARCIYGALTDTIRDTLLSYDISKAVDV
+ KQPFATHISSMYNDEGFLKRDITFRVSNGKLFTAHKFLLCARSEILAEKMVNEWAKHE
+ IVSLEVRPDIFDIFLKFLYLIPILHQIEPGQYEELIELSSKFDIELLPEFLDKARHTA
+ DPTEKSRLMSDYQYKFTEVARSQLLIFVNNCIFRSTVDLANSERRVFSLMNCPAYPDV
+ QLMVKNRNGAIRIYPCHLAVLSRAEYFKVMFTNNFKEKVTYIKAKHVTGKYNSIIPQL
+ TLPNCEFEVAEIILRYLYADNTDIPWMYAVDVLLLADILLEDRLKTIASTIITQSKEF
+ IQQYNVFDVLYLSWEIGVERLEQFAAKFIAIHLQELYKDPEIKRAIMLSSQRISLRQE
+ TDTIELVDDIRYYLLRKYSFEPDDVELFENQDDLEYLKQVGYLEYRKDMGMLDNILAD
+ LELDV"
+ centromere 355629..355745
+ /note="CEN9; Chromosome IX centromere"
+ /db_xref="SGD:S000006470"
+ centromere 355629..355638
+ /note="CEN9_CDEI of CEN9"
+ centromere 355639..355720
+ /note="CEN9_CDEII of CEN9"
+ centromere 355721..355745
+ /note="CEN9_CDEIII of CEN9"
+ gene complement(<356143..>356895)
+ /gene="SGN1"
+ /locus_tag="YIR001C"
+ /gene_synonym="RBP1; RBP29"
+ /db_xref="GeneID:854817"
+ mRNA complement(<356143..>356895)
+ /gene="SGN1"
+ /locus_tag="YIR001C"
+ /gene_synonym="RBP1; RBP29"
+ /product="Sgn1p"
+ /transcript_id="NM_001179523.1"
+ /db_xref="GeneID:854817"
+ CDS complement(356143..356895)
+ /gene="SGN1"
+ /locus_tag="YIR001C"
+ /gene_synonym="RBP1; RBP29"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:10764794]"
+ /experiment="EXISTENCE:direct assay:GO:0008143 poly(A)
+ binding [PMID:10764794]"
+ /experiment="EXISTENCE:genetic interaction:GO:0016071 mRNA
+ metabolic process [PMID:10764794]"
+ /experiment="EXISTENCE:physical interaction:GO:0016071
+ mRNA metabolic process [PMID:10764794]"
+ /note="Cytoplasmic RNA-binding protein; contains an RNA
+ recognition motif (RRM); may have a role in mRNA
+ translation, as suggested by genetic interactions with
+ genes encoding proteins involved in translational
+ initiation"
+ /codon_start=1
+ /product="Sgn1p"
+ /protein_id="NP_012266.1"
+ /db_xref="GeneID:854817"
+ /db_xref="SGD:S000001440"
+ /translation="MSQEEKVDAKATLKTEISNNKKNDKQELELDELVGKLSIEGTPQ
+ VSQKLSKEEKHAHQLEADSRSIFVGNITPDVTPEQIEDHFKDCGQIKRITLLYDRNTG
+ TPKGYGYIEFESPAYREKALQLNGGELKGKKIAVSRKRTNIPGFNRHYNSQNQYFQQW
+ QWNYPLMAYPNPDTFPYYPPYPPNQSPNQNFGYNKNNYYRSPYNNKNRTFQKKHFNSA
+ KDSTKNIRSTSQKPVVMPSDNVKSSTQEKDSK"
+ rep_origin 357160..357396
+ /note="ARS920; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118399"
+ gene complement(<357415..>360396)
+ /gene="MPH1"
+ /locus_tag="YIR002C"
+ /db_xref="GeneID:854818"
+ mRNA complement(<357415..>360396)
+ /gene="MPH1"
+ /locus_tag="YIR002C"
+ /product="3'-5' DNA helicase"
+ /transcript_id="NM_001179524.1"
+ /db_xref="GeneID:854818"
+ CDS complement(357415..360396)
+ /gene="MPH1"
+ /locus_tag="YIR002C"
+ /EC_number="3.6.4.12"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:10880470|PMID:19995966]"
+ /experiment="EXISTENCE:direct assay:GO:0033677 DNA/RNA
+ helicase activity [PMID:26966248]"
+ /experiment="EXISTENCE:direct assay:GO:0043138 3'-5' DNA
+ helicase activity [PMID:15634678]"
+ /experiment="EXISTENCE:direct assay:GO:0060543 negative
+ regulation of strand invasion [PMID:19136626]"
+ /experiment="EXISTENCE:direct assay:GO:0070336
+ flap-structured DNA binding [PMID:19181670]"
+ /experiment="EXISTENCE:genetic interaction:GO:0033567 DNA
+ replication, Okazaki fragment processing [PMID:19181670]"
+ /experiment="EXISTENCE:genetic interaction:GO:0036297
+ interstrand cross-link repair [PMID:22912599]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000725
+ recombinational repair [PMID:19995966]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007535 donor
+ selection [PMID:27257873]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0060543
+ negative regulation of strand invasion [PMID:19136626]"
+ /experiment="EXISTENCE:physical interaction:GO:0007535
+ donor selection [PMID:27257873]"
+ /note="3'-5' DNA helicase involved in error-free bypass of
+ DNA lesions; binds flap DNA, stimulates activity of Rad27p
+ and Dna2p; prevents crossovers between ectopic sequences
+ by removing substrates for Mus81-Mms4 or Rad1-Rad10
+ cleavage; homolog of human FANCM Fanconi anemia protein
+ that is involved in stabilizing and remodeling blocked
+ replication forks; member of SF2 DExD/H superfamily of
+ helicases; nonsense or missense mutations in FANCM
+ predispose individuals to breast cancer"
+ /codon_start=1
+ /product="3'-5' DNA helicase"
+ /protein_id="NP_012267.1"
+ /db_xref="GeneID:854818"
+ /db_xref="SGD:S000001441"
+ /translation="MASADDYFSDFEDDELDKLYEKAINKSVKETITRRAVPVQKDLH
+ DNVLPGQKTVYEEIQRDVSFGPTHHELDYDALSFYVYPTNYEVRDYQYTIVHKSLFQN
+ TLCAIPTGMGKTFIASTVMLNYFRWTKKAKIIFTAPTRPLVAQQIKACLGITGIPSDQ
+ TAILLDKSRKNREEIWANKRVFFATPQVVENDLKRGVLDPKDIVCLVIDEAHRATGSS
+ AYTNVVKFIDRFNSSYRLLALTATPASDLEGVQEVVNNLDISKIEIRTEESMDIVKYM
+ KKRKKEKIEVPLLLEIEDIIEQLGMAVKPVLQQAIELGIYEECDPSQINAFKAMQQSQ
+ KIIANPTIPEGIKWRNFFILQLLNNVGQMLKRLKIYGIRTFFNYFQNKCTEFTTKYNL
+ KKSTNKIAAEFYYHPILKNIKNQCENYLSDPKFVGHGKLQCVRDELMDFFQKRGSDSR
+ VIIFTELRESALEIVKFIDSVADDQIRPHIFIGQARAKEGFDEVKYTRKHAPKGRKKV
+ ERLHRQEQEKFLEAERTKRAANDKLERSARRTGSSEEAQISGMNQKMQKEVIHNFKKG
+ EYNVLVCTSIGEEGLDIGEVDLIICYDTTSSPIKNIQRMGRTGRKRDGKIVLLFSSNE
+ SYKFERAMEDYSTLQALISKQCIDYKKSDRIIPEDIIPECHETLITINDENEIINEME
+ DVDEVIRYATQCMMGKKVKPKKAITKKKRVQENKKPKKFFMPDNVETSIVSASTLINK
+ FLVNESGGKQLVTSNENPSKKRKIFKALDNLENDSTEEASSSLETEDEEVSDDNNVFI
+ AEGQNGCQKDLETAIIRTGESLTTLKPLHNFERPNMALFVNDCGLPTKIEKNVKDIRG
+ NQHNLEKEKSCTVDKNNMVLSLDDWNFFRNRYIPEGVSFDVEPNFVQYTKGVKVPHCH
+ KVSKIITLFNDESNDNKKRTIDMNYTKCLARGMLRDEKKFVKVNDKSQVDNNSVNHDS
+ SQSFTLSNAELDDILGSDSDF"
+ gene <360885..>362924
+ /gene="AIM21"
+ /locus_tag="YIR003W"
+ /db_xref="GeneID:854819"
+ mRNA <360885..>362924
+ /gene="AIM21"
+ /locus_tag="YIR003W"
+ /product="Aim21p"
+ /transcript_id="NM_001179525.3"
+ /db_xref="GeneID:854819"
+ CDS 360885..362924
+ /gene="AIM21"
+ /locus_tag="YIR003W"
+ /experiment="EXISTENCE:direct assay:GO:0015629 actin
+ cytoskeleton [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0030479 actin
+ cortical patch [PMID:28706108|PMID:29467252]"
+ /experiment="EXISTENCE:direct assay:GO:0030837 negative
+ regulation of actin filament polymerization
+ [PMID:29467252]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:19053807]"
+ /experiment="EXISTENCE:direct assay:GO:0051015 actin
+ filament binding [PMID:29467252]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034642
+ mitochondrion migration along actin filament
+ [PMID:19300474]"
+ /experiment="EXISTENCE:physical interaction:GO:0110131
+ Aim21-Tda2 complex [PMID:28706108|PMID:29467252]"
+ /note="Subunit of a complex that associates with actin
+ filaments; forms a complex with Tda2p that inhibits barbed
+ end F-actin assembly; elevates actin monomer pools to
+ increase endocytotic efficiency and to regulate the
+ distribution of actin between cables and patches;
+ Aim21p/Tda2p forms a larger complex with actin capping
+ proteins Cap1p and Cap2p; involved in mitochondrial
+ migration along actin filaments; recruited to cortical
+ actin patches by SH3 domain-containing proteins Bbc1p and
+ Abp1p"
+ /codon_start=1
+ /product="Aim21p"
+ /protein_id="NP_012268.3"
+ /db_xref="GeneID:854819"
+ /db_xref="SGD:S000001442"
+ /translation="MPSEVTPKVPERPSRRKTSELFPLSGSESGDIKANSEPPTPAGT
+ PNVPTRRPILKAKTMTSFESGMDQESLPKVPLQRPVRRSTTEELNNVMNNTSKELEEI
+ ESLISKHNIHNVSRKKSPTSVEEGKVAAIHQNGQRSASDNKTSTNPSPLEKNEHEGAE
+ GNESAISPSNLVNKSNNEVTEHSDSEDLTEKQKVHAALDNEAGDRSHFEEKLIPGDMK
+ VQVDVSKDVEEGSLNALPPSGITESDDKAEKFTKHPESSLEELQKHQEQQEEKIFQNP
+ TDEESTTSLNEKQEGKDNMEVNSQPQGPSDTETVIAATSSNVPSQIASEEENDVPVIP
+ RSRPKKDFEAHVQKEELPNTQEKRVSEECDSTLISTEEESKIPKIPSERPKRRAPPPV
+ PKKPSSRIAAFQEMLQKQQQQDLHNNGNSSATTASADIAKKHTDSSITSDTTKADFTS
+ KLNGLFALPGMVNPGQLPPSLEKKLSSPDTESKLGPQDQSQAKTGPLGGTRRGRGPRG
+ RKLPSKVASVEKIEEDDNTNKIEIFNNWNVSSSFSKEKVLIDTTPGEQAERALDEKSK
+ SIPEEQREQSPNKMEAALCPFELDEKEKLPANAESDPLSQLPQTNAVGNRKAISEESL
+ SPSEAIANRDQNDTTEIQEQQMEDQMEVDMERELSGGYEDVDSALHSEEASFHSL"
+ rep_origin 362924..363221
+ /note="ARS921; Putative replication origin; identified in
+ multiple array studies, not yet confirmed by plasmid-based
+ assay"
+ /db_xref="SGD:S000130161"
+ gene <363221..>364519
+ /gene="DJP1"
+ /locus_tag="YIR004W"
+ /gene_synonym="ICS1; PAS22"
+ /db_xref="GeneID:854820"
+ mRNA <363221..>364519
+ /gene="DJP1"
+ /locus_tag="YIR004W"
+ /gene_synonym="ICS1; PAS22"
+ /product="Djp1p"
+ /transcript_id="NM_001179526.1"
+ /db_xref="GeneID:854820"
+ CDS 363221..364519
+ /gene="DJP1"
+ /locus_tag="YIR004W"
+ /gene_synonym="ICS1; PAS22"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:9679141]"
+ /experiment="EXISTENCE:direct assay:GO:0005933 cellular
+ bud [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006626 protein
+ targeting to mitochondrion [PMID:30213914]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016558 protein
+ import into peroxisome matrix [PMID:9679141]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045040 protein
+ insertion into mitochondrial outer membrane
+ [PMID:23959800]"
+ /note="ER-associated chaperone involved in protein
+ targeting; redirects mitochondrial membrane protein
+ precursors to mitochondrial translocation system; required
+ for peroxisomal protein import and involved in peroxisome
+ assembly; facilitates import of Mim1p and Mim2p into the
+ mitochondrial outer membrane; homologous to E. coli DnaJ"
+ /codon_start=1
+ /product="Djp1p"
+ /protein_id="NP_012269.1"
+ /db_xref="GeneID:854820"
+ /db_xref="SGD:S000001443"
+ /translation="MVVDTEYYDLLGVSTTASSIEIKKAYRKKSIQEHPDKNPNDPTA
+ TERFQAISEAYQVLGDDDLRAKYDKYGRKEAIPQGGFEDAAEQFSVIFGGDAFASYIG
+ ELMLLKNLQKTEELNAEDEAEKEKENVETMEESPADGKTNGTTNAVDAALGNTNEKDD
+ KNKARTTSGNLTVHDGNKKNEQVGAEAKKKKTKLEQFEEEQEVEKQKRVDQLSKTLIE
+ RLSILTESVYDDACKDSFKKKFEEEANLLKMESFGLDILHTIGDVYYEKAEIFLASQN
+ LFGMGGIFHSMKAKGGVFMDTLRTVSAAIDAQNTMKELEKMKEASTNNEPLFDKDGNE
+ QIKPTTEELAQQEQLLMGKVLSAAWHGSKYEITSTLRGVCKKVLEDDSVSKKTLIRRA
+ EAMKLLGEVFKKTFRTKVEQEEAQIFEELVAEATKKKRHT"
+ gene <364889..>365335
+ /gene="IST3"
+ /locus_tag="YIR005W"
+ /gene_synonym="SNU17"
+ /db_xref="GeneID:854821"
+ mRNA <364889..>365335
+ /gene="IST3"
+ /locus_tag="YIR005W"
+ /gene_synonym="SNU17"
+ /product="U2 snRNP complex subunit IST3"
+ /transcript_id="NM_001179527.1"
+ /db_xref="GeneID:854821"
+ CDS 364889..365335
+ /gene="IST3"
+ /locus_tag="YIR005W"
+ /gene_synonym="SNU17"
+ /experiment="EXISTENCE:direct assay:GO:0000384 first
+ spliceosomal transesterification activity [PMID:11287609]"
+ /experiment="EXISTENCE:direct assay:GO:0000398 mRNA
+ splicing, via spliceosome [PMID:15565172]"
+ /experiment="EXISTENCE:direct assay:GO:0005686 U2 snRNP
+ [PMID:16314500]"
+ /experiment="EXISTENCE:direct assay:GO:0070274 RES complex
+ [PMID:15565172]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000245
+ spliceosomal complex assembly [PMID:11287609]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000349
+ generation of catalytic spliceosome for first
+ transesterification step [PMID:11287609]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000384 first
+ spliceosomal transesterification activity [PMID:11287609]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006406 mRNA
+ export from nucleus [PMID:15565172]"
+ /note="Component of the U2 snRNP; required for the first
+ catalytic step of splicing and for spliceosomal assembly;
+ interacts with Rds3p and is required for Mer1p-activated
+ splicing; diploid mutants have a specific defect in MATa1
+ pre-mRNA splicing which leads to haploid gene expression
+ in diploids"
+ /codon_start=1
+ /product="U2 snRNP complex subunit IST3"
+ /protein_id="NP_012270.1"
+ /db_xref="GeneID:854821"
+ /db_xref="SGD:S000001444"
+ /translation="MNKIQQINDKELQSGILSPHQSWHNEYKDNAYIYIGNLNRELTE
+ GDILTVFSEYGVPVDVILSRDENTGESQGFAYLKYEDQRSTILAVDNLNGFKIGGRAL
+ KIDHTFYRPKRSLQKYYEAVKEELDRDIVSKNNAEKLILAKKDQPN"
+ gene complement(<365466..>369908)
+ /gene="PAN1"
+ /locus_tag="YIR006C"
+ /gene_synonym="DIM2; MDP3; MIP3"
+ /db_xref="GeneID:854822"
+ mRNA complement(<365466..>369908)
+ /gene="PAN1"
+ /locus_tag="YIR006C"
+ /gene_synonym="DIM2; MDP3; MIP3"
+ /product="Pan1p"
+ /transcript_id="NM_001179528.3"
+ /db_xref="GeneID:854822"
+ CDS complement(365466..369908)
+ /gene="PAN1"
+ /locus_tag="YIR006C"
+ /gene_synonym="DIM2; MDP3; MIP3"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:17967424]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:17967424]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:11914276]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0030479 actin
+ cortical patch [PMID:14622601|PMID:16824951]"
+ /experiment="EXISTENCE:direct assay:GO:0043332 mating
+ projection tip [PMID:19053807]"
+ /experiment="EXISTENCE:direct assay:GO:0071933 Arp2/3
+ complex binding [PMID:11433303]"
+ /experiment="EXISTENCE:direct assay:GO:1990964 actin
+ cytoskeleton-regulatory complex
+ [PMID:10594004|PMID:17151356]"
+ /experiment="EXISTENCE:direct assay:GO:2000601 positive
+ regulation of Arp2/3 complex-mediated actin nucleation
+ [PMID:16824951|PMID:17151356|PMID:11433303]"
+ /experiment="EXISTENCE:genetic interaction:GO:0061709
+ reticulophagy [PMID:35101986]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000147 actin
+ cortical patch assembly [PMID:18177206]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006897
+ endocytosis [PMID:8978817]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007120 axial
+ cellular bud site selection [PMID:8756649]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007121 bipolar
+ cellular bud site selection [PMID:8756649]"
+ /experiment="EXISTENCE:physical interaction:GO:1990964
+ actin cytoskeleton-regulatory complex [PMID:17151356]"
+ /note="Part of actin cytoskeleton-regulatory complex
+ Pan1p-Sla1p-End3p; associates with actin patches on cell
+ cortex; promotes protein-protein interactions essential
+ for endocytosis; regulates late stages of endocytosis;
+ binds to and activates Arp2/3 complex in vitro;
+ phosphorylation of Thr-1225 is regulated by MAPK Hog1p in
+ response to osmotic stress"
+ /codon_start=1
+ /product="Pan1p"
+ /protein_id="NP_012271.3"
+ /db_xref="GeneID:854822"
+ /db_xref="SGD:S000001445"
+ /translation="MYNPYQQQGMGYQQQQQQQQQQPNGFYPQQQQGQSSNQPQGQPQ
+ PQQQMAFNQPQATGIGGMPQSFGNSFSSMPQQPQTGYNNNGNNGSVYGNGNFGQQPQQ
+ QQQQAKPQHTGYVPNSSMPMMNTTGTMPPPNPAQQPQLQSIQPQGTGYYQAANTANVH
+ SVQPLQSQGTGYYVSTPNLISSNQTQQPLQAQGTGYYQSQPQQVPPPQQAQSLQPLKP
+ QQTGFYLQPQNQAPLEPLKPTATGFVNSFANNGLNNDIKIPAIRLSFITAQDQAKFET
+ LFRSIVTNGSNTVSGANCRKILMRSGLPPSQLARIWTLCDTSKAGELLFPEFALAMHL
+ INDVLQGDTIPYELDSKTKNEVSSFIDAINLSIANQDSSANDAPKTPFDEFITAGVQN
+ LQPQPTGYMPQTSFGIPLQSQITGGGVASALNPQSTGFMAPTTFNMSMNTGTPGLNPQ
+ ITGGAPASMQPNITGNALQPQTTGMMPQTTGMMPQTTGMMPQTSFGVNLGPQLTGGAL
+ QSQYTGGYGSVMPQQSGPASMPNLSFNQQGLQSQLTGLQPQPTGFLPPSNFSATMPLT
+ AQKTGFGNNEIYTKSNFNNNLIDNSSQDKISTEEKSLFYKIFETFDTQNKGLLDSPTA
+ VEIFRKSGLNRADLEQIWNLCDINNTGQLNKQEFALGMHLVYGKLNGKPIPNVLPSSL
+ IPSSTKLLDNLKNQLKTEPTTTKEKPSFGKIDALSYKNNDDDVLPNYRNRRKVYSAKN
+ EEQSSFSSPSAKSVNHSSSTLQTDDISVDKTVEKKTAKPKYAGFSREINLKNIASLEN
+ EIKNISNPENCYDSSIPSDLTSRFDAIIAKLPNLFNEISTIDNEITNAKIQLYRKKNP
+ SSIIGSGPNGEITENDRKKAKSRALLRARMSALTGKSTESEDSLSMEDEQQSAEIKRI
+ QQENGKNQEIIKDIRSSISDISASLKSTMTGSNMISNQEFERWEFGIGLEDGVREFLD
+ DLKSNSNKSVTESSPFVPSSTPTPVDDRSSSPSYSQFKTAEERAAYLKEQAKKRMKEK
+ LAKFDKNRRNVTQSSRSISSENSREQPQQIAGSSNLVEPRATPFQEEKYVEVAQPTQP
+ VQSTQPVQPTQPVQPTQPVQPTQPVQPTQPVQPTQPVQNVYNAKQESDDEDEDDEEKR
+ LQEELKRLKLKKKADKEKRLAALRKQIEDAQNESDEEETNGKDNFGGHVNVPQAAPVA
+ PSAAFSQNSTNAPRSVHAAVTPAAGKNSTGLPSTTMGHNPYFKDASASSTSTFDARAA
+ EMQRRIQRGLDEDEDDGWSDEDESNNRVAVDNKVEEAKIGHPDHARAPPVTAAPLPSV
+ TPVPPAVPVPQANTSNEKSSPIPIAPIPPSVTQEPPVPLAPPLPAVDGFQEPPIPSAP
+ AIATAVQKSGSSTPALAGGVLPPPPPLPTQQASTSEPIIAHVDNYNGAEKGTGAYGSD
+ SDDDVLSIPESVGTDEEEEGAQPVSTAGIPSIPPAGIPPPPPLP"
+ gene 370417..370488
+ /locus_tag="YNCI0011W"
+ /db_xref="GeneID:854823"
+ tRNA 370417..370488
+ /locus_tag="YNCI0011W"
+ /product="tRNA-Glu"
+ /experiment="EXISTENCE:curator inference:GO:0005829
+ cytosol [PMID:9023104]"
+ /experiment="EXISTENCE:curator inference:GO:0006414
+ translational elongation [PMID:9023104]"
+ /note="Glutamate tRNA (tRNA-Glu), predicted by tRNAscan-SE
+ analysis; thiolation of uridine at wobble position (34)
+ requires Ncs6p"
+ /db_xref="GeneID:854823"
+ /db_xref="SGD:S000006556"
+ gene <370704..>372998
+ /gene="EGH1"
+ /locus_tag="YIR007W"
+ /db_xref="GeneID:854824"
+ mRNA <370704..>372998
+ /gene="EGH1"
+ /locus_tag="YIR007W"
+ /product="hydrolase"
+ /transcript_id="NM_001179529.3"
+ /db_xref="GeneID:854824"
+ CDS 370704..372998
+ /gene="EGH1"
+ /locus_tag="YIR007W"
+ /experiment="EXISTENCE:direct assay:GO:0000329 fungal-type
+ vacuole membrane [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26116408]"
+ /experiment="EXISTENCE:direct assay:GO:0050295
+ steryl-beta-glucosidase activity [PMID:26116408]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1904462
+ ergosteryl 3-beta-D-glucoside catabolic process
+ [PMID:26116408]"
+ /note="Steryl-beta-glucosidase with broad specificity for
+ aglycones; has a role in ergosteryl-beta-glucoside
+ catabolism; required for normal vacuolar morphology; has
+ similarity to the C. neoformans
+ ergosteryl-beta-glucosidase EGCrP2; localizes to the
+ cytosol"
+ /codon_start=1
+ /product="hydrolase"
+ /protein_id="NP_012272.3"
+ /db_xref="GeneID:854824"
+ /db_xref="SGD:S000001446"
+ /translation="MPAKIHISADGQFCDKDGNEIQLRGVNLDPSVKIPAKPFLSTHA
+ PIENDTFFEDADKVSFINHPLVLDDIEQHIIRLKSLGYNTIRLPFTWESLEHAGPGQY
+ DFDYMDYIVEVLTRINSVQQGMYIYLDPHQDVWSRFSGGSGAPLWTLYCAGFQPANFL
+ ATDAAILHNYYIDPKTGREVGKDEESYPKMVWPTNYFKLACQTMFTLFFGGKQYAPKC
+ TINGENIQDYLQGRFNDAIMTLCARIKEKAPELFESNCIIGLESMNEPNCGYIGETNL
+ DVIPKERNLKLGKTPTAFQSFMLGEGIECTIDQYKRTFFGFSKGKPCTINPKGKKAWL
+ SAEERDAIDAKYNWERNPEWKPDTCIWKLHGVWEIQNGKRPVLLKPNYFSQPDATVFI
+ NNHFVDYYTGIYNKFREFDQELFIIIQPPVMKPPPNLQNSKILDNRTICACHFYDGMT
+ LMYKTWNKRIGIDTYGLVNKKYSNPAFAVVLGENNIRKCIRKQLSEMQKDAKSMLGKK
+ VPVFFTEIGIPFDMDDKKAYITNDYSSQTAALDALGFALEGSNLSYTLWCYCSINSHI
+ WGDNWNNEDFSIWSPDDKPLYHDTRAKTPTPEPSPASTVASVSTSTSKSGSSQPPSFI
+ KPDNHLDLDSPSCTLKSDLSGFRALDAIMRPFPIQIHGRFEFAEFNLCNKSYLLKLVG
+ KTTPEQITVPTYIFIPRHHFTPSRLSIRSSSGHYTYNTDYQVLEWFHEPGHQFIEICA
+ KSKSRPNTPGSDTSNDLPAECVIS"
+ gene complement(<373077..>374306)
+ /gene="PRI1"
+ /locus_tag="YIR008C"
+ /db_xref="GeneID:854825"
+ mRNA complement(<373077..>374306)
+ /gene="PRI1"
+ /locus_tag="YIR008C"
+ /product="DNA primase subunit PRI1"
+ /transcript_id="NM_001179530.1"
+ /db_xref="GeneID:854825"
+ CDS complement(373077..374306)
+ /gene="PRI1"
+ /locus_tag="YIR008C"
+ /experiment="EXISTENCE:direct assay:GO:0003697
+ single-stranded DNA binding [PMID:20404922]"
+ /experiment="EXISTENCE:direct assay:GO:0003896 DNA primase
+ activity [PMID:3888995|PMID:2644256|PMID:3061469]"
+ /experiment="EXISTENCE:direct assay:GO:0005658 alpha DNA
+ polymerase:primase complex [PMID:3888995|PMID:22593576]"
+ /experiment="EXISTENCE:direct assay:GO:0006269 DNA
+ replication, synthesis of primer [PMID:2644256]"
+ /experiment="EXISTENCE:direct assay:GO:0043596 nuclear
+ replication fork [PMID:16103218]"
+ /experiment="EXISTENCE:genetic interaction:GO:0003896 DNA
+ primase activity [PMID:8436268]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003896 DNA
+ primase activity [PMID:8436268]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006260 DNA
+ replication [PMID:2023935]"
+ /note="Subunit of DNA primase; DNA primase is required for
+ DNA synthesis and double-strand break repair"
+ /codon_start=1
+ /product="DNA primase subunit PRI1"
+ /protein_id="NP_012273.1"
+ /db_xref="GeneID:854825"
+ /db_xref="SGD:S000001447"
+ /translation="MTNSVKTNGPSSSDMEYYYKSLYPFKHIFNWLNHSPKPSRDMIN
+ REFAMAFRSGAYKRYNSFNSVQDFKAQIEKANPDRFEIGAIYNKPPRERDTLLKSELK
+ ALEKELVFDIDMDDYDAFRTCCSGAQVCSKCWKFISLAMKITNTALREDFGYKDFIWV
+ FSGRRGAHCWVSDKRARALTDVQRRNVLDYVNVIRDRNTDKRLALKRPYHPHLARSLE
+ QLKPFFVSIMLEEQNPWEDDQHAIQTLLPALYDKQLIDSLKKYWLDNPRRSSKEKWND
+ IDQIATSLFKGPKQDSHIIKLRECKEDLVLMTLYPKLDVEVTKQTIHLLKAPFCIHPA
+ TGNVCVPIDESFAPEKAPKLIDLQTEMEKNNDVSLTALQPFINQFQAYVSSLLKNELG
+ SVKREREDDDEPASLDF"
+ gene <374525..>374860
+ /gene="MSL1"
+ /locus_tag="YIR009W"
+ /gene_synonym="YIB9"
+ /db_xref="GeneID:854826"
+ mRNA <374525..>374860
+ /gene="MSL1"
+ /locus_tag="YIR009W"
+ /gene_synonym="YIB9"
+ /product="U2 snRNP complex subunit MSL1"
+ /transcript_id="NM_001179531.1"
+ /db_xref="GeneID:854826"
+ CDS 374525..374860
+ /gene="MSL1"
+ /locus_tag="YIR009W"
+ /gene_synonym="YIB9"
+ /experiment="EXISTENCE:direct assay:GO:0071004 U2-type
+ prespliceosome [PMID:16618970]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000398 mRNA
+ splicing, via spliceosome [PMID:9799242]"
+ /experiment="EXISTENCE:physical interaction:GO:0030620 U2
+ snRNA binding [PMID:8649387]"
+ /note="U2B component of U2 snRNP; involved in splicing,
+ binds the U2 snRNA stem-loop IV in vitro but requires
+ association of Lea1p for in vivo binding; does not contain
+ the conserved C-terminal RNA binding domain found in other
+ family members"
+ /codon_start=1
+ /product="U2 snRNP complex subunit MSL1"
+ /protein_id="NP_012274.1"
+ /db_xref="GeneID:854826"
+ /db_xref="SGD:S000001448"
+ /translation="MVEPARKKQRIDRDTHHTVAEPVTEAKNTLYVSQLNEKINMQRL
+ RVNLFLLFATFGEVLKVSMNFKKQRGQAFITMRTIDQASLAQISLNGERFFGKPLKVE
+ FSKSETKTL"
+ gene <375431..>377161
+ /gene="DSN1"
+ /locus_tag="YIR010W"
+ /db_xref="GeneID:854827"
+ mRNA <375431..>377161
+ /gene="DSN1"
+ /locus_tag="YIR010W"
+ /product="MIND complex subunit DSN1"
+ /transcript_id="NM_001179532.3"
+ /db_xref="GeneID:854827"
+ CDS 375431..377161
+ /gene="DSN1"
+ /locus_tag="YIR010W"
+ /experiment="EXISTENCE:direct assay:GO:0000444 MIS12/MIND
+ type complex [PMID:14633972]"
+ /experiment="EXISTENCE:direct assay:GO:0000776 kinetochore
+ [PMID:14657030|PMID:24402315]"
+ /experiment="EXISTENCE:direct assay:GO:0000922 spindle
+ pole [PMID:12455957]"
+ /experiment="EXISTENCE:direct assay:GO:0007059 chromosome
+ segregation [PMID:12455957]"
+ /experiment="EXISTENCE:genetic interaction:GO:0051455
+ spindle attachment to meiosis I kinetochore
+ [PMID:23861669]"
+ /note="Essential component of the outer kinetochore MIND
+ complex; joins kinetochore subunits contacting DNA to
+ those contacting microtubules; phosphorylation promotes
+ interaction between outer and inner kinetochore proteins;
+ kinetochore receptor for monopolin, via interaction with
+ Csm1p; essential for both meiotic and mitotic chromosome
+ segregation; MIND complex consists of Mtw1p, Nnf1p, Nsl1p
+ and Dsn1p; phosphorylated by monopolin subunit, Hrr25p and
+ Aurora kinase, Ipl1p; modified by sumoylation"
+ /codon_start=1
+ /product="MIND complex subunit DSN1"
+ /protein_id="NP_012275.3"
+ /db_xref="GeneID:854827"
+ /db_xref="SGD:S000001449"
+ /translation="MSLEPTQTVSGTPPMLHQRTHKQVYPLRMETIPILESDSKATLQ
+ SNEPTQKDEEETEYFENKQSVSNLSPDLKFKRHKNKHIQGFPTLGERLDNLQDIKKAK
+ RVENFNSSAPIADDNHSGDATANATANATANATANVNASAMPAPYMPYYYYYHPMNAP
+ TPAMIPYPGSPMHSIMPNSSLQPFYSQPTAAGGPDMTTPQNISSSQQLLPAPQLFPYG
+ SFHQQQLQQPHYIQRTRERKKSIGSQRGRRLSMLASQANGGSTIISPHKDIPEEDFYT
+ VVGNASFGKNLQIRQLFNWCLMRSLHKLELKAKNQEEEGELEHLTKKSKLESTKAETD
+ YVDPKRLAMVIIKEFVDDLKKDHIAIDWEDEEKYEDEDEEKILDNTENYDDTELRQLF
+ QENDDDDDDDDEVDYSEIQRSRRKFSERRKALPKEPKKLLPNSKNVENTKNLSILTSK
+ VNAIKNEVKEWAVTLDTSRPDLEWQELTSFSSQPLEPLSDTEEPDLAIADVETKLETK
+ VDELRYQSHILNSHSLALNEITNSKVNKLNIETMRKISSETDDDHSQVINPQQLLKGL
+ SLSFSKKLDL"
+ gene complement(<377287..>378246)
+ /gene="STS1"
+ /locus_tag="YIR011C"
+ /gene_synonym="DBF8; SSM5"
+ /db_xref="GeneID:854828"
+ mRNA complement(<377287..>378246)
+ /gene="STS1"
+ /locus_tag="YIR011C"
+ /gene_synonym="DBF8; SSM5"
+ /product="Sts1p"
+ /transcript_id="NM_001179533.1"
+ /db_xref="GeneID:854828"
+ CDS complement(377287..378246)
+ /gene="STS1"
+ /locus_tag="YIR011C"
+ /gene_synonym="DBF8; SSM5"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:10913188]"
+ /experiment="EXISTENCE:direct assay:GO:0070628 proteasome
+ binding [PMID:17916559]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007059
+ chromosome segregation [PMID:8065366]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031144
+ proteasome localization [PMID:21075847]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071630 nuclear
+ protein quality control by the ubiquitin-proteasome system
+ [PMID:21075847]"
+ /note="Karyopherin adaptor required for nuclear
+ translocation of the proteasome; mediates interaction
+ between nuclear import factor Srp1p and the proteasome;
+ nuclear import of the 26S proteasome is coupled to
+ RanGTP-initiated, ubiquitin-independent proteasomal
+ degradation of Sts1p; Sts1p and Srp1p couple proteasomes
+ to nascent polypeptides emerging from the ribosome for
+ cotranslational degradation; role in ubiquitin-mediated
+ protein degradation; contains a bipartite nuclear
+ localization sequence"
+ /codon_start=1
+ /product="Sts1p"
+ /protein_id="NP_012276.1"
+ /db_xref="GeneID:854828"
+ /db_xref="SGD:S000001450"
+ /translation="MMGFEWGFKPSSKITQSTVSSQGTGNVMIPTAGVKQKRRYANEE
+ QEEEELPRNKNVMKYGGVSKRRPQPGSLIRGQPLPLQRGMELMNKNQLQQLLVDLMTK
+ HPEIQQSVHTRVIGLDFSIQKCLDMLKQKSEAVYQSIPYNRSYESNKLDDYAFVRMKP
+ QILEFLNCLVDFILDNIPPRLENLHASLKFLDICTELVIKLPRFELASNNYYYDKCIE
+ QLSHVWCTLIEHVARDRIILLADNSSVWKSHMTRLQVYNEHSNGLLERPLQLFKSLDM
+ GSPSAASSSTLSLQESIIYHHDTMTANENNNNSGSAATDSPFN"
+ gene <378486..>379781
+ /gene="SQT1"
+ /locus_tag="YIR012W"
+ /db_xref="GeneID:854829"
+ mRNA <378486..>379781
+ /gene="SQT1"
+ /locus_tag="YIR012W"
+ /product="Sqt1p"
+ /transcript_id="NM_001179534.1"
+ /db_xref="GeneID:854829"
+ CDS 378486..379781
+ /gene="SQT1"
+ /locus_tag="YIR012W"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:9271392]"
+ /experiment="EXISTENCE:direct assay:GO:0051082 unfolded
+ protein binding [PMID:26112308]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000027
+ ribosomal large subunit assembly [PMID:9271392]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0042273
+ ribosomal large subunit biogenesis [PMID:26112308]"
+ /note="Specific assembly chaperone for ribosomal protein
+ Rpl10p; co-translationally associates with nascent Rpl10p,
+ preventing aggregation; involved in biogenesis of the
+ ribosomal large subunit; contains multiple WD repeats;
+ interacts genetically and physically with Qsr1p; protein
+ abundance increases in response to DNA replication stress"
+ /codon_start=1
+ /product="Sqt1p"
+ /protein_id="NP_012277.1"
+ /db_xref="GeneID:854829"
+ /db_xref="SGD:S000001451"
+ /translation="MEPQEEFITTEEVEQEIVPTVEVEQDVPVDIEGENDDDDEMMND
+ DEEALEVDMSNNSLTYFDKHTDSVFAIGHHPNLPLVCTGGGDNLAHLWTSHSQPPKFA
+ GTLTGYGESVISCSFTSEGGFLVTADMSGKVLVHMGQKGGAQWKLASQMQEVEEIVWL
+ KTHPTIARTFAFGATDGSVWCYQINEQDGSLEQLMSGFVHQQDCSMGEFINTDKGENT
+ LELVTCSLDSTIVAWNCFTGQQLFKITQAEIKGLEAPWISLSLAPETLTKGNSGVVAC
+ GSNNGLLAVINCNNGGAILHLSTVIELKPEQDELDASIESISWSSKFSLMAIGLVCGE
+ ILLYDTSAWRVRHKFVLEDSVTKLMFDNDDLFASCINGKVYQFNARTGQEKFVCVGHN
+ MGVLDFILLHPVANTGTEQKRKVITAGDEGVSLVFEVPN"
+ gene complement(<380019..>380384)
+ /gene="GAT4"
+ /locus_tag="YIR013C"
+ /db_xref="GeneID:854830"
+ mRNA complement(<380019..>380384)
+ /gene="GAT4"
+ /locus_tag="YIR013C"
+ /product="Gat4p"
+ /transcript_id="NM_001179535.3"
+ /db_xref="GeneID:854830"
+ CDS complement(380019..380384)
+ /gene="GAT4"
+ /locus_tag="YIR013C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:24390141]"
+ /experiment="EXISTENCE:direct assay:GO:0034224 cellular
+ response to zinc ion starvation [PMID:10392447]"
+ /experiment="EXISTENCE:direct assay:GO:0043565
+ sequence-specific DNA binding [PMID:19158363]"
+ /experiment="EXISTENCE:direct assay:GO:0045944 positive
+ regulation of transcription by RNA polymerase II
+ [PMID:10392447]"
+ /note="Protein containing GATA family zinc finger motifs;
+ involved in spore wall assembly; sequence similarity to
+ GAT3, and the double mutant gat3 gat4 exhibits reduced
+ dityrosine fluorescence relative to the single mutants"
+ /codon_start=1
+ /product="Gat4p"
+ /protein_id="NP_012278.3"
+ /db_xref="GeneID:854830"
+ /db_xref="SGD:S000001452"
+ /translation="MSTKLPIVISNGTAFKKVPVQLLLNSGSEAQHGLPRNADSQPAR
+ PRTGITRTCGQCGEIKTSLQWREGPNGAACLCNACGLFFRKLILRFGRAAAKRYMEQI
+ KGTGTKRRIPKELTGTVRF"
+ gene <381086..>381814
+ /gene="VLD1"
+ /locus_tag="YIR014W"
+ /db_xref="GeneID:854831"
+ mRNA <381086..>381814
+ /gene="VLD1"
+ /locus_tag="YIR014W"
+ /product="Vld1p"
+ /transcript_id="NM_001179536.3"
+ /db_xref="GeneID:854831"
+ CDS 381086..381814
+ /gene="VLD1"
+ /locus_tag="YIR014W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0044695 Dsc E3
+ ubiquitin ligase complex [PMID:29355480]"
+ /note="Component of Dsc E3 ligase complex in vacuolar
+ membranes; green fluorescent protein (GFP)-fusion protein
+ localizes to the vacuole; expression directly regulated by
+ the metabolic and meiotic transcriptional regulator Ume6p;
+ YIR014W is a non-essential gene"
+ /codon_start=1
+ /product="Vld1p"
+ /protein_id="NP_012279.4"
+ /db_xref="GeneID:854831"
+ /db_xref="SGD:S000001453"
+ /translation="MLHLEDDNGRQRSVIANLQKFVYCCLYLRFIKDGSLFLILLGWI
+ ISSLCDFIQELTLRYLKKNYLEVGRDNDQEDDESLAIRGLETPIVRMIINKAIRYYQG
+ LILLETAYCIVYHIRLDVSRDICSKPYGFVIMLLIREFTCPVPTAFPSKLLLVLLDIL
+ LLFCQIVIINGSLSSSLQNVKLIVKELNAEEEGALNILKLNTWHMDATGPELIVLKNH
+ DKSIPQQADGDDATEITPLLNIAE"
+ gene <381948..>382382
+ /gene="RPR2"
+ /locus_tag="YIR015W"
+ /db_xref="GeneID:854832"
+ mRNA <381948..>382382
+ /gene="RPR2"
+ /locus_tag="YIR015W"
+ /product="ribonuclease P protein subunit RPR2"
+ /transcript_id="NM_001179537.1"
+ /db_xref="GeneID:854832"
+ CDS 381948..382382
+ /gene="RPR2"
+ /locus_tag="YIR015W"
+ /EC_number="3.1.26.5"
+ /experiment="EXISTENCE:direct assay:GO:0004526
+ ribonuclease P activity [PMID:19095620]"
+ /experiment="EXISTENCE:direct assay:GO:0005655 nucleolar
+ ribonuclease P complex [PMID:9620854|PMID:19095620]"
+ /experiment="EXISTENCE:direct assay:GO:0034965 intronic
+ box C/D snoRNA processing [PMID:18713869]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008033 tRNA
+ processing [PMID:9620854]"
+ /note="Subunit of nuclear RNase P; nuclear RNase P cleaves
+ tRNA precursors to generate mature 5' ends and facilitates
+ turnover of nuclear RNAs; not shared between RNase MRP and
+ RNase P, in contrast to all other RNase P protein
+ subunits; increases the activity and thermal stability of
+ the complex, along with Pop2p; protein abundance increases
+ in response to DNA replication stress"
+ /codon_start=1
+ /product="ribonuclease P protein subunit RPR2"
+ /protein_id="NP_012280.1"
+ /db_xref="GeneID:854832"
+ /db_xref="SGD:S000001454"
+ /translation="MGKKAHGGKMKPEIDENGTLLVPPPRTIANQDHFHRLNYLYQIS
+ AYQTRARQKARTDAHTPLARNYIKSMDLISKKTKTSLLPTIKRTICKKCHRLLWTPKK
+ LEITSDGALSVMCGCGTVKRFNIGADPNYRTYSEREGNLLNS"
+ gene <382628..>383425
+ /locus_tag="YIR016W"
+ /db_xref="GeneID:854833"
+ mRNA <382628..>383425
+ /locus_tag="YIR016W"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179538.1"
+ /db_xref="GeneID:854833"
+ CDS 382628..383425
+ /locus_tag="YIR016W"
+ /note="hypothetical protein; expression directly regulated
+ by the metabolic and meiotic transcriptional regulator
+ Ume6p; overexpression causes a cell cycle delay or arrest;
+ non-essential gene; YIR016W has a paralog, YOL036W, that
+ arose from the whole genome duplication"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012281.1"
+ /db_xref="GeneID:854833"
+ /db_xref="SGD:S000001455"
+ /translation="MSGTRCLLGVGLPVDVTATETLTHDEQGPGVEPGPCSRGSSIDG
+ LLPSLLGPHDDVDDDSAAFHKYMTLSRDGAGAIHAPSLVEDASRNDDDDDDEDDDDSS
+ MSRDLSKALDMSSSSSSSPRVQSRRHRSSVSAISAILHQGKSGREDITGSLSVPAEQE
+ KLSFLAKASSIFFRRNSMPRDKHTHSVCPASRPDSERFIVTSAAAQSLRRQQQLEDAQ
+ YARVITNFRTIGWCSPSEIESVEYKRSLINAEWDEKISLLSHAQCYK"
+ gene complement(<383556..>384119)
+ /gene="MET28"
+ /locus_tag="YIR017C"
+ /db_xref="GeneID:854834"
+ mRNA complement(<383556..>384119)
+ /gene="MET28"
+ /locus_tag="YIR017C"
+ /product="Met28p"
+ /transcript_id="NM_001179539.3"
+ /db_xref="GeneID:854834"
+ CDS complement(383556..384119)
+ /gene="MET28"
+ /locus_tag="YIR017C"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0061629 RNA
+ polymerase II-specific DNA-binding transcription factor
+ binding [PMID:8665859]"
+ /experiment="EXISTENCE:direct assay:GO:0089713
+ Cbf1-Met4-Met28 complex [PMID:8665859|PMID:9171357]"
+ /experiment="EXISTENCE:direct assay:GO:2000679 positive
+ regulation of transcription regulatory region DNA binding
+ [PMID:9171357]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:8665859]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0031335
+ regulation of sulfur amino acid metabolic process
+ [PMID:8665859]"
+ /experiment="EXISTENCE:physical interaction:GO:0061629 RNA
+ polymerase II-specific DNA-binding transcription factor
+ binding [PMID:8665859]"
+ /note="bZIP transcriptional activator in the
+ Cbf1p-Met4p-Met28p complex; participates in the regulation
+ of sulfur metabolism"
+ /codon_start=1
+ /product="Met28p"
+ /protein_id="NP_012282.3"
+ /db_xref="GeneID:854834"
+ /db_xref="SGD:S000001456"
+ /translation="MSAKQGWEKKSTNIDIASRKGMNVNNLSEHLQNLISSDSELGSR
+ LLSLLLVSSGNAEELISMINNGQDVSQFKKLREPRKGKVAATTAVVVKEEEAPVSTSN
+ ELDKIKQERRRKNTEASQRFRIRKKQKNFENMNKLQNLNTQINKLRDRIEQLNKENEF
+ WKAKLNDINEIKSLKLLNDIKRRNMGR"
+ gene <384609..>385346
+ /gene="YAP5"
+ /locus_tag="YIR018W"
+ /db_xref="GeneID:854835"
+ mRNA <384609..>385346
+ /gene="YAP5"
+ /locus_tag="YIR018W"
+ /product="Yap5p"
+ /transcript_id="NM_001179540.1"
+ /db_xref="GeneID:854835"
+ CDS 384609..385346
+ /gene="YAP5"
+ /locus_tag="YIR018W"
+ /experiment="EXISTENCE:direct assay:GO:0000785 chromatin
+ [PMID:12464632]"
+ /experiment="EXISTENCE:direct assay:GO:0000981 DNA-binding
+ transcription factor activity, RNA polymerase II-specific
+ [PMID:18287073]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:18070921]"
+ /experiment="EXISTENCE:direct assay:GO:0045944 positive
+ regulation of transcription by RNA polymerase II
+ [PMID:9372930]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000122
+ negative regulation of transcription by RNA polymerase II
+ [PMID:18287073]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:18287073|PMID:18070921]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071281
+ cellular response to iron ion [PMID:18070921]"
+ /note="Basic leucine zipper (bZIP) iron-sensing
+ transcription factor; senses high-iron conditions via two
+ Fe/S clusters bound to its activator domain; involved in
+ diauxic shift; YAP5 has a paralog, YAP7, that arose from
+ the whole genome duplication"
+ /codon_start=1
+ /product="Yap5p"
+ /protein_id="NP_012283.1"
+ /db_xref="GeneID:854835"
+ /db_xref="SGD:S000001457"
+ /translation="MALPLIKPKESEESHLALLSKIHVSKNWKLPPRLPHRAAQRRKR
+ VHRLHEDYETEENDEELQKKKRQNRDAQRAYRERKNNKLQVLEETIESLSKVVKNYET
+ KLNRLQNELQAKESENHALKQKLETLTLKQASVPAQDPILQNLIENFKPMKAIPIKYN
+ TAIKRHQHSTELPSSVKCGFCNDNTTCVCKELETDHRKSDDGVATEQKDMSMPHAECN
+ NKDNPNGLCSNCTNIDKSCIDIRSIIH"
+ gene complement(<385564..>385701)
+ /locus_tag="YIR018C-A"
+ /db_xref="GeneID:1466494"
+ mRNA complement(<385564..>385701)
+ /locus_tag="YIR018C-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001184655.1"
+ /db_xref="GeneID:1466494"
+ CDS complement(385564..385701)
+ /locus_tag="YIR018C-A"
+ /note="hypothetical protein; identified by expression
+ profiling and mass spectrometry"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_878099.1"
+ /db_xref="GeneID:1466494"
+ /db_xref="SGD:S000028837"
+ /translation="MPSDYTSHYPVILIKKKKKKIAGMYRHSKRYLEIMSTASAQFVG
+ N"
+ gene complement(<389572..>393675)
+ /gene="FLO11"
+ /locus_tag="YIR019C"
+ /gene_synonym="MUC1; STA4"
+ /db_xref="GeneID:854836"
+ mRNA complement(<389572..>393675)
+ /gene="FLO11"
+ /locus_tag="YIR019C"
+ /gene_synonym="MUC1; STA4"
+ /product="Flo11p"
+ /transcript_id="NM_001179541.3"
+ /db_xref="GeneID:854836"
+ CDS complement(389572..393675)
+ /gene="FLO11"
+ /locus_tag="YIR019C"
+ /gene_synonym="MUC1; STA4"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005576
+ extracellular region [PMID:20619652]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:11027318]"
+ /experiment="EXISTENCE:direct assay:GO:0005935 cellular
+ bud neck [PMID:19799621]"
+ /experiment="EXISTENCE:direct assay:GO:0030447 filamentous
+ growth [PMID:12072450]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000128
+ flocculation [PMID:19160455]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001403
+ invasive growth in response to glucose limitation
+ [PMID:19087208|PMID:8710886|PMID:12150916]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007124
+ pseudohyphal growth [PMID:8710886]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0043709 cell
+ adhesion involved in single-species biofilm formation
+ [PMID:11157168]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0090606
+ single-species surface biofilm formation
+ [PMID:12024013|PMID:19160455]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0098609
+ cell-cell adhesion [PMID:19160455|PMID:21875945]"
+ /note="GPI-anchored cell surface glycoprotein (flocculin);
+ required for pseudohyphal and invasive growth,
+ flocculation, and biofilm formation; major determinant of
+ colony morphology; QTL that controls chronological life
+ span; carries intragenic tandem repeats that are expanded
+ in different strains; required for formation of fibrous
+ interconnections between cells; role in co-flocculation
+ with other yeast species; cleaved and shed from cells,
+ contributing to their surface properties"
+ /codon_start=1
+ /product="Flo11p"
+ /protein_id="NP_012284.3"
+ /db_xref="GeneID:854836"
+ /db_xref="SGD:S000001458"
+ /translation="MQRPFLLAYLVLSLLFNSALGFPTALVPRGSSEGTSCNSIVNGC
+ PNLDFNWHMDQQNIMQYTLDVTSVSWVQDNTYQITIHVKGKENIDLKYLWSLKIIGVT
+ GPKGTVQLYGYNENTYLIDNPTDFTATFEVYATQDVNSCQVWMPNFQIQFEYLQGSAA
+ QYASSWQWGTTSFDLSTGCNNYDNQGHSQTDFPGFYWNIDCDNNCGGTKSSTTTSSTS
+ ESSTTTSSTSESSTTTSSTSESSTTTSSTSESSTSSSTTAPATPTTTSCTKEKPTPPT
+ TTSCTKEKPTPPHHDTTPCTKKKTTTSKTCTKKTTTPVPTPSSSTTESSSAPVPTPSS
+ STTESSSAPVTSSTTESSSAPVPTPSSSTTESSSAPVTSSTTESSSAPVTSSTTESSS
+ APVPTPSSSTTESSSAPVTSSTTESSSAPVTSSTTESSSAPVTSSTTESSSAPVTSST
+ TESSSAPVPTPSSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESSSAPVTSSTTESS
+ SAPVPTPSSSTTESSSAPAPTPSSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESSS
+ TPVTSSTTESSSAPVPTPSSSTTESSSAPVPTPSSSTTESSSAPAPTPSSSTTESSSA
+ PVTSSTTESSSAPVPTPSSSTTESSSAPVPTPSSSTTESSSAPVPTPSSSTTESSSAP
+ VTSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESSSAPVPTPSSSTTESSSAPVPTP
+ SSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESSSAPVPTPSSSTTESSSAPVPTPS
+ SSTTESSVAPVPTPSSSSNITSSAPSSTPFSSSTESSSVPVPTPSSSTTESSSAPVSS
+ STTESSVAPVPTPSSSSNITSSAPSSIPFSSTTESFSTGTTVTPSSSKYPGSQTETSV
+ SSTTETTIVPTKTTTSVTTPSTTTITTTVCSTGTNSAGETTSGCSPKTVTTTVPTTTT
+ TSVTTSSTTTITTTVCSTGTNSAGETTSGCSPKTITTTVPCSTSPSETASESTTTSPT
+ TPVTTVVSTTVVTTEYSTSTKPGGEITTTFVTKNIPTTYLTTIAPTPSVTTVTNFTPT
+ TITTTVCSTGTNSAGETTSGCSPKTVTTTVPCSTGTGEYTTEATTLVTTAVTTTVVTT
+ ESSTGTNSAGKTTTGYTTKSVPTTYVTTLAPSAPVTPATNAVPTTITTTECSAATNAA
+ GETTSVCSAKTIVSSASAGENTAPSATTPVTTAIPTTVITTESSVGTNSAGETTTGYT
+ TKSIPTTYITTLIPGSNGAKNYETVATATNPISIKTTSQLATTASASSVAPVVTSPSL
+ TGPLQSASGSAVATYSVPSISSTYQGAANIKVLGNFMWLLLALPVVF"
+ regulatory complement(393718..393732)
+ /regulatory_class="other"
+ /note="Upstream open reading frame (uORF) in 5'
+ untranslated region of FLO11 gene, regulate translation"
+ gene complement(393884..397082)
+ /gene="ICR1"
+ /locus_tag="YNCI0012"
+ /db_xref="GeneID:9164906"
+ ncRNA complement(393884..397082)
+ /ncRNA_class="other"
+ /gene="ICR1"
+ /locus_tag="YNCI0012"
+ /product="ICR1"
+ /experiment="EXISTENCE:curator inference:GO:0005634
+ nucleus [PMID:19805129]"
+ /experiment="EXISTENCE:direct assay:GO:0006357 regulation
+ of transcription by RNA polymerase II [PMID:19805129]"
+ /experiment="EXISTENCE:genetic interaction:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:19805129]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:19805129]"
+ /note="Long intergenic regulatory ncRNA; has a key role in
+ regulating transcription of the nearby protein-coding ORF
+ FLO11; initiated far upstream from FLO11 and transcribed
+ across much of the large promoter of FLO11, repressing
+ FLO11 transcription in cis"
+ /transcript_id="NR_132191.1"
+ /db_xref="GeneID:9164906"
+ /db_xref="SGD:S000132612"
+ gene complement(<394255..>394557)
+ /locus_tag="YIR020C"
+ /db_xref="GeneID:854837"
+ mRNA complement(<394255..>394557)
+ /locus_tag="YIR020C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001270752.1"
+ /db_xref="GeneID:854837"
+ CDS complement(394255..394557)
+ /locus_tag="YIR020C"
+ /note="hypothetical protein; mRNA identified as translated
+ by ribosome profiling data; SWAT-GFP fusion protein
+ localizes to the endoplasmic reticulum"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_001257681.1"
+ /db_xref="GeneID:854837"
+ /db_xref="SGD:S000001459"
+ /translation="MTFFLKRKISFFLSGIAQTFLFLPILLNRSVIHVVFLTVVLGHR
+ IPWDSVIRCNNTGTTHSAVSSRTELLLPIGGVINNWKRRAWNGFSIQWIWRYSFVY"
+ gene 395999..396939
+ /gene="PWR1"
+ /locus_tag="YNCI0013W"
+ /db_xref="GeneID:9164905"
+ ncRNA 395999..396939
+ /ncRNA_class="other"
+ /gene="PWR1"
+ /locus_tag="YNCI0013W"
+ /product="PWR1"
+ /experiment="EXISTENCE:curator inference:GO:0005634
+ nucleus [PMID:19805129]"
+ /experiment="EXISTENCE:direct assay:GO:0006357 regulation
+ of transcription by RNA polymerase II [PMID:19805129]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006357
+ regulation of transcription by RNA polymerase II
+ [PMID:19805129]"
+ /note="Intergenic regulatory ncRNA; one of two long ncRNAs
+ that play key roles in regulating transcription of the
+ nearby protein-coding ORF FLO11; PWR1 promotes FLO11
+ transcription by interfering with ncRNA ICR1, which is
+ transcribed across the FLO11 promoter"
+ /transcript_id="NR_132192.1"
+ /db_xref="GeneID:9164905"
+ /db_xref="SGD:S000132614"
+ gene <397294..>398385
+ /gene="MRS1"
+ /locus_tag="YIR021W"
+ /gene_synonym="PET157"
+ /db_xref="GeneID:854839"
+ mRNA <397294..>398385
+ /gene="MRS1"
+ /locus_tag="YIR021W"
+ /gene_synonym="PET157"
+ /product="Mrs1p"
+ /transcript_id="NM_001179543.3"
+ /db_xref="GeneID:854839"
+ CDS 397294..398385
+ /gene="MRS1"
+ /locus_tag="YIR021W"
+ /gene_synonym="PET157"
+ /experiment="EXISTENCE:direct assay:GO:0000372 Group I
+ intron splicing [PMID:11773622]"
+ /experiment="EXISTENCE:direct assay:GO:0003723 RNA binding
+ [PMID:11773622]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion
+ [PMID:16823961|PMID:24769239|PMID:14562095|PMID:14576278]"
+ /experiment="EXISTENCE:direct assay:GO:1990904
+ ribonucleoprotein complex [PMID:12924947]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000963
+ mitochondrial RNA processing [PMID:2443348]"
+ /note="Splicing protein; required for splicing of two
+ mitochondrial group I introns (BI3 in COB and AI5beta in
+ COX1); forms a splicing complex, containing four subunits
+ of Mrs1p and two subunits of the BI3-encoded maturase,
+ that binds to the BI3 RNA; MRS1 has a paralog, CCE1, that
+ arose from the whole genome duplication"
+ /codon_start=1
+ /product="Mrs1p"
+ /protein_id="NP_012287.3"
+ /db_xref="GeneID:854839"
+ /db_xref="SGD:S000001460"
+ /translation="MSPKNITRSVIPAIDLYCRKANFKTLKSLSMILGSKKEWYDTKK
+ APLRTFLVSRCGIFEQLRGRLVEDGKVNLFSVFLTNDSFSFCKMTVDDKFNTSLVDWQ
+ KIPFDSTFATDRRQNISLLPVDTLFATEKIISILGVSPNMTNLVSIERERSDLVDFNC
+ KLQSNILEHLLYAKCQGVYVTSTNEKARLLAAVCNPEFIDTFWCELTPIRVSLKENPS
+ ISVPREYQMYDPVVRATIKEVVTKRLLRSAFDNDIDPLMCLHLDKGWKLKFPILSSTT
+ GLNFSLKDCLSLDTGKDASDMTEVFLATMESSKVLRTYSNLVDIVMKDNGRLDSGVLK
+ QFNDYVKQEKLNLQHFQAGSSKFLKGAKI"
+ gene <398514..>398726
+ /locus_tag="YIR021W-A"
+ /db_xref="GeneID:1466495"
+ mRNA <398514..>398726
+ /locus_tag="YIR021W-A"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001184656.1"
+ /db_xref="GeneID:1466495"
+ CDS 398514..398726
+ /locus_tag="YIR021W-A"
+ /note="hypothetical protein; identified by expression
+ profiling and mass spectrometry"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_878100.1"
+ /db_xref="GeneID:1466495"
+ /db_xref="SGD:S000028838"
+ /translation="MSFSVSCKTPKTTKLLVSSISESAVALIIITIRILFSIGKSDFK
+ KIISKEINGAETIYYRNIPESKPQGS"
+ gene <398733..>399236
+ /gene="SEC11"
+ /locus_tag="YIR022W"
+ /db_xref="GeneID:854840"
+ mRNA <398733..>399236
+ /gene="SEC11"
+ /locus_tag="YIR022W"
+ /product="signal peptidase complex catalytic subunit
+ SEC11"
+ /transcript_id="NM_001179544.1"
+ /db_xref="GeneID:854840"
+ CDS 398733..399236
+ /gene="SEC11"
+ /locus_tag="YIR022W"
+ /EC_number="3.4.21.89"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:11058593]"
+ /experiment="EXISTENCE:direct assay:GO:0005787 signal
+ peptidase complex [PMID:8910564|PMID:1846444]"
+ /experiment="EXISTENCE:direct assay:GO:0006465 signal
+ peptide processing [PMID:10206957]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006465 signal
+ peptide processing [PMID:3283143|PMID:10206957]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0008233
+ peptidase activity [PMID:10206957]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045047 protein
+ targeting to ER [PMID:3283143]"
+ /note="18kDa catalytic subunit of the Signal Peptidase
+ Complex (SPC); the Signal Peptidase Complex cleaves the
+ signal sequence of proteins targeted to the endoplasmic
+ reticulum; other members are Spc1p, Spc2p, Spc3p, and
+ Sec11p"
+ /codon_start=1
+ /product="signal peptidase complex catalytic subunit
+ SEC11"
+ /protein_id="NP_012288.1"
+ /db_xref="GeneID:854840"
+ /db_xref="SGD:S000001461"
+ /translation="MNLRFELQKLLNVCFLFASAYMFWQGLAIATNSASPIVVVLSGS
+ MEPAFQRGDILFLWNRNTFNQVGDVVVYEVEGKQIPIVHRVLRQHNNHADKQFLLTKG
+ DNNAGNDISLYANKKIYLNKSKEIVGTVKGYFPQLGYITIWISENKYAKFALLGMLGL
+ SALLGGE"
+ gene <399777..>402689
+ /gene="DAL81"
+ /locus_tag="YIR023W"
+ /gene_synonym="UGA35"
+ /db_xref="GeneID:854841"
+ mRNA <399777..>402689
+ /gene="DAL81"
+ /locus_tag="YIR023W"
+ /gene_synonym="UGA35"
+ /product="Dal81p"
+ /transcript_id="NM_001179545.3"
+ /db_xref="GeneID:854841"
+ CDS 399777..402689
+ /gene="DAL81"
+ /locus_tag="YIR023W"
+ /gene_synonym="UGA35"
+ /experiment="EXISTENCE:curator inference:GO:0005634
+ nucleus [PMID:21515579]"
+ /experiment="EXISTENCE:direct assay:GO:0003713
+ transcription coactivator activity [PMID:21515579]"
+ /experiment="EXISTENCE:direct assay:GO:0051123 RNA
+ polymerase II preinitiation complex assembly
+ [PMID:21515579]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0001080
+ nitrogen catabolite activation of transcription from RNA
+ polymerase II promoter [PMID:7899074]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0003713
+ transcription coactivator activity [PMID:21515579]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051123 RNA
+ polymerase II preinitiation complex assembly
+ [PMID:21515579]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1901714
+ positive regulation of urea catabolic process
+ [PMID:2406136]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1901717
+ positive regulation of gamma-aminobutyric acid catabolic
+ process [PMID:2406136]"
+ /experiment="EXISTENCE:physical interaction:GO:0001080
+ nitrogen catabolite activation of transcription from RNA
+ polymerase II promoter [PMID:10906145]"
+ /note="Positive regulator of genes in multiple nitrogen
+ degradation pathways; contains DNA binding domain but does
+ not appear to bind the dodecanucleotide sequence present
+ in the promoter region of many genes involved in allantoin
+ catabolism"
+ /codon_start=1
+ /product="Dal81p"
+ /protein_id="NP_012289.3"
+ /db_xref="GeneID:854841"
+ /db_xref="SGD:S000001462"
+ /translation="MDPHQSPADNAASPTKSVKATTKNSSTNNNVNSNNSNNNSNHDI
+ LNFNDNYTTILQHLANDHPNILREKGGSQQQQHQQQQQQQQQQQQQQQQQSLDTLLHH
+ YQSLLSKSDNAIAFDDNVSNSADHNGSNSNNNNNNNDISSPGNLMGSCNQCRLKKTKC
+ NYFPDLGNCLECETSRTKCTFSIAPNYLKRTSSGANNNMPTSSNSKRMKNFEDYSNRL
+ PSSMLYRHQQQQQQQQQQQRIQYPRSSFFVGPASVFDLNLTKHVRLDNVDQIQLSKTL
+ SLRKVSPTAQFILQDDFDTTLHSKQEYEVDLVENLVHPHGHLLVEIFFKLIHPFLPIL
+ HERVFLEKYSRSYRELTAPLLASIYSLALQYWDFHPALLGFPKPDVTAQLNNIALETF
+ YARVGRPKLSIIQTGLLILQCRSECHNNWVLCSSVVALAEELGLGVECNDWKLPKWEK
+ DLRKRLAWAVWLMDKWCALNEGRQSHLILGRNWMIKLLNFDDFPLNSPTILNSLQNDQ
+ SGSSPSSSNDVKNHQIAFGNLPIFNINPTLEDFKNGTLMFQQMVSLSIILGEIMDTFY
+ TQGSMTINKSIEQVLKLAKPLQLKLREWYHSLPKNLSMSYATPQKLNSNSTLTLAYFA
+ TEITLHRKIICALNPQTPKELVQVCRTAARTRLVAAIEFIRDLKNEHINAFWYNCSTG
+ NLMLIGTFAALLYVTSATKEEAMIFRDYVRNYTWVLKIGSKYFDKLSNALNNMHLLFA
+ QIPGLLTDEPVVVSPNSNINSVNPQRSGVQSQIPIQFNVGSPAMTEQGSPLNQWKNLP
+ QEILQQLNSFPNGTTSTTTPVNPTSRQTQLESQGSPAINSANNNSNNTPLPFAPNKSS
+ KKTSQSSPNVTPSHMSRHPPSNTSSPRVNSSTNVNSNTQMNASPLTSINETRQESGDA
+ ADEKTAGRERTANEESSTELKDDNPNSNQETSATGNQTIKMNDDKNVTINTRETPL"
+ gene complement(<402841..>403491)
+ /gene="INA22"
+ /locus_tag="YIR024C"
+ /db_xref="GeneID:854842"
+ mRNA complement(<402841..>403491)
+ /gene="INA22"
+ /locus_tag="YIR024C"
+ /product="Ina22p"
+ /transcript_id="NM_001179546.1"
+ /db_xref="GeneID:854842"
+ CDS complement(402841..403491)
+ /gene="INA22"
+ /locus_tag="YIR024C"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:16823961|PMID:14576278|PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0005743
+ mitochondrial inner membrane [PMID:24942160]"
+ /experiment="EXISTENCE:direct assay:GO:1990524 INA complex
+ [PMID:24942160]"
+ /experiment="EXISTENCE:genetic interaction:GO:0033615
+ mitochondrial proton-transporting ATP synthase complex
+ assembly [PMID:24942160]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0033615
+ mitochondrial proton-transporting ATP synthase complex
+ assembly [PMID:24942160]"
+ /experiment="EXISTENCE:physical interaction:GO:1990524 INA
+ complex [PMID:24942160]"
+ /note="F1F0 ATP synthase peripheral stalk assembly factor;
+ subunit of the matrix-exposed inner mitochondrial membrane
+ localized INA complex (Ina22p-Ina17p) involved in assembly
+ of the F1F0 peripheral stalk; co-purifies with Aim43p, ATP
+ synthase subunits, and cytochrome bc1 complex assembly
+ factors; interacts with Arh1p, a mitochondrial
+ oxidoreductase; deletion mutant has a respiratory growth
+ defect"
+ /codon_start=1
+ /product="Ina22p"
+ /protein_id="NP_012290.1"
+ /db_xref="GeneID:854842"
+ /db_xref="SGD:S000001463"
+ /translation="MFMARQVLRNGLFLRSLAPIKITARTVASANAGIKRKSRFDKTM
+ IKPLLLVMIFGSILNAVIAEKRNIIDMERKYKLKLDKLKELIRRVHDNNGKVDFDADD
+ ELKLVNLRLGIVGKNATGMKEDETDIVVPKEESLEEIWQSIIDEAKKEVIEKTPDAGV
+ KNKEGIVTDLNVLKDLEKSKKEDEKVYLSGDVHMMMNQPGDLNEIAKEHDKIPKFL"
+ gene <403659..>404765
+ /gene="MND2"
+ /locus_tag="YIR025W"
+ /db_xref="GeneID:854843"
+ mRNA <403659..>404765
+ /gene="MND2"
+ /locus_tag="YIR025W"
+ /product="Mnd2p"
+ /transcript_id="NM_001179547.1"
+ /db_xref="GeneID:854843"
+ CDS 403659..404765
+ /gene="MND2"
+ /locus_tag="YIR025W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:15797380]"
+ /experiment="EXISTENCE:direct assay:GO:0005680
+ anaphase-promoting complex [PMID:12609981|PMID:12574115]"
+ /experiment="EXISTENCE:direct assay:GO:0071173 spindle
+ assembly checkpoint signaling [PMID:22940250]"
+ /experiment="EXISTENCE:direct assay:GO:1902499 positive
+ regulation of protein autoubiquitination [PMID:22940250]"
+ /experiment="EXISTENCE:direct assay:GO:1990948 ubiquitin
+ ligase inhibitor activity [PMID:15797379]"
+ /experiment="EXISTENCE:genetic interaction:GO:1905785
+ negative regulation of anaphase-promoting
+ complex-dependent catabolic process [PMID:15797380]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000070 mitotic
+ sister chromatid segregation [PMID:15797379]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0007131
+ reciprocal meiotic recombination [PMID:11470404]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030071
+ regulation of mitotic metaphase/anaphase transition
+ [PMID:12609981]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1902426
+ deactivation of mitotic spindle assembly checkpoint
+ [PMID:22940250]"
+ /experiment="EXISTENCE:mutant phenotype:GO:1902499
+ positive regulation of protein autoubiquitination
+ [PMID:22940250]"
+ /experiment="EXISTENCE:physical interaction:GO:0005680
+ anaphase-promoting complex [PMID:15797379]"
+ /note="Subunit of the Anaphase-Promoting Complex/Cyclosome
+ (APC/C); necessary for maintaining sister chromatid
+ cohesion in prophase I of meiosis by inhibiting premature
+ ubiquitination and subsequent degradation of substrates by
+ the APC(Ama1) ubiquitin ligase"
+ /codon_start=1
+ /product="Mnd2p"
+ /protein_id="NP_012291.1"
+ /db_xref="GeneID:854843"
+ /db_xref="SGD:S000001464"
+ /translation="MARALRDISLFNDIRKDQNSAGAKHERYNMRDLRSKKNQHVNGI
+ DDYEDDSLDRFIRRKKSRVVKYIPSLSAYNVFNEFPYYPTSASQLLDGKLDEFLMLSE
+ QYKSRLPKIRKLGWNRFKPIGINKTMYELEMLRSRARAQNAEGNNEEDFRQHDSREED
+ PRNNGSIGRVILPHILQENEEYDTGEGVTGLHSMPNDSMAILANNSANNSQNEEVSEE
+ DEISYDYDAEFDHVVDEDDNEEGEVPGEGVEGIEVQRERIVPDDLLMRPTSLSRSLQQ
+ FVEEAHHLDRNPYDIDSDNDGEDSKVELDMNPDFEDDVGREHDYNSEYSQEPTSYGGI
+ TPDLASNWRNWTRERITSLDELMERRARQQRGQD"
+ gene complement(<404873..>405967)
+ /gene="YVH1"
+ /locus_tag="YIR026C"
+ /db_xref="GeneID:854844"
+ mRNA complement(<404873..>405967)
+ /gene="YVH1"
+ /locus_tag="YIR026C"
+ /product="tyrosine protein phosphatase YVH1"
+ /transcript_id="NM_001179548.3"
+ /db_xref="GeneID:854844"
+ CDS complement(404873..405967)
+ /gene="YVH1"
+ /locus_tag="YIR026C"
+ /EC_number="3.1.3.48"
+ /experiment="EXISTENCE:direct assay:GO:0004725 protein
+ tyrosine phosphatase activity [PMID:1334559]"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:19797078|PMID:19797079]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095|PMID:19797079|PMID:19797078]"
+ /experiment="EXISTENCE:direct assay:GO:0010494 cytoplasmic
+ stress granule [PMID:26777405]"
+ /experiment="EXISTENCE:direct assay:GO:1990275 preribosome
+ binding [PMID:19797078]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000027
+ ribosomal large subunit assembly [PMID:19797078]"
+ /experiment="EXISTENCE:genetic interaction:GO:0000055
+ ribosomal large subunit export from nucleus
+ [PMID:19797079]"
+ /experiment="EXISTENCE:genetic interaction:GO:0030476
+ ascospore wall assembly [PMID:10464190]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000027
+ ribosomal large subunit assembly
+ [PMID:19797078|PMID:19114459]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000055
+ ribosomal large subunit export from nucleus
+ [PMID:19797079]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006914
+ autophagy [PMID:26125457]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0006995
+ cellular response to nitrogen starvation [PMID:26125457]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0030476
+ ascospore wall assembly [PMID:10464190]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0051321 meiotic
+ cell cycle [PMID:8896280]"
+ /experiment="EXISTENCE:mutant phenotype:GO:2000786
+ positive regulation of autophagosome assembly
+ [PMID:26125457]"
+ /note="Dual specificity protein phosphatase; regulates
+ growth, sporulation, and glycogen accumulation in a
+ cAMP-dependent protein kinase cascade dependent manner;
+ mutants are defective in 60S ribosome assembly; positively
+ regulates pre-autophagosomal structure (PAS) formation
+ upon nitrogen starvation or rapamycin treatment"
+ /codon_start=1
+ /product="tyrosine protein phosphatase YVH1"
+ /protein_id="NP_012292.3"
+ /db_xref="GeneID:854844"
+ /db_xref="SGD:S000001465"
+ /translation="MAGNANSVDEEVTRILGGIYLGGIRPIIDHRPLGAEFNITHILS
+ VIKFQVIPEYLIRKGYTLKNIPIDDDDVTDVLQYFDETNRFIDQCLFPNEVEYSPRLV
+ DFKKKPQRGAVFAHCQAGLSRSVTFIVAYLMYRYGLSLSMAMHAVKRKKPSVEPNENF
+ MEQLHLFEKMGGDFVDFDNPAYKQWKLKQSIKLDPSGSELVSNSGMFKDSESSQDLDK
+ LTEAEKSKVTAVRCKKCRTKLALSTSFIAHDPPSKESSEGHFIKRAANSHRIIDIQES
+ QANCSHFFIEPLKWMQPELQGKQELEGKFSCPGCSSKVGGYNWKGSRCSCGKWVIPAI
+ HLQTSKVDQFPLQSTALPNMVNFESEKVNR"
+ gene complement(<406260..>407642)
+ /gene="DAL1"
+ /locus_tag="YIR027C"
+ /db_xref="GeneID:854845"
+ mRNA complement(<406260..>407642)
+ /gene="DAL1"
+ /locus_tag="YIR027C"
+ /product="allantoinase"
+ /transcript_id="NM_001179549.3"
+ /db_xref="GeneID:854845"
+ CDS complement(406260..407642)
+ /gene="DAL1"
+ /locus_tag="YIR027C"
+ /EC_number="3.5.2.5"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004038
+ allantoinase activity [PMID:4604238|PMID:1803816]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009442
+ allantoin assimilation pathway
+ [PMID:1803816|PMID:4604238]"
+ /note="Allantoinase; converts allantoin to allantoate in
+ the first step of allantoin degradation; expression
+ sensitive to nitrogen catabolite repression"
+ /codon_start=1
+ /product="allantoinase"
+ /protein_id="NP_012293.3"
+ /db_xref="GeneID:854845"
+ /db_xref="SGD:S000001466"
+ /translation="MPINAITSDHVIINGANKPATIVYSTESGTILDVLEGSVVMEKT
+ EITKYEIHTLENVSPCTILPGLVDSHVHLNEPGRTSWEGFETGTQAAISGGVTTVVDM
+ PLNAIPPTTNVENFRIKLEAAEGQMWCDVGFWGGLVPHNLPDLIPLVKAGVRGFKGFL
+ LDSGVEEFPPIGKEYIEEALKVLAEEDTMMMFHAELPKAHEDQQQPEQSHREYSSFLS
+ SRPDSFEIDAINLILECLRARNGPVPPVHIVHLASMKAIPLIRKARASGLPVTTETCF
+ HYLCIAAEQIPDGATYFKCCPPIRSESNRQGLWDALREGVIGSVVSDHSPCTPELKNL
+ QKGDFFDSWGGIASVGLGLPLMFTQGCSLVDIVTWCCKNTSHQVGLSHQKGTIAPGYD
+ ADLVVFDTASKHKISNSSVYFKNKLTAYNGMTVKGTVLKTILRGQVVYTNANGVSKTP
+ LGQTLLDSRR"
+ gene <408468..>410375
+ /gene="DAL4"
+ /locus_tag="YIR028W"
+ /db_xref="GeneID:854846"
+ mRNA <408468..>410375
+ /gene="DAL4"
+ /locus_tag="YIR028W"
+ /product="allantoin permease"
+ /transcript_id="NM_001179550.3"
+ /db_xref="GeneID:854846"
+ CDS 408468..410375
+ /gene="DAL4"
+ /locus_tag="YIR028W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000256
+ allantoin catabolic process [PMID:385448]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0005274
+ allantoin:proton symporter activity [PMID:385448]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0015720
+ allantoin transport [PMID:385448|PMID:3549700]"
+ /note="Allantoin permease; expression sensitive to
+ nitrogen catabolite repression and induced by allophanate,
+ an intermediate in allantoin degradation"
+ /codon_start=1
+ /product="allantoin permease"
+ /protein_id="NP_012294.3"
+ /db_xref="GeneID:854846"
+ /db_xref="SGD:S000001467"
+ /translation="MANDALSAIFSNPSRKGVQPSTSIVSYTNNEDDIIDVENGKFNK
+ NKNINTNVYVDNSSIEESEVVPLPETKSIWSKIYYDFIVLDKTTLNVSLKESFLYNRD
+ LKPVEEERRCWSWFNYLYFWLADCFNINTWQIAGTGLQLGLNWWQCWLTVWIGYTFAG
+ IFVVLNSRFGSAYHLSFPITVRASFGIFFSMWPIINRVVMAIVWYAVQAWLGATPVAL
+ MLKSIFGKNLEDRIPNHFGSPNSTTFEFMCFFIFWVVSIPFVLVAPHKIRHLFTVKAA
+ LIPFAAFGFLIWALKKSHGKIELGTLNDYSPHGSEFSWIFVRSLMACVANFAALIINA
+ PDFGRFAKNPQASLWPQLVAIPLFFAITCLIGIIVTAAGYHLYGVNYWSPLDVLGQFL
+ ETTYTRGTRAGVFLISFVFALAQLGTNISANSLACGADMTALFPRYINIRRGSLFCVA
+ MALCICPWNLMASSSKFTSALGAYAIFLSSIAGVICADYFVVRRGYVKLTHLFLAQKG
+ SFYMFGNKFGANWRAFVAYICGIAPNLPGFIGDVGAPKITVSEGAMRLYYLGYPVGFF
+ ISAVIYLILCYFFPVPGTPVTNFLTEKGWFQRWAYVEDFEQDWKNELRRDDLCDDTVS
+ IYDGTEEKIVY"
+ gene <410807..>411838
+ /gene="DAL2"
+ /locus_tag="YIR029W"
+ /gene_synonym="ALC1"
+ /db_xref="GeneID:854847"
+ mRNA <410807..>411838
+ /gene="DAL2"
+ /locus_tag="YIR029W"
+ /gene_synonym="ALC1"
+ /product="allantoicase"
+ /transcript_id="NM_001179551.1"
+ /db_xref="GeneID:854847"
+ CDS 410807..411838
+ /gene="DAL2"
+ /locus_tag="YIR029W"
+ /gene_synonym="ALC1"
+ /EC_number="3.5.3.4"
+ /experiment="EXISTENCE:direct assay:GO:0004037
+ allantoicase activity [PMID:3915539]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0000256
+ allantoin catabolic process [PMID:1916277]"
+ /note="Allantoicase; converts allantoate to urea and
+ ureidoglycolate in the second step of allantoin
+ degradation; expression sensitive to nitrogen catabolite
+ repression and induced by allophanate, an intermediate in
+ allantoin degradation"
+ /codon_start=1
+ /product="allantoicase"
+ /protein_id="NP_012295.1"
+ /db_xref="GeneID:854847"
+ /db_xref="SGD:S000001468"
+ /translation="MKFFSLADEAEFKSIIISKNKAVDVIGSKLGGQVVSFSDEWFAS
+ AENLIQPTAPIRDPTRFVHSGAWYDGWETRRHNEMEYDWVIIKMGVAAAHIIGGEIDT
+ AFFNGNHAPFVSIEALYDEGEEGNIVEDDSRWVEIVEKFECGPSQRHLFVRGNGLTKE
+ RFTHIKLKMYPDGGIARFRLYGRVVPPELKTKDHIIDLAYVCNGAVALKYSDQHFGSV
+ DNLLLPGRGHDMSDGWETKRSRQPGHTDWAVIQLGRESSFIEKIIVDTAHFRGNFPQF
+ ITVEGCLKESESSENTGEGTWVELVGKSKTGPDKEHVYEIRKSIRVSHVKLTIIPDGG
+ VKRIRVWGY"
+ rep_origin 411935..412011
+ /note="ARS922; Autonomously Replicating Sequence"
+ /db_xref="SGD:S000118400"
+ gene complement(<412036..>412770)
+ /gene="DCG1"
+ /locus_tag="YIR030C"
+ /db_xref="GeneID:854848"
+ mRNA complement(<412036..>412770)
+ /gene="DCG1"
+ /locus_tag="YIR030C"
+ /product="Dcg1p"
+ /transcript_id="NM_001179552.1"
+ /db_xref="GeneID:854848"
+ CDS complement(412036..412770)
+ /gene="DCG1"
+ /locus_tag="YIR030C"
+ /experiment="EXISTENCE:expression pattern:GO:0008152
+ metabolic process [PMID:1916277]"
+ /note="hypothetical protein; expression is sensitive to
+ nitrogen catabolite repression and regulated by Dal80p;
+ contains transmembrane domain"
+ /codon_start=1
+ /product="Dcg1p"
+ /protein_id="NP_012296.1"
+ /db_xref="GeneID:854848"
+ /db_xref="SGD:S000001469"
+ /translation="METRILVVNPNSSKSMTVSLRETIEKTFSMESCKISYFTGPDTS
+ PPQIDGQETSIKSMEACLPLLIDDQESVYYFQKFNGILIACFSDHPLVAKIKDRAAKE
+ KADVSIVGLLDSSINYCNLVGKKFSIITSNKEWIPILNNSVESKFLTGNTVNKNLWKG
+ TVSTDLQVLDLHSPENFQQIAEIIYRENIKKLDSDIVILGCAGFSGLQNKLAKTFQRD
+ GTLFLDTIEIGLQILITMIRFVNSQK"
+ gene complement(<413015..>414679)
+ /gene="DAL7"
+ /locus_tag="YIR031C"
+ /gene_synonym="MLS2; MSL2"
+ /db_xref="GeneID:854849"
+ mRNA complement(<413015..>414679)
+ /gene="DAL7"
+ /locus_tag="YIR031C"
+ /gene_synonym="MLS2; MSL2"
+ /product="malate synthase DAL7"
+ /transcript_id="NM_001179553.3"
+ /db_xref="GeneID:854849"
+ CDS complement(413015..414679)
+ /gene="DAL7"
+ /locus_tag="YIR031C"
+ /gene_synonym="MLS2; MSL2"
+ /EC_number="2.3.3.9"
+ /experiment="EXISTENCE:direct assay:GO:0000256 allantoin
+ catabolic process [PMID:8462696]"
+ /experiment="EXISTENCE:direct assay:GO:0004474 malate
+ synthase activity [PMID:8462696]"
+ /experiment="EXISTENCE:direct assay:GO:0005777 peroxisome
+ [PMID:27678487]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004474 malate
+ synthase activity [PMID:23642236]"
+ /note="Malate synthase; can accept butyryl-CoA as acyl-CoA
+ donor in addition to traditional substrate acetyl-CoA;
+ recycles glyoxylate generated during allantoin
+ degradation; SWAT-GFP and mCherry fusion proteins localize
+ to the cytosol; expression sensitive to nitrogen
+ catabolite repression and induced by allophanate, an
+ intermediate in allantoin degradation"
+ /codon_start=1
+ /product="malate synthase DAL7"
+ /protein_id="NP_012297.3"
+ /db_xref="GeneID:854849"
+ /db_xref="SGD:S000001470"
+ /translation="MVKISLDNTALYADIDTTPQFEPSKTTVADILTKDALEFIVLLH
+ RTFNSTRKQLLANRSNLQSKLDSGEYRFDFLPETEQIRNDPTWQGAIPAPGLINRSSE
+ ITGPPLRNMLVNALNAEVTTYMTDFEDSSSPTWENMIYGQVNLYDAIRNQIDFKTPRK
+ EYRLKDDISRLPTLIVRPRGWHMVEKHLYIDDEPISASIFDFGLYFYHNAKELVKIGK
+ GPYFYLPKMEHHMEVKLWNDIFCVAQDFIGMPRGTIRATVLIETLPAAFQMEEIIYQI
+ REHSSGLNCGRWDYIFSTIKKLRNLPEHVLPNRDLVTMTSPFMDAYVKRLINTCHRRG
+ VHAMGGMAAQIPIKDDPKANEAAMNKVRNDKIREMKNGHDGSWVAHPALAPICNEVFS
+ NMGTANQIYFVPDVHVTSSDLLNTKIQDAQVTTEGIRVNLDIGLQYMEAWLRGSGCVP
+ INHLMEDAATAEVSRCQLYQWVKHGVVLSDTGDKVTPELTAKILNEETAKLASASPLG
+ EKNKFALAAKYFLPEVTGKIFSDFLTTLLYDEIIKPSAKPVDLSKL"
+ gene complement(<415030..>415617)
+ /gene="DAL3"
+ /locus_tag="YIR032C"
+ /db_xref="GeneID:854850"
+ mRNA complement(<415030..>415617)
+ /gene="DAL3"
+ /locus_tag="YIR032C"
+ /product="ureidoglycolate hydrolase"
+ /transcript_id="NM_001179554.1"
+ /db_xref="GeneID:854850"
+ CDS complement(415030..415617)
+ /gene="DAL3"
+ /locus_tag="YIR032C"
+ /EC_number="4.3.2.3"
+ /experiment="EXISTENCE:curator inference:GO:0000256
+ allantoin catabolic process [PMID:3915539]"
+ /experiment="EXISTENCE:direct assay:GO:0050385
+ ureidoglycolate lyase activity [PMID:5965980]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0050385
+ ureidoglycolate lyase activity [PMID:3915539]"
+ /note="Ureidoglycolate lyase; converts ureidoglycolate to
+ glyoxylate and urea in the third step of allantoin
+ degradation; expression is sensitive to nitrogen
+ catabolite repression; this enzyme is sometimes referred
+ to 'ureidoglycolate hydrolase' but should not be confused
+ with the Arabidopsis thaliana ureidoglycolate hydrolase
+ enzyme which converts ureidoglycolate to glyoxylate,
+ ammonia and carbon dioxide"
+ /codon_start=1
+ /product="ureidoglycolate hydrolase"
+ /protein_id="NP_012298.1"
+ /db_xref="GeneID:854850"
+ /db_xref="SGD:S000001471"
+ /translation="MVTVVAETLTKESFEEYGTIISPDEEISRMQNLEKGANQGTAIK
+ LLQVSQVENKSTSKVPNWNLFRCFPQPHLNRVFTQGSNQAISHSIKVLEKHPCSTQTF
+ VPMGRTSAEVAYLVVVAKEIGNKPDLSTLRAFTCLGNQAVTYGLGTWHAPMIVLGKEE
+ HLDFSVLIYESLDPDRPEKDCVEEHYSDGDVCIII"
+ gene <416124..>419465
+ /gene="MGA2"
+ /locus_tag="YIR033W"
+ /db_xref="GeneID:854851"
+ mRNA <416124..>419465
+ /gene="MGA2"
+ /locus_tag="YIR033W"
+ /product="Mga2p"
+ /transcript_id="NM_001179555.1"
+ /db_xref="GeneID:854851"
+ CDS 416124..419465
+ /gene="MGA2"
+ /locus_tag="YIR033W"
+ /experiment="EXISTENCE:direct assay:GO:0005634 nucleus
+ [PMID:19061897]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0005789 endoplasmic
+ reticulum membrane [PMID:11007476]"
+ /experiment="EXISTENCE:genetic interaction:GO:0030466
+ silent mating-type cassette heterochromatin formation
+ [PMID:11063674]"
+ /experiment="EXISTENCE:genetic interaction:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:9927444]"
+ /experiment="EXISTENCE:genetic interaction:GO:2001280
+ positive regulation of unsaturated fatty acid biosynthetic
+ process [PMID:9927444]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0010106
+ cellular response to iron ion starvation [PMID:29627385]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0045944
+ positive regulation of transcription by RNA polymerase II
+ [PMID:11557770|PMID:29627385|PMID:11509659]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0048255 mRNA
+ stabilization [PMID:15220333]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071279
+ cellular response to cobalt ion [PMID:11509659]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0071456
+ cellular response to hypoxia [PMID:11509659]"
+ /note="ER membrane protein involved in regulation of OLE1
+ transcription; inactive ER form dimerizes and one subunit
+ is then activated by ubiquitin/proteasome-dependent
+ processing followed by nuclear targeting; MGA2 has a
+ paralog, SPT23, that arose from the whole genome
+ duplication"
+ /codon_start=1
+ /product="Mga2p"
+ /protein_id="NP_012299.1"
+ /db_xref="GeneID:854851"
+ /db_xref="SGD:S000001472"
+ /translation="MQQNSEFLTETPGSDPHISQLHANSVMESQLLDDFLLNGSPMYQ
+ DDSMAHINIDEGANFQNFIKTDEGDSPNLLSFEGIGNNTHVNQNVSTPLEEEMESNRA
+ LKEEEEDEHENKVFNEKNIGNPAHDEIVFGRKETIQSVYINPLDYLKVNAAQLPLDVE
+ VSGLPQVSRVENQLKLKVKITSETPLNQSMLYLPSDSISREKFYLKKNIEDFSEDFKK
+ NLLYINAFVLCAVSNRTTNVCTKCVKREQRRAARRKSGIADNLLWCNNINRRLVVFNN
+ KQVFPIMKTFDNVKEFELTTRLVCYCRHHKANNGFVILFTITDWQNRLLGKFTTTPIM
+ ITDRKPANMDTTKFNNTTTSSRRQLTEEESTTEYYSTDNNQLSKDENMPFQYTYQHNP
+ YDNDSQMNNIPLKDKNVPFPYSISQQTDLLQNNNLSLNLSLPNQHIPSPTSMSEEGSE
+ SFNYHHRDNDNPVRTISLTNIEQQSQLNQRKRARNNLENDIGKPLFKHSFSNSISATN
+ TMNPALHSMQDFSMKNNNNNLPSINRVIPSQGPINGGIEVTLLGCNFKDGLSVKFGSN
+ LALSTQCWSETTIVTYLPPAAYAGQVFVSITDTNNENNNDDLPQEIEINDNKKAIFTY
+ VDDTDRQLIELALQIVGLKMNGKLEDARNIAKRIVGNDSPDSGTNGNSCSKSTGPSPN
+ QHSMNLNTSVLYSDEVLIQKVIKSLNINSNISICDSLGRTLLHLACLKNYSSLVYTLI
+ KKGARVNDIDSFGLTPLHFACISGDPKIIKMLLNCKVNYSLRSHNGLTAREVFIANHI
+ HSKEIDKKQDNRDNHKFVHNDTYISEVLSLFEEFQNGTKFTDSVETDSNYSISRKYSQ
+ SSFNSSLLDNESLNENLFESQSMINPTSMEIQHPTLQLFENSSYSEYDQSDFEEDGDE
+ DLFVTDEVEKPGVACREEQSELLDIGSSANEPEEDNGSTSLWNRVLHRINDDLPKYED
+ LFPLSWGKDDKLKTTNQDSIVEQSASNIENSENSEEEDYEEEEEFLKKQFNRFFQNKQ
+ NFRNDKMLIFFWIPLTLLLLTWFIMYKFGNQDSSINHISELISEYLRIALAKFLLGNE
+ RMKTAFRSKLSNLQTTRMLNDLIVS"
+ gene complement(<419615..>420736)
+ /gene="LYS1"
+ /locus_tag="YIR034C"
+ /db_xref="GeneID:854852"
+ mRNA complement(<419615..>420736)
+ /gene="LYS1"
+ /locus_tag="YIR034C"
+ /product="saccharopine dehydrogenase (NAD+,
+ L-lysine-forming)"
+ /transcript_id="NM_001179556.3"
+ /db_xref="GeneID:854852"
+ CDS complement(419615..420736)
+ /gene="LYS1"
+ /locus_tag="YIR034C"
+ /EC_number="1.5.1.7"
+ /experiment="EXISTENCE:direct assay:GO:0003729 mRNA
+ binding [PMID:20844764]"
+ /experiment="EXISTENCE:direct assay:GO:0004754
+ saccharopine dehydrogenase (NAD+, L-lysine-forming)
+ activity [PMID:17002315]"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0009085 lysine
+ biosynthetic process [PMID:17247984]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0016558 protein
+ import into peroxisome matrix [PMID:35563734]"
+ /note="Saccharopine dehydrogenase (NAD+,
+ L-lysine-forming); catalyzes the conversion of
+ saccharopine to L-lysine, which is the final step in the
+ lysine biosynthesis pathway; contains mRNA binding
+ activity; required for targeting of Pls1p to the
+ peroxisomal matrix in the absence of lysine"
+ /codon_start=1
+ /product="saccharopine dehydrogenase (NAD+,
+ L-lysine-forming)"
+ /protein_id="NP_012300.3"
+ /db_xref="GeneID:854852"
+ /db_xref="SGD:S000001473"
+ /translation="MAAVTLHLRAETKPLEARAALTPTTVKKLIAKGFKIYVEDSPQS
+ TFNINEYRQAGAIIVPAGSWKTAPRDRIIIGLKEMPETDTFPLVHEHIQFAHCYKDQA
+ GWQNVLMRFIKGHGTLYDLEFLENDQGRRVAAFGFYAGFAGAALGVRDWAFKQTHSDD
+ EDLPAVSPYPNEKALVKDVTKDYKEALATGARKPTVLIIGALGRCGSGAIDLLHKVGI
+ PDANILKWDIKETSRGGPFDEIPQADIFINCIYLSKPIAPFTNMEKLNNPNRRLRTVV
+ DVSADTTNPHNPIPIYTVATVFNKPTVLVPTTAGPKLSVISIDHLPSLLPREASEFFS
+ HDLLPSLELLPQRKTAPVWVRAKKLFDRHCARVKRSSRL"
+ gene complement(<421026..>421790)
+ /gene="NRE1"
+ /locus_tag="YIR035C"
+ /db_xref="GeneID:854853"
+ mRNA complement(<421026..>421790)
+ /gene="NRE1"
+ /locus_tag="YIR035C"
+ /product="sepiapterin reductase family protein"
+ /transcript_id="NM_001179557.3"
+ /db_xref="GeneID:854853"
+ CDS complement(421026..421790)
+ /gene="NRE1"
+ /locus_tag="YIR035C"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:11914276|PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0102306 benzil
+ reductase [(S)-benzoin-forming] activity [PMID:37602278]"
+ /note="Benzil oxidoreductase with preference for NADPH;
+ localizes to the cytoplasm; sequence similarity with
+ short-chain dehydrogenase/reductases; putative paralog of
+ IRC24"
+ /codon_start=1
+ /product="sepiapterin reductase family protein"
+ /protein_id="NP_012301.3"
+ /db_xref="GeneID:854853"
+ /db_xref="SGD:S000001474"
+ /translation="MGKVILVTGVSRGIGKSIVDVLFSLDKDTVVYGVARSEAPLKKL
+ KEKYGDRFFYVVGDITEDSVLKQLVNAAVKGHGKIDSLVANAGVLEPVQNVNEIDVNA
+ WKKLYDINFFSIVSLVGIALPELKKTNGNVVFVSSDACNMYFSSWGAYGSSKAALNHF
+ AMTLANEERQVKAIAVAPGIVDTDMQVNIRENVGPSSMSAEQLKMFRGLKENNQLLDS
+ SVPATVYAKLALHGIPDGVNGQYLSYNDPALADFMP"
+ gene complement(<422074..>422865)
+ /gene="IRC24"
+ /locus_tag="YIR036C"
+ /db_xref="GeneID:854854"
+ mRNA complement(<422074..>422865)
+ /gene="IRC24"
+ /locus_tag="YIR036C"
+ /product="sepiapterin reductase family protein IRC24"
+ /transcript_id="NM_001179558.3"
+ /db_xref="GeneID:854854"
+ CDS complement(422074..422865)
+ /gene="IRC24"
+ /locus_tag="YIR036C"
+ /EC_number="1.1.1.320"
+ /experiment="EXISTENCE:direct assay:GO:0005737 cytoplasm
+ [PMID:14562095]"
+ /experiment="EXISTENCE:direct assay:GO:0050664
+ oxidoreductase activity, acting on NAD(P)H, oxygen as
+ acceptor [PMID:11796169]"
+ /experiment="EXISTENCE:direct assay:GO:0102306 benzil
+ reductase [(S)-benzoin-forming] activity [PMID:37602278]"
+ /note="Benzil oxidoreductase with preference for NADPH;
+ localizes to the cytoplasm; induced by the DNA-damaging
+ agent MMS; null mutant has elevated levels of spontaneous
+ Rad52p foci; sequence similarity with short-chain
+ dehydrogenase/reductases; putative paralog of IRC24"
+ /codon_start=1
+ /product="sepiapterin reductase family protein IRC24"
+ /protein_id="NP_012302.3"
+ /db_xref="GeneID:854854"
+ /db_xref="SGD:S000001475"
+ /translation="MGKVILITGASRGIGLQLVKTVIEEDDECIVYGVARTEAGLQSL
+ QREYGADKFVYRVLDITDRSRMEALVEEIRQKHGKLDGIVANAGMLEPVKSISQSNSE
+ HDIKQWERLFDVNFFSIVSLVALCLPLLKSSPFVGNIVFVSSGASVKPYNGWSAYGCS
+ KAALNHFAMDIASEEPSDKVRAVCIAPGVVDTQMQKDIRETLGPQGMTPKALERFTQL
+ YKTSSLLDPKVPAAVLAQLVLKGIPDSLNGQYLRYNDERLGPVQG"
+ gene <423128..>423619
+ /gene="HYR1"
+ /locus_tag="YIR037W"
+ /gene_synonym="GPX3; ORP1"
+ /db_xref="GeneID:854855"
+ mRNA <423128..>423619
+ /gene="HYR1"
+ /locus_tag="YIR037W"
+ /gene_synonym="GPX3; ORP1"
+ /product="peroxiredoxin HYR1"
+ /transcript_id="NM_001179559.1"
+ /db_xref="GeneID:854855"
+ CDS 423128..423619
+ /gene="HYR1"
+ /locus_tag="YIR037W"
+ /gene_synonym="GPX3; ORP1"
+ /EC_number="1.11.1.24"
+ /experiment="EXISTENCE:direct assay:GO:0004602 glutathione
+ peroxidase activity [PMID:19755417]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239]"
+ /experiment="EXISTENCE:direct assay:GO:0005758
+ mitochondrial intermembrane space [PMID:22984289]"
+ /experiment="EXISTENCE:direct assay:GO:0005782 peroxisomal
+ matrix [PMID:22659048]"
+ /experiment="EXISTENCE:direct assay:GO:0005829 cytosol
+ [PMID:26928762]"
+ /experiment="EXISTENCE:direct assay:GO:0047066
+ phospholipid-hydroperoxide glutathione peroxidase activity
+ [PMID:11445588]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0004602
+ glutathione peroxidase activity [PMID:10480913]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0034599
+ cellular response to oxidative stress [PMID:11445588]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0047066
+ phospholipid-hydroperoxide glutathione peroxidase activity
+ [PMID:11445588]"
+ /note="Glutathione peroxidase; functions as hydroperoxide
+ receptor to sense intracellular hydroperoxide levels and
+ transduce redox signal to Yap1p transcription factor;
+ glutathione peroxidase activity is neuroprotective in
+ models of Huntington's disease; HYR1 has a paralog, GPX1,
+ that arose from the whole genome duplication"
+ /codon_start=1
+ /product="peroxiredoxin HYR1"
+ /protein_id="NP_012303.1"
+ /db_xref="GeneID:854855"
+ /db_xref="SGD:S000001476"
+ /translation="MSEFYKLAPVDKKGQPFPFDQLKGKVVLIVNVASKCGFTPQYKE
+ LEALYKRYKDEGFTIIGFPCNQFGHQEPGSDEEIAQFCQLNYGVTFPIMKKIDVNGGN
+ EDPVYKFLKSQKSGMLGLRGIKWNFEKFLVDKKGKVYERYSSLTKPSSLSETIEELLK
+ EVE"
+ gene complement(<423809..>424513)
+ /gene="GTT1"
+ /locus_tag="YIR038C"
+ /db_xref="GeneID:854856"
+ mRNA complement(<423809..>424513)
+ /gene="GTT1"
+ /locus_tag="YIR038C"
+ /product="bifunctional glutathione transferase/peroxidase"
+ /transcript_id="NM_001179560.1"
+ /db_xref="GeneID:854856"
+ CDS complement(423809..424513)
+ /gene="GTT1"
+ /locus_tag="YIR038C"
+ /EC_number="2.5.1.18"
+ /experiment="EXISTENCE:direct assay:GO:0004364 glutathione
+ transferase activity [PMID:9792709]"
+ /experiment="EXISTENCE:direct assay:GO:0004602 glutathione
+ peroxidase activity [PMID:16709151]"
+ /experiment="EXISTENCE:direct assay:GO:0005739
+ mitochondrion [PMID:24769239|PMID:14576278|PMID:16823961]"
+ /experiment="EXISTENCE:direct assay:GO:0005741
+ mitochondrial outer membrane [PMID:16407407]"
+ /experiment="EXISTENCE:direct assay:GO:0005783 endoplasmic
+ reticulum [PMID:26928762|PMID:9792709]"
+ /experiment="EXISTENCE:direct assay:GO:0005886 plasma
+ membrane [PMID:16622836]"
+ /experiment="EXISTENCE:direct assay:GO:0006749 glutathione
+ metabolic process [PMID:9792709]"
+ /experiment="EXISTENCE:direct assay:GO:0071944 cell
+ periphery [PMID:26928762]"
+ /experiment="EXISTENCE:mutant phenotype:GO:0010731 protein
+ glutathionylation [PMID:27708136]"
+ /note="ER associated glutathione S-transferase; capable of
+ homodimerization; glutathione transferase for Yvc1p
+ vacuolar cation channel; expression induced during the
+ diauxic shift and throughout stationary phase; functional
+ overlap with Gtt2p, Grx1p, and Grx2p"
+ /codon_start=1
+ /product="bifunctional glutathione transferase/peroxidase"
+ /protein_id="NP_012304.1"
+ /db_xref="GeneID:854856"
+ /db_xref="SGD:S000001477"
+ /translation="MSLPIIKVHWLDHSRAFRLLWLLDHLNLEYEIVPYKRDANFRAP
+ PELKKIHPLGRSPLLEVQDRETGKKKILAESGFIFQYVLQHFDHSHVLMSEDADIADQ
+ INYYLFYVEGSLQPPLMIEFILSKVKDSGMPFPISYLARKVADKISQAYSSGEVKNQF
+ DFVEGEISKNNGYLVDGKLSGADILMSFPLQMAFERKFAAPEDYPAISKWLKTITSEE
+ SYAASKEKARALGSNF"
+ repeat_region complement(426201..426425)
+ /note="Ty1 LTR"
+ /rpt_type=long_terminal_repeat
+ /db_xref="SGD:S000007021"
+ gene complement(<430498..>432111)
+ /gene="YPS6"
+ /locus_tag="YIR039C"
+ /db_xref="GeneID:854857"
+ mRNA complement(<430498..>432111)
+ /gene="YPS6"
+ /locus_tag="YIR039C"
+ /product="aspartyl protease"
+ /transcript_id="NM_001179561.3"
+ /db_xref="GeneID:854857"
+ CDS complement(430498..432111)
+ /gene="YPS6"
+ /locus_tag="YIR039C"
+ /experiment="EXISTENCE:direct assay:GO:0009277 fungal-type
+ cell wall [PMID:10383953]"
+ /experiment="EXISTENCE:genetic interaction:GO:0031505
+ fungal-type cell wall organization [PMID:16087741]"
+ /note="Putative GPI-anchored aspartic protease; member of
+ the yapsin family of proteases involved in cell wall
+ growth and maintenance"
+ /codon_start=1
+ /product="aspartyl protease"
+ /protein_id="NP_012305.3"
+ /db_xref="GeneID:854857"
+ /db_xref="SGD:S000001478"
+ /translation="MQLISILSLLSSLMCSLTVLGSSASSYVKFPVQKLADIINICTQ
+ DVSTVFKRNEVLNTTVINGIGVYVVKMEIGTPPQTLYLQLDTGSSDMIVNNADIAYCK
+ SMSDGSDYASTDNYELTATFNGLPSTTISSEAYNTLCSYWGTFDASNSSTFENNATFF
+ NNTYGDGTYYAGTYGTDVVSFENITLNDFTFGVSNDTIGNPSGILGISLPIAEFTDGI
+ EYALALNRTPFIYDNFPMELKNQGKINKIAYSLFLNGPDAHFGSILFGAVDKSKYTGQ
+ LYTLPMLQAFNTLGSNPGMIITAQSVAILDSESGNKTVSDIQFPVMLDSGTTFSYLPT
+ EIAEAIGKSFDGEYSSDDQGYIFDCSKVNDTLLSVDFGGFNISANISNFVTSAKDRCV
+ LNVKQSESTYMLGDAFLVDAYVVYDLENYEISIAQASFNNQEEDIEVISDTVPGATPA
+ PGYFSTWVYKPGSPIGTGDFINVSWTSYSEFSQYKSLLATAAQSDDASSFSSSGGSSE
+ STTKKQNAGYKYRSSFSFSLLSFISYFLL"
+ gene <433929..>434303
+ /gene="PAU15"
+ /locus_tag="YIR041W"
+ /db_xref="GeneID:854859"
+ mRNA <433929..>434303
+ /gene="PAU15"
+ /locus_tag="YIR041W"
+ /product="seripauperin PAU15"
+ /transcript_id="NM_001179563.1"
+ /db_xref="GeneID:854859"
+ CDS 433929..434303
+ /gene="PAU15"
+ /locus_tag="YIR041W"
+ /experiment="EXISTENCE:direct assay:GO:0000324 fungal-type
+ vacuole [PMID:26928762]"
+ /note="hypothetical protein; member of the seripauperin
+ multigene family encoded mainly in subtelomeric regions;
+ SWAT-GFP and mCherry fusion proteins localize to the
+ vacuole"
+ /codon_start=1
+ /product="seripauperin PAU15"
+ /protein_id="NP_012307.1"
+ /db_xref="GeneID:854859"
+ /db_xref="SGD:S000001480"
+ /translation="MVKLTSIAAGVAAIAAGVAAAPATTTLSPSDERVNLVELGVYVS
+ DIRAHLAQYYLFQAAHPSETYPVEIAEAVFNYGDFTTMLTGIPAEQVTRVITGVPWYS
+ TRLRPAISSALSKDGIYTAIPK"
+ gene complement(<435273..>435983)
+ /locus_tag="YIR042C"
+ /db_xref="GeneID:854860"
+ mRNA complement(<435273..>435983)
+ /locus_tag="YIR042C"
+ /product="uncharacterized protein"
+ /transcript_id="NM_001179564.1"
+ /db_xref="GeneID:854860"
+ CDS complement(435273..435983)
+ /locus_tag="YIR042C"
+ /note="hypothetical protein; YIR042C is a non-essential
+ gene"
+ /codon_start=1
+ /product="uncharacterized protein"
+ /protein_id="NP_012308.1"
+ /db_xref="GeneID:854860"
+ /db_xref="SGD:S000001481"
+ /translation="MANLNIFGQEVGADVEGWTTRAFPEKVVLKGNTCRLEPLDRERH
+ GSELFSAYSEAGQKLWTYLPAGPFTNLEEYLEFIKELNETKDTVPFAIINKETERAVG
+ TLCLIRIDEANGSLEVGYVVFSPELQKTIIATEAQFLLMKYVFDDLQYRRYEWKCDSL
+ NGPSRRAAMRLGFKYEGTFRQVVVYKGRTRDTQWFSIIDKEWLRIRKTFEEWLDKTNF
+ ENGKQKRGIAAIRESLSN"
+ gene complement(437043..438179)
+ /locus_tag="YIR043C"
+ /gene_synonym="YIR044C"
+ /pseudo
+ /db_xref="GeneID:854861"
+ CDS complement(437043..438179)
+ /locus_tag="YIR043C"
+ /gene_synonym="YIR044C"
+ /note="Blocked reading frame; may encode a non-functional
+ member of the conserved, often subtelomerically-encoded
+ Cos protein family"
+ /pseudo
+ /codon_start=1
+ /db_xref="GeneID:854861"
+ /db_xref="SGD:S000001482"
+ telomere 439068..439888
+ /note="TEL09R; Telomeric region on the right arm of
+ Chromosome IX; composed of an X element core sequence, X
+ element combinatorial repeats, and a short terminal
+ stretch of telomeric repeats"
+ /db_xref="SGD:S000028970"
+CONTIG join(BK006942.2:1..439888)
+//
+
diff --git a/io/genbank/data/U49845.1.gb b/io/genbank/data/U49845.1.gb
new file mode 100644
index 000000000..7f05a703c
--- /dev/null
+++ b/io/genbank/data/U49845.1.gb
@@ -0,0 +1,164 @@
+LOCUS SCU49845 5028 bp DNA linear PLN 29-OCT-2018
+DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p
+ (AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION U49845
+VERSION U49845.1
+KEYWORDS .
+SOURCE Saccharomyces cerevisiae (brewer's yeast)
+ ORGANISM Saccharomyces cerevisiae
+ Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina;
+ Saccharomycetes; Saccharomycetales; Saccharomycetaceae;
+ Saccharomyces.
+REFERENCE 1 (bases 1 to 5028)
+ AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M.
+ TITLE Selection of axial growth sites in yeast requires Axl2p, a novel
+ plasma membrane glycoprotein
+ JOURNAL Genes Dev. 10 (7), 777-793 (1996)
+ PUBMED 8846915
+REFERENCE 2 (bases 1 to 5028)
+ AUTHORS Roemer,T.
+ TITLE Direct Submission
+ JOURNAL Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT
+ 06520, USA
+FEATURES Location/Qualifiers
+ source 1..5028
+ /organism="Saccharomyces cerevisiae"
+ /mol_type="genomic DNA"
+ /db_xref="taxon:4932"
+ /chromosome="IX"
+ mRNA <1..>206
+ /product="TCP1-beta"
+ CDS <1..206
+ /codon_start=3
+ /product="TCP1-beta"
+ /protein_id="AAA98665.1"
+ /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
+ AEVLLRVDNIIRARPRTANRQHM"
+ gene <687..>3158
+ /gene="AXL2"
+ mRNA <687..>3158
+ /gene="AXL2"
+ /product="Axl2p"
+ CDS 687..3158
+ /gene="AXL2"
+ /note="plasma membrane glycoprotein"
+ /codon_start=1
+ /product="Axl2p"
+ /protein_id="AAA98666.1"
+ /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
+ TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
+ VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
+ VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
+ TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
+ YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
+ DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
+ DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
+ NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
+ CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
+ NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
+ SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
+ YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
+ HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
+ VDFSNKSNVNVGQVKDIHGRIPEML"
+ gene complement(<3300..>4037)
+ /gene="REV7"
+ mRNA complement(<3300..>4037)
+ /gene="REV7"
+ /product="Rev7p"
+ CDS complement(3300..4037)
+ /gene="REV7"
+ /codon_start=1
+ /product="Rev7p"
+ /protein_id="AAA98667.1"
+ /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
+ FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
+ KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
+ RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
+ LISGDDKILNGVYSQYEEGESIFGSLF"
+ORIGIN
+ 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
+ 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
+ 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
+ 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg
+ 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa
+ 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa
+ 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat
+ 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga
+ 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc
+ 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga
+ 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta
+ 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag
+ 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa
+ 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata
+ 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga
+ 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac
+ 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg
+ 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc
+ 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa
+ 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca
+ 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac
+ 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa
+ 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag
+ 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct
+ 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac
+ 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa
+ 1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc
+ 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata
+ 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca
+ 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc
+ 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc
+ 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca
+ 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc
+ 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg
+ 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt
+ 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc
+ 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg
+ 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca
+ 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata
+ 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg
+ 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga
+ 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt
+ 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat
+ 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt
+ 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc
+ 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag
+ 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta
+ 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa
+ 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact
+ 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt
+ 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa
+ 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag
+ 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct
+ 3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt
+ 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact
+ 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa
+ 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg
+ 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt
+ 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc
+ 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca
+ 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc
+ 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc
+ 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat
+ 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa
+ 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga
+ 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat
+ 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc
+ 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc
+ 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa
+ 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg
+ 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc
+ 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt
+ 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg
+ 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg
+ 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt
+ 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt
+ 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat
+ 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc
+ 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct
+ 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta
+ 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac
+ 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct
+ 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct
+ 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc
+//
+
diff --git a/io/genbank/error.go b/io/genbank/error.go
new file mode 100644
index 000000000..7795815b9
--- /dev/null
+++ b/io/genbank/error.go
@@ -0,0 +1,27 @@
+package genbank
+
+import "fmt"
+
+// A GenbankSyntaxError denotes a syntax error in
+// a Genbank flatfile.
+type GenbankSyntaxError struct {
+ Line uint
+ Context string
+ Msg string
+ InnerErr error
+}
+
+// Error returns a human-readable error message.
+func (gse GenbankSyntaxError) Error() string {
+ msg := gse.Msg
+ if gse.InnerErr != nil {
+ msg = fmt.Errorf("%v: %w", msg, gse.InnerErr).Error()
+ }
+
+ return fmt.Sprintf("syntax error at line %v: %v\n%v\t%v", gse.Line, msg, gse.Line, gse.Context)
+}
+
+// Unwrap returns any errors underlying the syntax error, if applicable.
+func (gse GenbankSyntaxError) Unwrap() error {
+ return gse.InnerErr
+}
diff --git a/io/genbank/example_test.go b/io/genbank/example_test.go
deleted file mode 100644
index 89b128b85..000000000
--- a/io/genbank/example_test.go
+++ /dev/null
@@ -1,168 +0,0 @@
-package genbank_test
-
-import (
- "bytes"
- "fmt"
- "os"
- "path/filepath"
-
- "github.com/bebop/poly/io/genbank"
-)
-
-// This example shows how to open a genbank file and search for a gene given
-// its name. After finding it, notes about the particular gene are read.
-func Example_basic() {
- sequences, _ := genbank.Read("../../data/puc19.gbk")
- for _, feature := range sequences.Features {
- if feature.Attributes["gene"] == "bla" {
- fmt.Println(feature.Attributes["note"])
- }
- }
- // Output: confers resistance to ampicillin, carbenicillin, andrelated antibiotics
-}
-
-func ExampleRead() {
- sequence, _ := genbank.Read("../../data/puc19.gbk")
- fmt.Println(sequence.Meta.Locus.ModificationDate)
- // Output: 22-OCT-2019
-}
-
-func ExampleWrite() {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- fmt.Println(err.Error())
- }
- defer os.RemoveAll(tmpDataDir)
-
- sequences, _ := genbank.Read("../../data/puc19.gbk")
-
- tmpGbkFilePath := filepath.Join(tmpDataDir, "puc19.gbk")
- _ = genbank.Write(sequences, tmpGbkFilePath)
-
- testSequence, _ := genbank.Read(tmpGbkFilePath)
-
- fmt.Println(testSequence.Meta.Locus.ModificationDate)
- // Output: 22-OCT-2019
-}
-
-func ExampleBuild() {
- sequences, _ := genbank.Read("../../data/puc19.gbk")
- gbkBytes, _ := genbank.Build(sequences)
- testSequence, _ := genbank.Parse(bytes.NewReader(gbkBytes))
-
- fmt.Println(testSequence.Meta.Locus.ModificationDate)
- // Output: 22-OCT-2019
-}
-
-func ExampleParse() {
- file, _ := os.Open("../../data/puc19.gbk")
- sequence, _ := genbank.Parse(file)
-
- fmt.Println(sequence.Meta.Locus.ModificationDate)
- // Output: 22-OCT-2019
-}
-
-func ExampleReadMulti() {
- sequences, err := genbank.ReadMulti("../../data/multiGbk_test.seq")
- if err != nil {
- fmt.Println(err.Error())
- }
-
- fmt.Println(sequences[1].Meta.Locus.ModificationDate)
- // Output: 05-FEB-1999
-}
-
-func ExampleWriteMulti() {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- fmt.Println(err.Error())
- }
-
- sequences, _ := genbank.ReadMulti("../../data/multiGbk_test.seq")
- tmpGbkFilePath := filepath.Join(tmpDataDir, "multiGbk_test.seq")
-
- err = genbank.WriteMulti(sequences, tmpGbkFilePath)
-
- if err != nil {
- fmt.Println(err.Error())
- }
-
- testSequences, _ := genbank.ReadMulti(tmpGbkFilePath)
- isEqual := sequences[1].Meta.Locus.ModificationDate == testSequences[1].Meta.Locus.ModificationDate
- fmt.Println(isEqual)
- // Output: true
-}
-
-func ExampleBuildMulti() {
- sequences, _ := genbank.ReadMulti("../../data/multiGbk_test.seq")
- gbkBytes, _ := genbank.BuildMulti(sequences)
- testSequences, _ := genbank.ParseMulti(bytes.NewReader(gbkBytes))
-
- isEqual := sequences[1].Meta.Locus.ModificationDate == testSequences[1].Meta.Locus.ModificationDate
- fmt.Println(isEqual)
- // Output: true
-}
-
-func ExampleParseMulti() {
- file, _ := os.Open("../../data/multiGbk_test.seq")
- sequences, _ := genbank.ParseMulti(file)
- fmt.Println(sequences[1].Meta.Locus.ModificationDate)
- // Output: 05-FEB-1999
-}
-
-func ExampleGenbank_AddFeature() {
- // Sequence for greenflourescent protein (GFP) that we're using as test data for this example.
- gfpSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
-
- // initialize sequence and feature structs.
- var sequence genbank.Genbank
- var feature genbank.Feature
-
- // set the initialized sequence struct's sequence.
- sequence.Sequence = gfpSequence
-
- // Set the initialized feature name and sequence location.
- feature.Description = "Green Fluorescent Protein"
- feature.Location = genbank.Location{}
- feature.Location.Start = 0
- feature.Location.End = len(sequence.Sequence)
-
- // Add the GFP feature to the sequence struct.
- _ = sequence.AddFeature(&feature)
-
- // get the GFP feature sequence string from the sequence struct.
- featureSequence, _ := feature.GetSequence()
-
- // check to see if the feature was inserted properly into the sequence.
- fmt.Println(gfpSequence == featureSequence)
-
- // Output: true
-}
-
-func ExampleFeature_GetSequence() {
- // Sequence for greenflourescent protein (GFP) that we're using as test data for this example.
- gfpSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
-
- // initialize sequence and feature structs.
- var sequence genbank.Genbank
- var feature genbank.Feature
-
- // set the initialized sequence struct's sequence.
- sequence.Sequence = gfpSequence
-
- // Set the initialized feature name and sequence location.
- feature.Description = "Green Fluorescent Protein"
- feature.Location.Start = 0
- feature.Location.End = len(sequence.Sequence)
-
- // Add the GFP feature to the sequence struct.
- _ = sequence.AddFeature(&feature)
-
- // get the GFP feature sequence string from the sequence struct.
- featureSequence, _ := feature.GetSequence()
-
- // check to see if the feature was inserted properly into the sequence.
- fmt.Println(gfpSequence == featureSequence)
-
- // Output: true
-}
diff --git a/io/genbank/genbank.go b/io/genbank/genbank.go
deleted file mode 100644
index 6ef8cb43e..000000000
--- a/io/genbank/genbank.go
+++ /dev/null
@@ -1,1028 +0,0 @@
-/*
-Package genbank provides genbank parsers and writers.
-
-GenBank is a flat text file format developed in the 1980s to annotate genetic
-sequences, and has since become the standard for sharing annotated genetic
-sequences.
-
-This package provides a parser and writer to convert between the GenBank file
-format and the more general Genbank struct.
-*/
-package genbank
-
-import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "os"
- "regexp"
- "strconv"
- "strings"
-
- "github.com/bebop/poly/transform"
- "github.com/lunny/log"
- "github.com/mitchellh/go-wordwrap"
-)
-
-/******************************************************************************
-
-GBK specific IO related things begin here.
-
-******************************************************************************/
-
-var (
- readFileFn = os.ReadFile
- parseMultiNthFn = ParseMultiNth
- parseReferencesFn = parseReferences
-)
-
-// Genbank is the main struct for the Genbank file format.
-type Genbank struct {
- Meta Meta
- Features []Feature
- Sequence string // will be changed and include reader, writer, and byte slice.
-}
-
-// Meta holds the meta data for Genbank and other annotated sequence files.
-type Meta struct {
- Date string `json:"date"`
- Definition string `json:"definition"`
- Accession string `json:"accession"`
- Version string `json:"version"`
- Keywords string `json:"keywords"`
- Organism string `json:"organism"`
- Source string `json:"source"`
- Taxonomy []string `json:"taxonomy"`
- Origin string `json:"origin"`
- Locus Locus `json:"locus"`
- References []Reference `json:"references"`
- BaseCount []BaseCount `json:"base_count"`
- Other map[string]string `json:"other"`
- Name string `json:"name"`
- SequenceHash string `json:"sequence_hash"`
- SequenceHashFunction string `json:"hash_function"`
-}
-
-// Feature holds the information for a feature in a Genbank file and other annotated sequence files.
-type Feature struct {
- Type string `json:"type"`
- Description string `json:"description"`
- Attributes map[string]string `json:"attributes"`
- SequenceHash string `json:"sequence_hash"`
- SequenceHashFunction string `json:"hash_function"`
- Sequence string `json:"sequence"`
- Location Location `json:"location"`
- ParentSequence *Genbank `json:"-"`
-}
-
-// Reference holds information for one reference in a Meta struct.
-type Reference struct {
- Authors string `json:"authors"`
- Title string `json:"title"`
- Journal string `json:"journal"`
- PubMed string `json:"pub_med"`
- Remark string `json:"remark"`
- Range string `json:"range"`
- Consortium string `json:"consortium"`
-}
-
-// Locus holds Locus information in a Meta struct.
-type Locus struct {
- Name string `json:"name"`
- SequenceLength string `json:"sequence_length"`
- MoleculeType string `json:"molecule_type"`
- GenbankDivision string `json:"genbank_division"`
- ModificationDate string `json:"modification_date"`
- SequenceCoding string `json:"sequence_coding"`
- Circular bool `json:"circular"`
-}
-
-// Location is a struct that holds the location of a feature.
-type Location struct {
- Start int `json:"start"`
- End int `json:"end"`
- Complement bool `json:"complement"`
- Join bool `json:"join"`
- FivePrimePartial bool `json:"five_prime_partial"`
- ThreePrimePartial bool `json:"three_prime_partial"`
- GbkLocationString string `json:"gbk_location_string"`
- SubLocations []Location `json:"sub_locations"`
-}
-
-// BaseCount is a struct that holds the base counts for a sequence.
-type BaseCount struct {
- Base string
- Count int
-}
-
-// Precompiled regular expressions:
-var (
- basePairRegex = regexp.MustCompile(` \d* \w{2} `)
- circularRegex = regexp.MustCompile(` circular `)
- modificationDateRegex = regexp.MustCompile(`\d{2}-[A-Z]{3}-\d{4}`)
- partialRegex = regexp.MustCompile("<|>")
- sequenceRegex = regexp.MustCompile("[^a-zA-Z]+")
-)
-
-// AddFeature adds a feature to a Genbank struct.
-func (sequence *Genbank) AddFeature(feature *Feature) error {
- feature.ParentSequence = sequence
- sequence.Features = append(sequence.Features, *feature)
- return nil
-}
-
-// GetSequence returns the sequence of a feature.
-func (feature Feature) GetSequence() (string, error) {
- return getFeatureSequence(feature, feature.Location)
-}
-
-// getFeatureSequence takes a feature and location object and returns a sequence string.
-func getFeatureSequence(feature Feature, location Location) (string, error) {
- var sequenceBuffer bytes.Buffer
- var sequenceString string
- parentSequence := feature.ParentSequence.Sequence
-
- if len(location.SubLocations) == 0 {
- sequenceBuffer.WriteString(parentSequence[location.Start:location.End])
- } else {
- for _, subLocation := range location.SubLocations {
- sequence, _ := getFeatureSequence(feature, subLocation)
-
- sequenceBuffer.WriteString(sequence)
- }
- }
-
- // reverse complements resulting string if needed.
- if location.Complement {
- sequenceString = transform.ReverseComplement(sequenceBuffer.String())
- } else {
- sequenceString = sequenceBuffer.String()
- }
-
- return sequenceString, nil
-}
-
-// Read reads a GBK file from path and returns a Genbank struct.
-func Read(path string) (Genbank, error) {
- genbankSlice, err := ReadMultiNth(path, 1)
- if err != nil {
- return Genbank{}, err
- }
- genbank := genbankSlice[0]
- return genbank, err
-}
-
-// ReadMulti reads a multi Gbk from path and parses it into a slice of Genbank structs.
-func ReadMulti(path string) ([]Genbank, error) {
- return ReadMultiNth(path, -1)
-}
-
-// ReadMultiNth reads a multi Gbk from path and parses N entries into a slice of Genbank structs.
-func ReadMultiNth(path string, count int) ([]Genbank, error) {
- file, err := os.Open(path)
- if err != nil {
- return []Genbank{}, err
- }
-
- sequence, err := parseMultiNthFn(file, count)
- if err != nil {
- return []Genbank{}, err
- }
-
- return sequence, nil
-}
-
-// Write takes an Genbank list and a path string and writes out a genbank record to that path.
-func Write(sequences Genbank, path string) error {
- // build function always returns nil error.
- // This is for API consistency in case we need to
- // add error handling in the future.
- gbk, _ := Build(sequences)
-
- err := os.WriteFile(path, gbk, 0644)
- return err
-}
-
-// WriteMulti takes a slice of Genbank structs and a path string and writes out a multi genbank record to that path.
-func WriteMulti(sequences []Genbank, path string) error {
- // buildmulti function always returns nil error.
- // This is for API consistency in case we need to
- // add error handling in the future.
- gbk, _ := BuildMulti(sequences)
-
- err := os.WriteFile(path, gbk, 0644)
- return err
-}
-
-// Build builds a GBK byte slice to be written out to db or file.
-func Build(gbk Genbank) ([]byte, error) {
- gbkSlice := []Genbank{gbk}
- multiGBK, err := BuildMulti(gbkSlice)
- return multiGBK, err
-}
-
-// BuildMulti builds a MultiGBK byte slice to be written out to db or file.
-func BuildMulti(sequences []Genbank) ([]byte, error) {
- var gbkString bytes.Buffer
- for _, sequence := range sequences {
- locus := sequence.Meta.Locus
- var shape string
-
- if locus.Circular {
- shape = "circular"
- } else {
- shape = "linear"
- }
-
- fivespace := generateWhiteSpace(subMetaIndex)
-
- // building locus
- locusData := locus.Name + fivespace + locus.SequenceLength + " bp" + fivespace + locus.MoleculeType + fivespace + shape + fivespace + locus.GenbankDivision + fivespace + locus.ModificationDate
- locusString := "LOCUS " + locusData + "\n"
- gbkString.WriteString(locusString)
-
- // building other standard meta features
- definitionString := buildMetaString("DEFINITION", sequence.Meta.Definition)
- gbkString.WriteString(definitionString)
-
- accessionString := buildMetaString("ACCESSION", sequence.Meta.Accession)
- gbkString.WriteString(accessionString)
-
- versionString := buildMetaString("VERSION", sequence.Meta.Version)
- gbkString.WriteString(versionString)
-
- keywordsString := buildMetaString("KEYWORDS", sequence.Meta.Keywords)
- gbkString.WriteString(keywordsString)
-
- sourceString := buildMetaString("SOURCE", sequence.Meta.Source)
- gbkString.WriteString(sourceString)
-
- organismString := buildMetaString(" ORGANISM", sequence.Meta.Organism)
- gbkString.WriteString(organismString)
-
- if len(sequence.Meta.Taxonomy) > 0 {
- var taxonomyString strings.Builder
- for i, taxonomyData := range sequence.Meta.Taxonomy {
- taxonomyString.WriteString(taxonomyData)
- if len(sequence.Meta.Taxonomy) == i+1 {
- taxonomyString.WriteString(".")
- } else {
- taxonomyString.WriteString("; ")
- }
- }
- gbkString.WriteString(buildMetaString("", taxonomyString.String()))
- }
-
- // building references
- // TODO: could use reflection to get keys and make more general.
- for referenceIndex, reference := range sequence.Meta.References {
- referenceString := buildMetaString("REFERENCE", fmt.Sprintf("%d %s", referenceIndex+1, reference.Range))
- gbkString.WriteString(referenceString)
-
- if reference.Authors != "" {
- authorsString := buildMetaString(" AUTHORS", reference.Authors)
- gbkString.WriteString(authorsString)
- }
-
- if reference.Title != "" {
- titleString := buildMetaString(" TITLE", reference.Title)
- gbkString.WriteString(titleString)
- }
-
- if reference.Journal != "" {
- journalString := buildMetaString(" JOURNAL", reference.Journal)
- gbkString.WriteString(journalString)
- }
-
- if reference.PubMed != "" {
- pubMedString := buildMetaString(" PUBMED", reference.PubMed)
- gbkString.WriteString(pubMedString)
- }
- if reference.Consortium != "" {
- consrtmString := buildMetaString(" CONSRTM", reference.Consortium)
- gbkString.WriteString(consrtmString)
- }
- }
-
- // building other meta fields that are catch all
- otherKeys := make([]string, 0, len(sequence.Meta.Other))
- for key := range sequence.Meta.Other {
- otherKeys = append(otherKeys, key)
- }
-
- for _, otherKey := range otherKeys {
- otherString := buildMetaString(otherKey, sequence.Meta.Other[otherKey])
- gbkString.WriteString(otherString)
- }
-
- // start writing features section.
- gbkString.WriteString("FEATURES Location/Qualifiers\n")
- for _, feature := range sequence.Features {
- gbkString.WriteString(BuildFeatureString(feature))
- }
-
- if len(sequence.Meta.BaseCount) > 0 {
- gbkString.WriteString("BASE COUNT ")
- for _, baseCount := range sequence.Meta.BaseCount {
- gbkString.WriteString(strconv.Itoa(baseCount.Count) + " " + baseCount.Base + " ")
- }
- gbkString.WriteString("\n")
- }
- // start writing sequence section.
- gbkString.WriteString("ORIGIN\n")
-
- // iterate over every character in sequence range.
- for index, base := range sequence.Sequence {
- // if 60th character add newline then whitespace and index number and space before adding next base.
- if index%60 == 0 {
- if index != 0 {
- gbkString.WriteString("\n")
- }
- lineNumberString := strconv.Itoa(index + 1) // genbank indexes at 1 for some reason
- leadingWhiteSpaceLength := 9 - len(lineNumberString) // <- I wish I was kidding
- for i := 0; i < leadingWhiteSpaceLength; i++ {
- gbkString.WriteString(" ")
- }
- gbkString.WriteString(lineNumberString + " ")
- gbkString.WriteRune(base)
- // if base index is divisible by ten add a space (genbank convention)
- } else if index%10 == 0 {
- gbkString.WriteString(" ")
- gbkString.WriteRune(base)
- // else just add the base.
- } else {
- gbkString.WriteRune(base)
- }
- }
- // finish genbank file with "//" on newline (again a genbank convention)
- gbkString.WriteString("\n//\n")
- }
-
- return gbkString.Bytes(), nil
-}
-
-// Parse takes in a reader representing a single gbk/gb/genbank file and parses it into a Genbank struct.
-func Parse(r io.Reader) (Genbank, error) {
- genbankSlice, err := parseMultiNthFn(r, 1)
-
- if err != nil {
- return Genbank{}, err
- }
-
- return genbankSlice[0], err
-}
-
-// ParseMulti takes in a reader representing a multi gbk/gb/genbank file and parses it into a slice of Genbank structs.
-func ParseMulti(r io.Reader) ([]Genbank, error) {
- genbankSlice, err := parseMultiNthFn(r, -1)
-
- if err != nil {
- return []Genbank{}, err
- }
-
- return genbankSlice, err
-}
-
-type parseLoopParameters struct {
- newLocation bool
- quoteActive bool
- attribute string
- attributeValue string
- emptyAttribute bool
- sequenceBuilder strings.Builder
- parseStep string
- genbank Genbank // since we are scanning lines we need a Genbank struct to store the data outside the loop.
- feature Feature
- features []Feature
- metadataTag string
- metadataData []string //this stutters but will remain to make it easier to batch rename variables when compared to parameters.metadataTag.
- genbankStarted bool
- currentLine string
- prevline string
- multiLineFeature bool
-}
-
-// method to init loop parameters
-func (params *parseLoopParameters) init() {
- params.newLocation = true
- params.feature.Attributes = make(map[string]string)
- params.parseStep = "metadata"
- params.genbankStarted = false
- params.genbank.Meta.Other = make(map[string]string)
-}
-
-// ParseMultiNth takes in a reader representing a multi gbk/gb/genbank file and parses the first n records into a slice of Genbank structs.
-func ParseMultiNth(r io.Reader, count int) ([]Genbank, error) {
- scanner := bufio.NewScanner(r)
- var genbanks []Genbank
-
- // Sequence setup
-
- var parameters parseLoopParameters
- parameters.init()
-
- // Loop through each line of the file
- for lineNum := 0; scanner.Scan(); lineNum++ {
- // get line from scanner and split it
- line := scanner.Text()
- splitLine := strings.Split(strings.TrimSpace(line), " ")
-
- prevline := parameters.currentLine
- parameters.currentLine = line
- parameters.prevline = prevline
-
- // keep scanning until we find the start of the first record
- if !parameters.genbankStarted {
- // We detect the beginning of a new genbank file with "LOCUS"
- locusFlag := strings.Contains(line, "LOCUS")
-
- if locusFlag {
- parameters = parseLoopParameters{}
- parameters.init()
- parameters.genbank.Meta.Locus = parseLocus(line)
- parameters.genbankStarted = true
- }
- continue
- }
-
- switch parameters.parseStep {
- case "metadata":
- // Handle empty lines
- if len(line) == 0 {
- return genbanks, fmt.Errorf("Empty metadata line on line %d", lineNum)
- }
-
- // If we are currently reading a line, we need to figure out if it is a new meta line.
- if string(line[0]) != " " || parameters.metadataTag == "FEATURES" {
- // If this is true, it means we are beginning a new meta tag. In that case, let's save
- // the older data, and then continue along.
- switch parameters.metadataTag {
- case "DEFINITION":
- parameters.genbank.Meta.Definition = parseMetadata(parameters.metadataData)
- case "ACCESSION":
- parameters.genbank.Meta.Accession = parseMetadata(parameters.metadataData)
- case "VERSION":
- parameters.genbank.Meta.Version = parseMetadata(parameters.metadataData)
- case "KEYWORDS":
- parameters.genbank.Meta.Keywords = parseMetadata(parameters.metadataData)
- case "SOURCE":
- parameters.genbank.Meta.Source, parameters.genbank.Meta.Organism, parameters.genbank.Meta.Taxonomy = getSourceOrganism(parameters.metadataData)
- case "REFERENCE":
- reference, err := parseReferencesFn(parameters.metadataData)
- if err != nil {
- return []Genbank{}, fmt.Errorf("Failed in parsing reference above line %d. Got error: %s", lineNum, err)
- }
- parameters.genbank.Meta.References = append(parameters.genbank.Meta.References, reference)
-
- case "FEATURES":
- parameters.parseStep = "features"
-
- // We know that we are now parsing features, so lets initialize our first feature
- parameters.feature.Type = strings.TrimSpace(splitLine[0])
- parameters.feature.Location.GbkLocationString = strings.TrimSpace(splitLine[len(splitLine)-1])
- parameters.newLocation = true
-
- continue
-
- default:
- if parameters.metadataTag != "" {
- parameters.genbank.Meta.Other[parameters.metadataTag] = parseMetadata(parameters.metadataData)
- }
- }
-
- parameters.metadataTag = strings.TrimSpace(splitLine[0])
- parameters.metadataData = []string{strings.TrimSpace(line[len(parameters.metadataTag):])}
- } else {
- parameters.metadataData = append(parameters.metadataData, line)
- }
- case "features":
-
- baseCountFlag := strings.Contains(line, "BASE COUNT") // example string for BASE COUNT: "BASE COUNT 67070277 a 48055043 c 48111528 g 67244164 t 18475410 n"
- if baseCountFlag {
- fields := strings.Fields(line)
- for countIndex := 2; countIndex < len(fields)-1; countIndex += 2 { // starts at two because we don't want to include "BASE COUNT" in our fields
- count, err := strconv.Atoi(fields[countIndex])
- if err != nil {
- return []Genbank{}, err
- }
-
- baseCount := BaseCount{
- Base: fields[countIndex+1],
- Count: count,
- }
- parameters.genbank.Meta.BaseCount = append(parameters.genbank.Meta.BaseCount, baseCount)
- }
- break
- }
- // Switch to sequence parsing
- originFlag := strings.Contains(line, "ORIGIN") // we detect the beginning of the sequence with "ORIGIN"
- if originFlag {
- parameters.parseStep = "sequence"
-
- // save our completed attribute / qualifier string to the current feature
- if parameters.attributeValue != "" {
- parameters.feature.Attributes[parameters.attribute] = parameters.attributeValue
- parameters.features = append(parameters.features, parameters.feature)
- parameters.attributeValue = ""
- parameters.attribute = ""
- parameters.feature = Feature{}
- parameters.feature.Attributes = make(map[string]string)
- } else {
- parameters.features = append(parameters.features, parameters.feature)
- }
-
- // add our features to the genbank
- for _, feature := range parameters.features {
- location, err := parseLocation(feature.Location.GbkLocationString)
- if err != nil {
- return []Genbank{}, err
- }
- feature.Location = location
- err = parameters.genbank.AddFeature(&feature)
- if err != nil {
- return []Genbank{}, err
- }
- }
- continue
- } // end sequence parsing flag logic
-
- // check if current line contains anything but whitespace
- trimmedLine := strings.TrimSpace(line)
- if len(trimmedLine) < 1 {
- continue
- }
-
- // determine if current line is a new top level feature
- if countLeadingSpaces(parameters.currentLine) < countLeadingSpaces(parameters.prevline) || parameters.prevline == "FEATURES" {
- // save our completed attribute / qualifier string to the current feature
- if parameters.attributeValue != "" {
- parameters.feature.Attributes[parameters.attribute] = parameters.attributeValue
- parameters.features = append(parameters.features, parameters.feature)
- parameters.attributeValue = ""
- parameters.attribute = ""
- parameters.feature = Feature{}
- parameters.feature.Attributes = make(map[string]string)
- }
-
- // }
- // checks for empty types
- if parameters.feature.Type != "" {
- parameters.features = append(parameters.features, parameters.feature)
- }
-
- parameters.feature = Feature{}
- parameters.feature.Attributes = make(map[string]string)
-
- // An initial feature line looks like this: `source 1..2686` with a type separated by its location
- if len(splitLine) < 2 {
- return genbanks, fmt.Errorf("Feature line malformed on line %d. Got line: %s", lineNum, line)
- }
- parameters.feature.Type = strings.TrimSpace(splitLine[0])
- parameters.feature.Location.GbkLocationString = strings.TrimSpace(splitLine[len(splitLine)-1])
- parameters.multiLineFeature = false // without this we can't tell if something is a multiline feature or multiline qualifier
- } else if !strings.Contains(parameters.currentLine, "/") { // current line is continuation of a feature or qualifier (sub-constituent of a feature)
- // if it's a continuation of the current feature, add it to the location
- if !strings.Contains(parameters.currentLine, "\"") && (countLeadingSpaces(parameters.currentLine) > countLeadingSpaces(parameters.prevline) || parameters.multiLineFeature) {
- parameters.feature.Location.GbkLocationString += strings.TrimSpace(line)
- parameters.multiLineFeature = true // without this we can't tell if something is a multiline feature or multiline qualifier
- } else { // it's a continued line of a qualifier
- removeAttributeValueQuotes := strings.Replace(trimmedLine, "\"", "", -1)
-
- parameters.attributeValue = parameters.attributeValue + removeAttributeValueQuotes
- }
- } else if strings.Contains(parameters.currentLine, "/") { // current line is a new qualifier
- trimmedCurrentLine := strings.TrimSpace(parameters.currentLine)
- if trimmedCurrentLine[0] != '/' { // if we have an exception case, like (adenine(1518)-N(6)/adenine(1519)-N(6))-
- parameters.attributeValue = parameters.attributeValue + trimmedCurrentLine
- continue
- }
- // save our completed attribute / qualifier string to the current feature
- if parameters.attributeValue != "" || parameters.emptyAttribute {
- parameters.feature.Attributes[parameters.attribute] = parameters.attributeValue
- parameters.emptyAttribute = false
- }
- parameters.attributeValue = ""
- splitAttribute := strings.Split(line, "=")
- trimmedSpaceAttribute := strings.TrimSpace(splitAttribute[0])
- removedForwardSlashAttribute := strings.Replace(trimmedSpaceAttribute, "/", "", 1)
-
- parameters.attribute = removedForwardSlashAttribute
-
- var removeAttributeValueQuotes string
- if len(splitAttribute) == 1 { // handle case of ` /pseudo `, which has no text
- removeAttributeValueQuotes = ""
- parameters.emptyAttribute = true
- } else { // this is normally triggered
- removeAttributeValueQuotes = strings.Replace(splitAttribute[1], "\"", "", -1)
- }
- parameters.attributeValue = removeAttributeValueQuotes
- parameters.multiLineFeature = false // without this we can't tell if something is a multiline feature or multiline qualifier
- }
-
- case "sequence":
- if len(line) < 2 { // throw error if line is malformed
- return genbanks, fmt.Errorf("Too short line found while parsing genbank sequence on line %d. Got line: %s", lineNum, line)
- } else if line[0:2] == "//" { // end of sequence
- parameters.genbank.Sequence = parameters.sequenceBuilder.String()
-
- genbanks = append(genbanks, parameters.genbank)
- parameters.genbankStarted = false
- parameters.sequenceBuilder.Reset()
- } else { // add line to total sequence
- parameters.sequenceBuilder.WriteString(sequenceRegex.ReplaceAllString(line, ""))
- }
- default:
- log.Warnf("Unknown parse step: %s", parameters.parseStep)
- parameters.genbankStarted = false
- }
- }
- return genbanks, nil
-}
-
-func countLeadingSpaces(line string) int {
- return len(line) - len(strings.TrimLeft(line, " "))
-}
-
-func parseMetadata(metadataData []string) string {
- var outputMetadata string
- if len(metadataData) == 0 {
- return "."
- }
- for _, data := range metadataData {
- outputMetadata = outputMetadata + strings.TrimSpace(data) + " "
- }
- outputMetadata = outputMetadata[:len(outputMetadata)-1] // Remove trailing whitespace
- return outputMetadata
-}
-
-func parseReferences(metadataData []string) (Reference, error) {
- var reference Reference
- var err error
- rangeIndex := strings.Index(metadataData[0], "(")
- if rangeIndex != -1 {
- reference.Range = metadataData[0][rangeIndex:]
- }
- var referenceKey string
- var referenceValue string
-
- if len(metadataData) == 1 {
- return Reference{}, fmt.Errorf("Got reference with no additional information")
- }
-
- referenceKey = strings.Split(strings.TrimSpace(metadataData[1]), " ")[0]
- referenceValue = strings.TrimSpace(metadataData[1][len(referenceKey)+2:])
- for index := 2; index < len(metadataData); index++ {
- if len(metadataData[index]) > 3 {
- if metadataData[index][3] != ' ' {
- err = reference.addKey(referenceKey, referenceValue)
- if err != nil {
- return reference, err
- }
- referenceKey = strings.Split(strings.TrimSpace(metadataData[index]), " ")[0]
- referenceValue = strings.TrimSpace(metadataData[index][len(referenceKey)+2:])
- } else {
- // Otherwise, simply append the next metadata.
- referenceValue = referenceValue + " " + strings.TrimSpace(metadataData[index])
- }
- }
- }
- err = reference.addKey(referenceKey, referenceValue)
- if err != nil {
- return reference, err
- }
-
- return reference, nil
-}
-
-func (reference *Reference) addKey(referenceKey string, referenceValue string) error {
- switch referenceKey {
- case "AUTHORS":
- reference.Authors = referenceValue
- case "TITLE":
- reference.Title = referenceValue
- case "JOURNAL":
- reference.Journal = referenceValue
- case "PUBMED":
- reference.PubMed = referenceValue
- case "REMARK":
- reference.Remark = referenceValue
- case "CONSRTM":
- reference.Consortium = referenceValue
- default:
- return fmt.Errorf("ReferenceKey not in [AUTHORS, TITLE, JOURNAL, PUBMED, REMARK, CONSRTM]. Got: %s", referenceKey)
- }
- return nil
-}
-
-var genBankMoleculeTypes = []string{
- "DNA",
- "genomic DNA",
- "genomic RNA",
- "mRNA",
- "tRNA",
- "rRNA",
- "other RNA",
- "other DNA",
- "transcribed RNA",
- "viral cRNA",
- "unassigned DNA",
- "unassigned RNA",
-}
-
-// used in parseLocus function though it could be useful elsewhere.
-var genbankDivisions = []string{
- "PRI", //primate sequences
- "ROD", //rodent sequences
- "MAM", //other mamallian sequences
- "VRT", //other vertebrate sequences
- "INV", //invertebrate sequences
- "PLN", //plant, fungal, and algal sequences
- "BCT", //bacterial sequences
- "VRL", //viral sequences
- "PHG", //bacteriophage sequences
- "SYN", //synthetic sequences
- "UNA", //unannotated sequences
- "EST", //EST sequences (expressed sequence tags)
- "PAT", //patent sequences
- "STS", //STS sequences (sequence tagged sites)
- "GSS", //GSS sequences (genome survey sequences)
- "HTG", //HTG sequences (high-throughput genomic sequences)
- "HTC", //unfinished high-throughput cDNA sequencing
- "ENV", //environmental sampling sequences
-}
-
-// TODO rewrite with proper error handling.
-// parses locus from provided string.
-func parseLocus(locusString string) Locus {
- locus := Locus{}
-
- locusSplit := strings.Split(strings.TrimSpace(locusString), " ")
-
- var filteredLocusSplit []string
- for i := range locusSplit {
- if locusSplit[i] != "" {
- filteredLocusSplit = append(filteredLocusSplit, locusSplit[i])
- }
- }
-
- locus.Name = filteredLocusSplit[1]
-
- // sequence length and coding
- baseSequenceLength := basePairRegex.FindString(locusString)
- if baseSequenceLength != "" {
- splitBaseSequenceLength := strings.Split(strings.TrimSpace(baseSequenceLength), " ")
- if len(splitBaseSequenceLength) == 2 {
- locus.SequenceLength = splitBaseSequenceLength[0]
- locus.SequenceCoding = splitBaseSequenceLength[1]
- }
- }
-
- // molecule type
- for _, moleculeType := range genBankMoleculeTypes {
- moleculeRegex, _ := regexp.Compile(moleculeType)
- match := string(moleculeRegex.Find([]byte(locusString)))
- if match != "" {
- locus.MoleculeType = match
- break
- }
- }
-
- // circularity flag
- if circularRegex.Match([]byte(locusString)) {
- locus.Circular = true
- }
-
- // genbank division
- for _, genbankDivision := range genbankDivisions {
- genbankDivisionRegex, _ := regexp.Compile(genbankDivision)
- match := string(genbankDivisionRegex.Find([]byte(locusString)))
- if match != "" {
- locus.GenbankDivision = match
- break
- }
- }
-
- // ModificationDate
- locus.ModificationDate = modificationDateRegex.FindString(locusString)
-
- return locus
-}
-
-// indices for random points of interests on a gbk line.
-const subMetaIndex = 5
-const qualifierIndex = 21
-
-func getSourceOrganism(metadataData []string) (string, string, []string) {
- source := strings.TrimSpace(metadataData[0])
- var organism string
- var taxonomy []string
- for iterator := 1; iterator < len(metadataData); iterator++ {
- dataLine := metadataData[iterator]
- headString := strings.Split(strings.TrimSpace(dataLine), " ")[0]
- if headString == "ORGANISM" {
- index := strings.Index(dataLine, `ORGANISM`)
- organism = strings.TrimSpace(dataLine[index+len("ORGANISM"):])
- continue
- }
- for _, taxonomyData := range strings.Split(strings.TrimSpace(dataLine), ";") {
- taxonomyDataTrimmed := strings.TrimSpace(taxonomyData)
- // Taxonomy ends with a ".", which we check for here
- if len(taxonomyDataTrimmed) > 1 {
- if taxonomyDataTrimmed[len(taxonomyDataTrimmed)-1] == '.' {
- taxonomyDataTrimmed = taxonomyDataTrimmed[:len(taxonomyDataTrimmed)-1]
- }
- taxonomy = append(taxonomy, taxonomyDataTrimmed)
- }
- }
- }
- return source, organism, taxonomy
-}
-
-func parseLocation(locationString string) (Location, error) {
- var location Location
- location.GbkLocationString = locationString
- if !strings.ContainsAny(locationString, "(") { // Case checks for simple expression of x..x
- if !strings.ContainsAny(locationString, ".") { //Case checks for simple expression x
- position, err := strconv.Atoi(locationString)
- if err != nil {
- return Location{}, err
- }
- location = Location{Start: position, End: position}
- } else {
- // to remove FivePrimePartial and ThreePrimePartial indicators from start and end before converting to int.
- startEndSplit := strings.Split(locationString, "..")
- start, err := strconv.Atoi(partialRegex.ReplaceAllString(startEndSplit[0], ""))
- if err != nil {
- return Location{}, err
- }
- end, err := strconv.Atoi(partialRegex.ReplaceAllString(startEndSplit[1], ""))
- if err != nil {
- return Location{}, err
- }
- location = Location{Start: start - 1, End: end}
- }
- } else {
- firstOuterParentheses := strings.Index(locationString, "(")
- expression := locationString[firstOuterParentheses+1 : strings.LastIndex(locationString, ")")]
- switch command := locationString[0:firstOuterParentheses]; command {
- case "join":
- location.Join = true
- // This case checks for join(complement(x..x),complement(x..x)), or any more complicated derivatives
- if strings.ContainsAny(expression, "(") {
- firstInnerParentheses := strings.Index(expression, "(")
- ParenthesesCount := 1
- prevSubLocationStart := 0
- for i := firstInnerParentheses + 1; i < len(expression); i++ { // "(" is at 0, so we start at 1
- switch expression[i] {
- case '(':
- ParenthesesCount++
- case ')':
- ParenthesesCount--
- case ',':
- if ParenthesesCount == 0 {
- parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:i])
- if err != nil {
- return Location{}, err
- }
- parsedSubLocation.GbkLocationString = locationString
- location.SubLocations = append(location.SubLocations, parsedSubLocation)
- prevSubLocationStart = i + 1
- }
- }
- }
- if ParenthesesCount != 0 {
- return Location{}, fmt.Errorf("Unbalanced parentheses")
- }
- parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:])
- if err != nil {
- return Location{}, err
- }
- parsedSubLocation.GbkLocationString = locationString
- location.SubLocations = append(location.SubLocations, parsedSubLocation)
- } else { // This is the default join(x..x,x..x)
- for _, numberRange := range strings.Split(expression, ",") {
- joinLocation, err := parseLocation(numberRange)
- if err != nil {
- return Location{}, err
- }
- location.SubLocations = append(location.SubLocations, joinLocation)
- }
- }
-
- case "complement":
- // location.Complement = true
- subLocation, err := parseLocation(expression)
- if err != nil {
- return Location{}, err
- }
- subLocation.Complement = true
- subLocation.GbkLocationString = locationString
- location.SubLocations = append(location.SubLocations, subLocation)
- }
- }
-
- if strings.Contains(locationString, "<") {
- location.FivePrimePartial = true
- }
-
- if strings.Contains(locationString, ">") {
- location.ThreePrimePartial = true
- }
-
- // if excess root node then trim node. Maybe should just be handled with second arg?
- if location.Start == 0 && location.End == 0 && !location.Join && !location.Complement {
- location = location.SubLocations[0]
- }
-
- return location, nil
-}
-
-// buildMetaString is a helper function to build the meta section of genbank files.
-func buildMetaString(name string, data string) string {
- keyWhitespaceTrailLength := 12 - len(name) // I wish I was kidding.
- var keyWhitespaceTrail string
- for i := 0; i < keyWhitespaceTrailLength; i++ {
- keyWhitespaceTrail += " "
- }
- name += keyWhitespaceTrail
- wrappedData := wordwrap.WrapString(data, 68)
- splitData := strings.Split(wrappedData, "\n")
- var returnData string
- for index, datum := range splitData {
- if index == 0 {
- returnData = name + datum + "\n"
- } else {
- returnData += generateWhiteSpace(12) + datum + "\n"
- }
- }
-
- return returnData
-}
-
-// BuildLocationString is a recursive function that takes a location object and creates a gbk location string for Build()
-func BuildLocationString(location Location) string {
- var locationString string
-
- if location.Complement {
- location.Complement = false
- locationString = "complement(" + BuildLocationString(location) + ")"
- } else if location.Join {
- locationString = "join("
- for _, sublocation := range location.SubLocations {
- locationString += BuildLocationString(sublocation) + ","
- }
- locationString = strings.TrimSuffix(locationString, ",") + ")"
- } else {
- locationString = strconv.Itoa(location.Start+1) + ".." + strconv.Itoa(location.End)
- if location.FivePrimePartial {
- locationString = "<" + locationString
- }
-
- if location.ThreePrimePartial {
- locationString += ">"
- }
- }
- return locationString
-}
-
-// BuildFeatureString is a helper function to build gbk feature strings for Build()
-func BuildFeatureString(feature Feature) string {
- whiteSpaceTrailLength := 16 - len(feature.Type) // I wish I was kidding.
- whiteSpaceTrail := generateWhiteSpace(whiteSpaceTrailLength)
- var location string
-
- if feature.Location.GbkLocationString != "" {
- location = feature.Location.GbkLocationString
- } else {
- location = BuildLocationString(feature.Location)
- }
- featureHeader := generateWhiteSpace(subMetaIndex) + feature.Type + whiteSpaceTrail + location + "\n"
- returnString := featureHeader
-
- qualifierKeys := make([]string, 0, len(feature.Attributes))
- for key := range feature.Attributes {
- qualifierKeys = append(qualifierKeys, key)
- }
-
- for _, qualifier := range qualifierKeys {
- returnString += generateWhiteSpace(qualifierIndex) + "/" + qualifier + "=\"" + feature.Attributes[qualifier] + "\"\n"
- }
- return returnString
-}
-
-func generateWhiteSpace(length int) string {
- var spaceBuilder strings.Builder
-
- for i := 0; i < length; i++ {
- spaceBuilder.WriteString(" ")
- }
-
- return spaceBuilder.String()
-}
-
-/******************************************************************************
-
-GBK specific IO related things end here.
-
-******************************************************************************/
diff --git a/io/genbank/genbank_test.go b/io/genbank/genbank_test.go
deleted file mode 100644
index a0ac784cc..000000000
--- a/io/genbank/genbank_test.go
+++ /dev/null
@@ -1,804 +0,0 @@
-package genbank
-
-import (
- "encoding/json"
- "errors"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "strings"
- "testing"
-
- "reflect"
-
- "github.com/bebop/poly/transform"
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
- "github.com/stretchr/testify/assert"
-)
-
-/******************************************************************************
-
-Gbk/gb/genbank related benchmarks begin here.
-
-******************************************************************************/
-
-var singleGbkPaths = []string{
- "../../data/t4_intron.gb",
- "../../data/puc19.gbk",
- "../../data/puc19_snapgene.gb",
- "../../data/benchling.gb",
- "../../data/phix174.gb",
- "../../data/sample.gbk",
- // "../../data/pichia_chr1_head.gb",
-}
-
-func TestGbkIO(t *testing.T) {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- t.Error(err)
- }
- defer os.RemoveAll(tmpDataDir)
-
- // test single gbk read, write, build, parse
- for _, gbkPath := range singleGbkPaths {
- gbk, _ := Read(gbkPath)
-
- tmpGbkFilePath := filepath.Join(tmpDataDir, filepath.Base(gbkPath))
- _ = Write(gbk, tmpGbkFilePath)
-
- writeTestGbk, _ := Read(tmpGbkFilePath)
- if diff := cmp.Diff(gbk, writeTestGbk, []cmp.Option{cmpopts.IgnoreFields(Feature{}, "ParentSequence")}...); diff != "" {
- t.Errorf("Parsing the output of Build() does not produce the same output as parsing the original file, \"%s\", read with Read(). Got this diff:\n%s", filepath.Base(gbkPath), diff)
- }
- } // end test single gbk read, write, build, parse
-}
-
-func TestMultiLineFeatureParse(t *testing.T) {
- pichia, _ := Read("../../data/pichia_chr1_head.gb")
- var multilineOutput string
- for _, feature := range pichia.Features {
- multilineOutput = feature.Location.GbkLocationString
- }
-
- if multilineOutput != "join(<459260..459456,459556..459637,459685..459739,459810..>460126)" {
- t.Errorf("Failed to parse multiline genbank feature string")
- }
-}
-
-func TestMultiGenbankIO(t *testing.T) {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- t.Error(err)
- }
- defer os.RemoveAll(tmpDataDir)
-
- // Test multiline Genbank features
- gbkPath := "../../data/multiGbk_test.seq"
- multiGbk, _ := ReadMulti(gbkPath)
- tmpGbkFilePath := filepath.Join(tmpDataDir, filepath.Base(gbkPath))
- _ = WriteMulti(multiGbk, tmpGbkFilePath)
-
- writeTestGbk, _ := ReadMulti(tmpGbkFilePath)
-
- if diff := cmp.Diff(multiGbk, writeTestGbk, []cmp.Option{cmpopts.IgnoreFields(Feature{}, "ParentSequence")}...); diff != "" {
- t.Errorf("Parsing the output of Build() does not produce the same output as parsing the original file, \"%s\", read with Read(). Got this diff:\n%s", filepath.Base(gbkPath), diff)
- }
-}
-
-func TestGbkLocationStringBuilder(t *testing.T) {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- t.Error(err)
- }
- defer os.RemoveAll(tmpDataDir)
-
- scrubbedGbk, err := Read("../../data/sample.gbk")
- if err != nil {
- t.Error(err)
- }
-
- // removing gbkLocationString from features to allow testing for gbkLocationBuilder
- for featureIndex := range scrubbedGbk.Features {
- scrubbedGbk.Features[featureIndex].Location.GbkLocationString = ""
- }
-
- tmpGbkFilePath := filepath.Join(tmpDataDir, "sample.gbk")
- _ = Write(scrubbedGbk, tmpGbkFilePath)
-
- testInputGbk, _ := Read("../../data/sample.gbk")
- testOutputGbk, _ := Read(tmpGbkFilePath)
-
- if diff := cmp.Diff(testInputGbk, testOutputGbk, []cmp.Option{cmpopts.IgnoreFields(Feature{}, "ParentSequence")}...); diff != "" {
- t.Errorf("Issue with partial location building. Parsing the output of Build() does not produce the same output as parsing the original file read with Read(). Got this diff:\n%s", diff)
- }
-}
-
-func TestGbLocationStringBuilder(t *testing.T) {
- tmpDataDir, err := os.MkdirTemp("", "data-*")
- if err != nil {
- t.Error(err)
- }
- defer os.RemoveAll(tmpDataDir)
-
- scrubbedGb, _ := Read("../../data/t4_intron.gb")
-
- // removing gbkLocationString from features to allow testing for gbkLocationBuilder
- for featureIndex := range scrubbedGb.Features {
- scrubbedGb.Features[featureIndex].Location.GbkLocationString = ""
- }
-
- tmpGbFilePath := filepath.Join(tmpDataDir, "t4_intron_test.gb")
- _ = Write(scrubbedGb, tmpGbFilePath)
-
- testInputGb, _ := Read("../../data/t4_intron.gb")
- testOutputGb, _ := Read(tmpGbFilePath)
-
- if diff := cmp.Diff(testInputGb, testOutputGb, []cmp.Option{cmpopts.IgnoreFields(Feature{}, "ParentSequence")}...); diff != "" {
- t.Errorf("Issue with either Join or complement location building. Parsing the output of Build() does not produce the same output as parsing the original file read with Read(). Got this diff:\n%s", diff)
- }
-}
-
-func TestPartialLocationParseRegression(t *testing.T) {
- gbk, _ := Read("../../data/sample.gbk")
-
- for _, feature := range gbk.Features {
- if feature.Location.GbkLocationString == "687..3158>" && (feature.Location.Start != 686 || feature.Location.End != 3158) {
- t.Errorf("Partial location for three prime location parsing has failed. Parsing the output of Build() does not produce the same output as parsing the original file read with Read()")
- }
- }
- gbk, err := Read("../../data/sample.gbk")
- if err != nil {
- t.Errorf("Failed to read sample.gbk. Got err: %s", err)
- }
-
- for _, feature := range gbk.Features {
- if feature.Location.GbkLocationString == "687..3158>" && (feature.Location.Start != 686 || feature.Location.End != 3158) {
- t.Errorf("Partial location for three prime location parsing has failed. Parsing the output of Build() does not produce the same output as parsing the original file read with Read(). Got location start %d and location end %d. Expected 687..3158>.", feature.Location.Start, feature.Location.End)
- } else if feature.Location.GbkLocationString == "<1..206" && (feature.Location.Start != 0 || feature.Location.End != 206) {
- t.Errorf("Partial location for five prime location parsing has failed. Parsing the output of Build() does not produce the same output as parsing the original file read with Read().")
- }
- }
-}
-
-func TestSubLocationStringParseRegression(t *testing.T) {
- location := "join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))"
- parsedLocation, err := parseLocation(location)
- if err != nil {
- t.Errorf("Failed to parse location string. Got err: %s", err)
- }
- jsonFile, err := os.Open("../../data/parseLocationRegressionTest.json")
- // if we os.Open returns an error then handle it
- if err != nil {
- fmt.Println(err)
- }
- defer jsonFile.Close()
-
- byteValue, _ := io.ReadAll(jsonFile)
- var testParsedLocation Location
- err = json.Unmarshal(byteValue, &testParsedLocation)
- if err != nil {
- t.Errorf("Failed to unmarshal json. Got err: %s", err)
- }
-
- if diff := cmp.Diff(parsedLocation, testParsedLocation); diff != "" {
- t.Errorf("Failed to parse sublocation string. Got this diff:\n%s", diff)
- }
-}
-
-func TestSnapgeneGenbankRegression(t *testing.T) {
- snapgene, err := Read("../../data/puc19_snapgene.gb")
-
- if snapgene.Sequence == "" {
- t.Errorf("Parsing snapgene returned an empty string. Got error: %s", err)
- }
-}
-
-func TestGetSequenceMethod(t *testing.T) {
- gbk, _ := Read("../../data/t4_intron.gb")
-
- // Check to see if GetSequence method works on Features struct
- feature, _ := gbk.Features[1].GetSequence()
- seq := "atgagattacaacgccagagcatcaaagattcagaagttagaggtaaatggtattttaatatcatcggtaaagattctgaacttgttgaaaaagctgaacatcttttacgtgatatgggatgggaagatgaatgcgatggatgtcctctttatgaagacggagaaagcgcaggattttggatttaccattctgacgtcgagcagtttaaagctgattggaaaattgtgaaaaagtctgtttga"
- if feature != seq {
- t.Errorf("Feature GetSequence method has failed. Got this:\n%s instead of \n%s", feature, seq)
- }
-}
-
-func TestLocationParser(t *testing.T) {
- gbk, _ := Read("../../data/t4_intron.gb")
-
- // Read 1..243
- feature, _ := gbk.Features[1].GetSequence()
- seq := "atgagattacaacgccagagcatcaaagattcagaagttagaggtaaatggtattttaatatcatcggtaaagattctgaacttgttgaaaaagctgaacatcttttacgtgatatgggatgggaagatgaatgcgatggatgtcctctttatgaagacggagaaagcgcaggattttggatttaccattctgacgtcgagcagtttaaagctgattggaaaattgtgaaaaagtctgtttga"
- if feature != seq {
- t.Errorf("Feature sequence parser has changed on test '1..243'. Got this:\n%s instead of \n%s", feature, seq)
- }
-
- // Read join(893..1441,2459..2770)
- featureJoin, _ := gbk.Features[6].GetSequence()
- seqJoin := "atgaaacaataccaagatttaattaaagacatttttgaaaatggttatgaaaccgatgatcgtacaggcacaggaacaattgctctgttcggatctaaattacgctgggatttaactaaaggttttcctgcggtaacaactaagaagctcgcctggaaagcttgcattgctgagctaatatggtttttatcaggaagcacaaatgtcaatgatttacgattaattcaacacgattcgttaatccaaggcaaaacagtctgggatgaaaattacgaaaatcaagcaaaagatttaggataccatagcggtgaacttggtccaatttatggaaaacagtggcgtgattttggtggtgtagaccaaattatagaagttattgatcgtattaaaaaactgccaaatgataggcgtcaaattgtttctgcatggaatccagctgaacttaaatatatggcattaccgccttgtcatatgttctatcagtttaatgtgcgtaatggctatttggatttgcagtggtatcaacgctcagtagatgttttcttgggtctaccgtttaatattgcgtcatatgctacgttagttcatattgtagctaagatgtgtaatcttattccaggggatttgatattttctggtggtaatactcatatctatatgaatcacgtagaacaatgtaaagaaattttgaggcgtgaacctaaagagctttgtgagctggtaataagtggtctaccttataaattccgatatctttctactaaagaacaattaaaatatgttcttaaacttaggcctaaagatttcgttcttaacaactatgtatcacaccctcctattaaaggaaagatggcggtgtaa"
- if featureJoin != seqJoin {
- t.Errorf("Feature sequence parser has changed on test 'join(893..1441,2459..2770)'. Got this:\n%s instead of \n%s", featureJoin, seqJoin)
- }
-
- // Read complement(2791..3054)
- featureComplement, _ := gbk.Features[10].GetSequence()
- seqComplement := "ttattcactacccggcatagacggcccacgctggaataattcgtcatattgtttttccgttaaaacagtaatatcgtagtaacagtcagaagaagttttaactgtggaaattttattatcaaaatactcacgagtcattttatgagtatagtattttttaccataaatggtaataggctgttctggtcctggaacttctaactcgcttgggttaggaagtgtaaaaagaactacaccagaagtatctttaaatcgtaaaatcat"
- if featureComplement != seqComplement {
- t.Errorf("Feature sequence parser has changed on test 'complement(2791..3054)'. Got this:\n%s instead of \n%s", featureComplement, seqComplement)
- }
-
- // Read join(complement(315..330),complement(339..896))
- // Note: it is known that some software, like Snapgene, assumes that since both strands are in the reverse direction
- // that the first sequence should be appended to the reverse sequence, instead of the second sequence
- // getting appended to the first. Biopython appends the second sequence to the first, and that is logically
- // the most obvious thing to do, so we are implementing it that way.
- featureJoinComplement, _ := gbk.Features[3].GetSequence()
- seqJoinComplement := "ataccaatttaatcattcatttatatactgattccgtaagggttgttacttcatctattttataccaatgcgtttcaaccatttcacgcttgcttatatcatcaagaaaacttgcgtctaattgaactgttgaattaacacgatgccttttaacgatgcgagaaacaactacttcatctgcataaggtaatgcagcatataacagagcaggcccgccaattacacttactttagaattctgatcaagcatagtttcgaatggtgcattagggcttgacacttgaatttcgccgccagaaatgtaagttatatattgctcccaagtaatatagaaatgtgctaaatcgccgtctttagttacaggataatcacgcgcaaggtcacacaccacaatatggctacgaccaggaagtaatgtaggcaatgactggaacgttttagcacccataatcataattgtgccttcagtacgagctttaaaattctggaggtcctttttaactcgtccccatggtaaaccatcacctaaaccgaatgctaattcattaaagccgtcgaccgttttagttggaga"
- if featureJoinComplement != seqJoinComplement {
- t.Errorf("Feature sequence parser has changed on test 'join(complement(315..330),complement(339..896))'. Got this:\n%s instead of \n%s", featureJoinComplement, seqJoinComplement)
- }
-
- // Read complement(join(893..1098,1101..2770))
- featureComplementJoin, _ := gbk.Features[5].GetSequence()
- seqComplementJoin := "ttacaccgccatctttcctttaataggagggtgtgatacatagttgttaagaacgaaatctttaggcctaagtttaagaacatattttaattgttctttagtagaaagatatcggaatttataaggtagaccacttattaccagctcacaaagctctttaggttcacgcctcaaaatttctttacattgttctacgtgattcatatagatatgagtattaccaccagaaaatatcaaatcccctggaataagattacacatcttagctacaatatgaactaacgtagcatatgacgcaatattaaacggtagcattatgttcagataaggtcgttaatcttaccccggaattatatccagctgcatgtcaccatgcagagcagactatatctccaacttgttaaagcaagttgtctatcgtttcgagtcacttgaccctactccccaaagggatagtcgttaggcatttatgtagaaccaattccatttatcagattttacacgataagtaactaatccagacgaaattttaaaatgtctagctgcatctgctgcacaatcaaaaataaccccatcacatgaaatctttttaatattactaggctttttacctttcatcttttctgatattttagatttagttatgtctgaatgcttatgattaaagaatgaattattttcacctgaacgatttctgcatttactacaagtataagcagaagtttgtatgcgaacaccgcacttacaaaacttatgggtttctggattccaacgcccgtttttacttccgggtttactgtaaagagctttccgaccatcaggtccaagtttaagcatcttagctttaacagtttcagaacgtttcttaataatttcttcttttaatggatgcgtagaacatgtatcaccaaacgttgcatcagcaatattgtatccattaattttagaattaagctctttaatccaaaaattttctcgttcaataatcaaatctttctcatatggaatttcttccaaaatagaacattcaaacacattaccatgtttgttaaaagacctctgaagttttatagaagaatggcatcctttttctaaatctttaaaatgcctcttccatctcttttcaaaatctttagcacttcctacatatactttattgtttaaagtatttttaatctgataaattccgcttttcataaatacctctttaaatatagaagtatttattaaagggcaagtcctacaatttagcacgggattgtctactagagaggttccccgtttagatagattacaagtataagtcaccttatactcaggcctcaattaacccaagaaaacatctactgagcgttgataccactgcaaatccaaatagccattacgcacattaaactgatagaacatatgacaaggcggtaatgccatatatttaagttcagctggattccatgcagaaacaatttgacgcctatcatttggcagttttttaatacgatcaataacttctataatttggtctacaccaccaaaatcacgccactgttttccataaattggaccaagttcaccgctatggtatcctaaatcttttgcttgattttcgtaattttcatcccagactgttttgccttggattaacgaatcgtgttgaattaatcgtaaatcatacatttgtgcttcctgataaaaaccatattagctcagcaatgcaagctttccaggcgagcttcttagttgttaccgcaggaaaacctttagttaaatcccagcgtaatttagatccgaacagagcaattgttcctgtgcctgtacgatcatcggtttcataaccattttcaaaaatgtctttaattaaatcttggtattgtttcat"
- if featureComplementJoin != seqComplementJoin {
- t.Errorf("Feature sequence parser has changed on test 'complement(join(893..1098,1101..2770))'. Got this:\n%s instead of \n%s", featureComplementJoin, seqComplementJoin)
- }
-}
-
-func TestGenbankNewlineParsingRegression(t *testing.T) {
- gbk, _ := Read("../../data/puc19.gbk")
-
- for _, feature := range gbk.Features {
- if feature.Location.Start == 410 && feature.Location.End == 1750 && feature.Type == "CDS" {
- if feature.Attributes["product"] != "chromosomal replication initiator informational ATPase" {
- t.Errorf("Newline parsing has failed.")
- }
- break
- }
- }
-}
-
-func BenchmarkRead(b *testing.B) {
- for i := 0; i < b.N; i++ {
- _, _ = Read("../../data/bsub.gbk")
- }
-}
-
-func BenchmarkRead1(b *testing.B) { BenchmarkRead(b) }
-func BenchmarkRead10(b *testing.B) { BenchmarkRead(b) }
-func BenchmarkRead100(b *testing.B) { BenchmarkRead(b) }
-func BenchmarkRead1000(b *testing.B) { BenchmarkRead(b) }
-func BenchmarkRead10000(b *testing.B) { BenchmarkRead(b) }
-
-/******************************************************************************
-
-Gbk/gb/genbank related benchmarks end here.
-
-******************************************************************************/
-
-func TestBenchlingGenbank(t *testing.T) {
- sequence, _ := Read("../../data/benchling.gb")
-
- if len(sequence.Features) != 17 {
- t.Errorf("Parsing benchling genbank file not returned the correct quantity of features")
- }
-}
-
-func TestParse(t *testing.T) {
- type args struct {
- r io.Reader
- }
- tests := []struct {
- name string
- args args
- want Genbank
- wantErr bool
- }{
- // TODO: Add test cases.
- // empty line in genbank meta data
- // {
-
- // name: "empty line in genbank meta data",
- // args: args{r: strings.NewReader("LOCUS puc19.gbk 2686 bp DNA circular 22-OCT-2019")},
- // wantErr: true,
- // },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := Parse(tt.args.r)
- if (err != nil) != tt.wantErr {
- t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("Parse() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestParseMulti(t *testing.T) {
- type args struct {
- r io.Reader
- }
- tests := []struct {
- name string
- args args
- want []Genbank
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := ParseMulti(tt.args.r)
- if (err != nil) != tt.wantErr {
- t.Errorf("ParseMulti() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("ParseMulti() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-// this was hand-written and tests the same as the above suite.
-func TestFeature_GetSequence_Legacy(t *testing.T) {
- // This test is a little too complex and contrived for an example function.
- // Essentially, it's testing GetSequence()'s ability to parse and retrieve sequences from complex location structures.
- // This was originally covered in the old package system it was not covered in the new package system so I decided to include it here.
-
- // Sequence for greenflourescent protein (GFP) that we're using as test data for this example.
- gfpSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
-
- sequenceLength := len(gfpSequence)
-
- // Splitting the sequence into two parts to make a multi-location feature.
- sequenceFirstHalf := gfpSequence[:sequenceLength/2]
- sequenceSecondHalf := transform.ReverseComplement(gfpSequence[sequenceLength/2:]) // This feature is reverse complemented.
-
- // rejoining the two halves into a single string where the second half of the sequence is reverse complemented.
- gfpSequenceModified := sequenceFirstHalf + sequenceSecondHalf
-
- // initialize sequence and feature structs.
- var sequence Genbank
- var feature Feature
-
- // set the initialized sequence struct's sequence.
- sequence.Sequence = gfpSequenceModified
- // initialize sublocations to be usedin the feature.
-
- var subLocation Location
- var subLocationReverseComplemented Location
-
- subLocation.Start = 0
- subLocation.End = sequenceLength / 2
-
- subLocationReverseComplemented.Start = sequenceLength / 2
- subLocationReverseComplemented.End = sequenceLength
- subLocationReverseComplemented.Complement = true // According to genbank complement means reverse complement. What a country.
-
- feature.Description = "Green Fluorescent Protein"
- feature.Location.SubLocations = []Location{subLocation, subLocationReverseComplemented}
-
- // Add the GFP feature to the sequence struct.
- _ = sequence.AddFeature(&feature)
-
- // get the GFP feature sequence string from the sequence struct.
- featureSequence, _ := feature.GetSequence()
-
- // check to see if the feature was inserted properly into the sequence.
- if gfpSequence != featureSequence {
- t.Error("Feature sequence was not properly retrieved.")
- }
-}
-
-func Test_parseLoopParameters_init(t *testing.T) {
- type fields struct {
- newLocation bool
- quoteActive bool
- attribute string
- attributeValue string
- sequenceBuilder strings.Builder
- parseStep string
- genbank Genbank
- feature Feature
- features []Feature
- metadataTag string
- metadataData []string
- genbankStarted bool
- }
- tests := []struct {
- name string
- fields fields
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- params := &parseLoopParameters{
- newLocation: tt.fields.newLocation,
- quoteActive: tt.fields.quoteActive,
- attribute: tt.fields.attribute,
- attributeValue: tt.fields.attributeValue,
- sequenceBuilder: tt.fields.sequenceBuilder,
- parseStep: tt.fields.parseStep,
- genbank: tt.fields.genbank,
- feature: tt.fields.feature,
- features: tt.fields.features,
- metadataTag: tt.fields.metadataTag,
- metadataData: tt.fields.metadataData,
- genbankStarted: tt.fields.genbankStarted,
- }
- params.init()
- })
- }
-}
-
-func TestParseMultiNth(t *testing.T) {
- type args struct {
- r io.Reader
- count int
- }
- tests := []struct {
- name string
- args args
- want []Genbank
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := ParseMultiNth(tt.args.r, tt.args.count)
- if (err != nil) != tt.wantErr {
- t.Errorf("ParseMultiNth() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("ParseMultiNth() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func Test_parseMetadata(t *testing.T) {
- type args struct {
- metadataData []string
- }
- tests := []struct {
- name string
- args args
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := parseMetadata(tt.args.metadataData); got != tt.want {
- t.Errorf("parseMetadata() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func Test_parseReferences(t *testing.T) {
- type args struct {
- metadataData []string
- }
- tests := []struct {
- name string
- args args
- want Reference
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := parseReferences(tt.args.metadataData)
- if (err != nil) != tt.wantErr {
- t.Errorf("parseReferences() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("parseReferences() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestReference_addKey(t *testing.T) {
- type fields struct {
- Authors string
- Title string
- Journal string
- PubMed string
- Remark string
- Range string
- }
- type args struct {
- referenceKey string
- referenceValue string
- }
- tests := []struct {
- name string
- fields fields
- args args
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- reference := &Reference{
- Authors: tt.fields.Authors,
- Title: tt.fields.Title,
- Journal: tt.fields.Journal,
- PubMed: tt.fields.PubMed,
- Remark: tt.fields.Remark,
- Range: tt.fields.Range,
- }
- if err := reference.addKey(tt.args.referenceKey, tt.args.referenceValue); (err != nil) != tt.wantErr {
- t.Errorf("Reference.addKey() error = %v, wantErr %v", err, tt.wantErr)
- }
- })
- }
-}
-
-func Test_parseLocus(t *testing.T) {
- type args struct {
- locusString string
- }
- tests := []struct {
- name string
- args args
- want Locus
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := parseLocus(tt.args.locusString); !reflect.DeepEqual(got, tt.want) {
- t.Errorf("parseLocus() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestRead(t *testing.T) {
- type args struct {
- path string
- }
- tests := []struct {
- name string
- args args
- want Genbank
- wantErr bool
- }{
- // TODO: Add test cases.
- {
- name: "error on missing file",
- args: args{
- path: "../../afdaljhdfa.txt",
- },
- wantErr: true,
- },
- {
- name: "error on malformed file",
- args: args{
- path: "../../data/malformed_read_test.gbk",
- },
- wantErr: true,
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := Read(tt.args.path)
- if (err != nil) != tt.wantErr {
- t.Errorf("Read() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("Read() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestReadMulti(t *testing.T) {
- type args struct {
- path string
- }
- tests := []struct {
- name string
- args args
- want []Genbank
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := ReadMulti(tt.args.path)
- if (err != nil) != tt.wantErr {
- t.Errorf("ReadMulti() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("ReadMulti() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func Test_parseLocation(t *testing.T) {
- type args struct {
- locationString string
- }
- tests := []struct {
- name string
- args args
- want Location
- wantErr bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := parseLocation(tt.args.locationString)
- if (err != nil) != tt.wantErr {
- t.Errorf("parseLocation() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("parseLocation() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func Test_buildMetaString(t *testing.T) {
- type args struct {
- name string
- data string
- }
- tests := []struct {
- name string
- args args
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := buildMetaString(tt.args.name, tt.args.data); got != tt.want {
- t.Errorf("buildMetaString() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestBuildLocationString(t *testing.T) {
- type args struct {
- location Location
- }
- tests := []struct {
- name string
- args args
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := BuildLocationString(tt.args.location); got != tt.want {
- t.Errorf("BuildLocationString() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func Test_generateWhiteSpace(t *testing.T) {
- type args struct {
- length int
- }
- tests := []struct {
- name string
- args args
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := generateWhiteSpace(tt.args.length); got != tt.want {
- t.Errorf("generateWhiteSpace() = %v, want %v", got, tt.want)
- }
- })
- }
-}
-
-func TestRead_error(t *testing.T) {
- readErr := errors.New("open /tmp/file: no such file or directory")
- oldReadFileFn := readFileFn
- readFileFn = func(filename string) ([]byte, error) {
- return nil, readErr
- }
- defer func() {
- readFileFn = oldReadFileFn
- }()
- _, err := Read("/tmp/file")
- assert.EqualError(t, err, readErr.Error())
-}
-
-func TestBuildFeatureString(t *testing.T) {
- feature := Feature{
- Type: "test type",
- Description: "a description",
- Location: Location{
- GbkLocationString: "gbk location",
- },
- }
- str := BuildFeatureString(feature)
- assert.Equal(t, str, " test type gbk location\n")
-}
-
-func TestParse_error(t *testing.T) {
- parseMultiErr := errors.New("parse error")
- oldParseMultiNthFn := parseMultiNthFn
- parseMultiNthFn = func(r io.Reader, count int) ([]Genbank, error) {
- return nil, parseMultiErr
- }
- defer func() {
- parseMultiNthFn = oldParseMultiNthFn
- }()
- _, err := Parse(strings.NewReader(""))
- assert.EqualError(t, err, parseMultiErr.Error())
-
- _, err = ParseMulti(strings.NewReader(""))
- assert.EqualError(t, err, parseMultiErr.Error())
-}
-
-func TestParseReferences_error(t *testing.T) {
- parseReferencesErr := errors.New("Failed in parsing reference above line 13. Got error: ")
- oldParseReferencesFn := parseReferencesFn
- parseReferencesFn = func(metadataData []string) (Reference, error) {
- return Reference{}, errors.New("")
- }
- defer func() {
- parseReferencesFn = oldParseReferencesFn
- }()
- file, _ := os.Open("../../data/puc19.gbk")
- _, err := parseMultiNthFn(file, 1)
- assert.EqualError(t, err, parseReferencesErr.Error())
-}
-
-func TestIssue303Regression(t *testing.T) {
- seq, _ := Read("../../data/puc19_303_regression.gbk")
- expectedAttribute := "16S rRNA(adenine(1518)-N(6)/adenine(1519)-N(6))-dimethyltransferase"
- for _, feature := range seq.Features {
- if feature.Attributes["locus_tag"] == "JCVISYN3A_0004" && feature.Type == "CDS" {
- if feature.Attributes["product"] != expectedAttribute {
- t.Errorf("Failed to get proper expected attribute. Got: %s Expected: %s", feature.Attributes["product"], expectedAttribute)
- }
- }
- if feature.Attributes["locus_tag"] == "JCVISYN3A_0051" && feature.Type == "CDS" {
- if _, ok := feature.Attributes["pseudo"]; !ok {
- t.Errorf("pseudo should be in attributes")
- }
- }
- }
-}
-
-func TestConsortiumRegression(t *testing.T) {
- _, err := Read("../../data/puc19_consrtm.gbk")
- if err != nil {
- t.Errorf("Failed to read consrtm. Got err: %s", err)
- }
-}
diff --git a/io/genbank/parser.go b/io/genbank/parser.go
new file mode 100644
index 000000000..52b771124
--- /dev/null
+++ b/io/genbank/parser.go
@@ -0,0 +1,985 @@
+package genbank
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+ "unicode"
+)
+
+/* PARSER TYPE DEFINITIONS */
+
+// A Parser stores state during parsing of a Genbank
+// flatfile.
+type Parser struct {
+ // reader is the buffered source every line is read from.
+ reader *bufio.Reader
+ // line counts the lines consumed so far by readLine; it is reported in syntax errors.
+ line uint
+ // currLine is the most recently consumed line, including its trailing newline if it had one.
+ currLine string
+ // peekedLine holds a line fetched by peekLine but not yet consumed (newline included).
+ // The empty string means no line is pending.
+ peekedLine string
+}
+
+// A parseContext holds the data structures currently being parsed.
+// It is handed to every keyword parsing function so that each can
+// write its results into the shared entry.
+type parseContext struct {
+ // entry is the Genbank entry being filled in by the keyword parsers.
+ entry *Entry
+ // reference is presumably the REFERENCE record currently being built — TODO confirm against parseReference.
+ reference *Reference
+}
+
+// NewParser wraps reader in a buffered reader and returns a Parser
+// that has not yet consumed any line of the Genbank flatfile.
+func NewParser(reader io.Reader) *Parser {
+	parser := new(Parser)
+	parser.reader = bufio.NewReader(reader)
+	parser.line = 0
+	return parser
+}
+
+// A tokenRange holds indices in which a token is expected to occur.
+// The indices are byte offsets into a line and are used as the bounds of
+// a slice expression (line[start:end]), so start is inclusive and end is
+// exclusive.
+type tokenRange struct {
+ start int
+ end int
+}
+
+/* PARSER UTILITY FUNCTIONS */
+
+// makeSyntaxError builds a GenbankSyntaxError carrying msg together with the
+// parser's current line number and the text of the last consumed line.
+// Only the first of any innerError values is recorded as the underlying cause.
+func (p *Parser) makeSyntaxError(msg string, innerError ...error) GenbankSyntaxError {
+	syntaxErr := GenbankSyntaxError{Msg: msg, Line: p.line, Context: p.currLine}
+	if len(innerError) == 0 {
+		return syntaxErr
+	}
+
+	syntaxErr.InnerErr = innerError[0]
+	return syntaxErr
+}
+
+func (p *Parser) makeWrongContextError(linetype lineType) GenbankSyntaxError {
+ switch linetype {
+ case keyword:
+ return p.makeSyntaxError("unexpected keyword found")
+ case empty:
+ return p.makeSyntaxError("unexpected empty line found")
+ case eof:
+ return p.makeSyntaxError("unexpected end of file found")
+ case subKeyword:
+ return p.makeSyntaxError("subkeyword found outside of keyword context")
+ case entryEnd:
+ return p.makeSyntaxError("unexpected end of entry found")
+ case continuation:
+ return p.makeSyntaxError("continuation found outside of keyword context")
+ case featureKey:
+ return p.makeSyntaxError("feature key found outside of FEATURES keyword context")
+ case sequence:
+ return p.makeSyntaxError("sequence found outside of ORIGIN keyword context")
+ default:
+ return p.makeSyntaxError("unknown linetype found, please report this to repository maintainers")
+ }
+}
+
+// readLine consumes and returns the next line with its trailing newline
+// removed. A line previously fetched by peekLine is handed out first;
+// otherwise a fresh line is read from the underlying reader. The line
+// counter and the error-reporting context are updated on success.
+func (p *Parser) readLine() (string, error) {
+	raw := p.peekedLine
+	if raw == "" {
+		var err error
+		raw, err = p.reader.ReadString('\n')
+		// A final line without a newline arrives together with io.EOF;
+		// that is still a valid line, so only fail when nothing was read.
+		if err != nil && (err != io.EOF || raw == "") {
+			return "", err
+		}
+	}
+
+	p.peekedLine = ""
+	p.currLine = raw
+	p.line++
+	return strings.TrimSuffix(raw, "\n"), nil
+}
+
+// peekLine returns the next line, without its trailing newline, while
+// leaving it available for the next readLine. Calling peekLine several
+// times in a row yields the same line.
+func (p *Parser) peekLine() (string, error) {
+	if p.peekedLine == "" {
+		raw, err := p.reader.ReadString('\n')
+		// A final line without a newline arrives together with io.EOF;
+		// that is still a valid line, so only fail when nothing was read.
+		if err != nil && (err != io.EOF || raw == "") {
+			return "", err
+		}
+		p.peekedLine = raw
+	}
+
+	return strings.TrimSuffix(p.peekedLine, "\n"), nil
+}
+
+// lineType names the kind of information a flatfile line carries, as
+// determined by peekNextLineType.
+type lineType int
+
+const (
+ empty lineType = iota // blank or whitespace-only line
+ subKeyword // line with a letter at byte index 2 or 3
+ continuation // any other non-blank line that matches none of the other kinds
+ featureKey // line with a non-space character at byte index 5
+ sequence // line whose first nine bytes, trimmed, parse as an integer
+ entryEnd // line starting with "//"
+ keyword // line whose first byte is a letter
+ eof // no further line: the reader reported io.EOF
+)
+
+// peekNextLineType determines which type of information the next line
+// encodes (see Genbank spec 3.4.2) without consuming the line.
+//
+// It returns eof when the reader is exhausted, and otherwise classifies the
+// line by the first rule that matches: blank -> empty, letter in column 0 ->
+// keyword, "//" prefix -> entryEnd, letter at index 2 or 3 -> subKeyword,
+// non-space at index 5 -> featureKey, integer in the first nine columns ->
+// sequence, anything else -> continuation. A non-EOF read error is returned
+// unchanged together with the empty line type.
+func (p *Parser) peekNextLineType() (lineType, error) {
+	nextLine, err := p.peekLine()
+	if err == io.EOF {
+		return eof, nil
+	} else if err != nil {
+		return empty, err
+	}
+
+	// columnAt treats columns beyond the end of a short line as blanks, so
+	// truncated lines are classified instead of triggering an index panic.
+	columnAt := func(i int) rune {
+		if i < len(nextLine) {
+			return rune(nextLine[i])
+		}
+		return ' '
+	}
+
+	switch {
+	case len(strings.TrimSpace(nextLine)) == 0:
+		return empty, nil
+	case unicode.IsLetter(columnAt(0)):
+		return keyword, nil
+	case strings.HasPrefix(nextLine, "//"):
+		return entryEnd, nil
+	case unicode.IsLetter(columnAt(2)) || unicode.IsLetter(columnAt(3)):
+		return subKeyword, nil
+	case !unicode.IsSpace(columnAt(5)):
+		return featureKey, nil
+	}
+
+	// Sequence lines carry a base offset in (at most) the first nine columns.
+	numberColumns := nextLine
+	if len(numberColumns) > 9 {
+		numberColumns = numberColumns[:9]
+	}
+	if _, err := strconv.Atoi(strings.TrimSpace(numberColumns)); err == nil {
+		return sequence, nil
+	}
+
+	// Blank lines were ruled out above, so whatever is left continues the
+	// previous keyword's value.
+	return continuation, nil
+}
+
+// dispatchByPrefix peeks at the next line and runs the function in funcs
+// whose key is a prefix of that line, returning its result. When no key
+// matches, the line is consumed and a syntax error is returned.
+func (p *Parser) dispatchByPrefix(pCtx parseContext, funcs map[string]func(parseContext) error) (err error) {
+	upcoming, err := p.peekLine()
+	if err != nil {
+		return err
+	}
+
+	for kw, handler := range funcs {
+		if !strings.HasPrefix(upcoming, kw) {
+			continue
+		}
+		return handler(pCtx)
+	}
+
+	// The line was only peeked so far; consuming it makes the syntax error
+	// below report the offending line and line number.
+	_, _ = p.readLine()
+	return p.makeSyntaxError("keyword expected, none found")
+}
+
+/* MAIN PARSER FUNCTIONS */
+
+// Parse reads the whole flatfile from the Parser's reader and returns it
+// as a Genbank value: the optional release header followed by every entry
+// produced by parseEntry until the end of the input is reported.
+// On failure the partially filled result is returned alongside the error.
+func (p *Parser) Parse() (Genbank, error) {
+	var res Genbank
+
+	headerPresent, err := p.hasHeader()
+	if err != nil {
+		return res, err
+	}
+	if headerPresent {
+		parsedHeader, headerErr := p.parseHeader()
+		if headerErr != nil {
+			return res, fmt.Errorf("failed to parse header: %w", headerErr)
+		}
+		res.Header = parsedHeader
+	}
+
+	res.Entries = []Entry{}
+	for done := false; !done; {
+		var next Entry
+		next, done, err = p.parseEntry()
+		if err != nil {
+			// The index in the message is that of the entry that failed.
+			return res, fmt.Errorf("failed to parse entry %v: %w", len(res.Entries), err)
+		}
+		res.Entries = append(res.Entries, next)
+	}
+
+	return res, nil
+}
+
+// headerDateLayout is the time layout used to parse the header's date line.
+const headerDateLayout = "January 2 2006"
+
+// Column ranges of the fixed-position tokens read by parseHeader.
+var (
+	// fileNameRange locates the file name on the first header line.
+	fileNameRange = tokenRange{start: 0, end: 10}
+	// releaseNumRange locates the "major.minor" release number on the release line.
+	releaseNumRange = tokenRange{start: 47, end: 52}
+	// entryNumRange locates the entry (locus) count on the statistics line.
+	entryNumRange = tokenRange{start: 0, end: 8}
+	// baseNumRange locates the base count on the statistics line.
+	baseNumRange = tokenRange{start: 15, end: 26}
+	// sequenceNumRange locates the sequence count on the statistics line.
+	sequenceNumRange = tokenRange{start: 39, end: 47}
+)
+
+// hasHeader reports whether the input starts with a release header, i.e.
+// whether the first line reads "Genetic Sequence Data Bank" from column
+// dbNameIdx onward. The line is only peeked, so it remains available to
+// whichever parser runs next. A read error (including io.EOF on empty
+// input) is returned unchanged.
+func (p *Parser) hasHeader() (bool, error) {
+	firstLine, err := p.peekLine()
+	if err != nil {
+		return false, err
+	}
+
+	// A first line that ends before the database-name column cannot be a
+	// header. Slicing it unguarded would panic on header-less files whose
+	// first line is short.
+	if len(firstLine) < dbNameIdx {
+		return false, nil
+	}
+	return strings.TrimSpace(firstLine[dbNameIdx:]) == "Genetic Sequence Data Bank", nil
+}
+
+// parseHeader consumes the release header at the top of a flatfile and
+// returns its contents. The header is read as a fixed sequence of lines:
+// file name, date, release number, title and file statistics, each
+// followed by one line that is skipped. Read failures are wrapped with the
+// field being read; malformed content is reported as a syntax error.
+func (p *Parser) parseHeader() (Header, error) {
+	res := Header{}
+
+	fileNameLine, err := p.readLine()
+	if err != nil {
+		return res, fmt.Errorf("failed to read file name: %w", err)
+	}
+	// Guard the fixed-column slice below, as is done for the release and
+	// statistics lines, so a short line is an error rather than a panic.
+	if len(fileNameLine) < fileNameRange.end {
+		return res, p.makeSyntaxError("file name line too short")
+	}
+	fileName := strings.TrimSpace(fileNameLine[fileNameRange.start:fileNameRange.end])
+	if fileName == "" {
+		return res, p.makeSyntaxError("empty file name")
+	}
+	res.FileName = fileName
+
+	dateLine, err := p.readLine()
+	if err != nil {
+		return res, fmt.Errorf("failed to read date: %w", err)
+	}
+	date, err := time.Parse(headerDateLayout, strings.TrimSpace(dateLine))
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse date", err)
+	}
+	res.Date = date
+
+	// Skip empty line. A read failure here resurfaces on the next readLine.
+	_, _ = p.readLine()
+
+	releaseLine, err := p.readLine()
+	if err != nil {
+		return res, fmt.Errorf("failed to read release: %w", err)
+	}
+	if len(releaseLine) < releaseNumRange.end {
+		return res, p.makeSyntaxError("release line too short")
+	}
+	// The release number is written as "major.minor".
+	release := strings.Split(strings.TrimSpace(releaseLine[releaseNumRange.start:releaseNumRange.end]), ".")
+	if len(release) != 2 {
+		return res, p.makeSyntaxError("malformed release version")
+	}
+	res.MajorRelease, err = strconv.Atoi(release[0])
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse major release", err)
+	}
+	res.MinorRelease, err = strconv.Atoi(release[1])
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse minor release", err)
+	}
+
+	_, _ = p.readLine() // Skip empty line
+
+	titleLine, err := p.readLine()
+	if err != nil {
+		return res, fmt.Errorf("failed to read title: %w", err)
+	}
+	res.Title = strings.TrimSpace(titleLine)
+
+	_, _ = p.readLine() // Skip empty line
+
+	statsLine, err := p.readLine()
+	if err != nil {
+		return res, fmt.Errorf("failed to read file statistics: %w", err)
+	}
+	// sequenceNumRange is the right-most of the three statistics columns,
+	// so this one check covers all three slices below.
+	if len(statsLine) < sequenceNumRange.end {
+		return res, p.makeSyntaxError("file statistics line too short")
+	}
+	res.NumEntries, err = strconv.Atoi(strings.TrimSpace(statsLine[entryNumRange.start:entryNumRange.end]))
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse number of entries/loci", err)
+	}
+	res.NumBases, err = strconv.Atoi(strings.TrimSpace(statsLine[baseNumRange.start:baseNumRange.end]))
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse number of bases", err)
+	}
+	res.NumSequences, err = strconv.Atoi(strings.TrimSpace(statsLine[sequenceNumRange.start:sequenceNumRange.end]))
+	if err != nil {
+		return res, p.makeSyntaxError("failed to parse number of sequences", err)
+	}
+
+	// Skip empty line. The header may be the last thing in the input, so a
+	// failure to read past it is deliberately not an error.
+	_, _ = p.readLine()
+
+	return res, nil
+}
+
+// parseEntry parses one entry: it dispatches every keyword line to its
+// keyword parser, skips blank lines, and stops at the "//" terminator or at
+// the end of the input.
+//
+// reachedEOF is true when no further input remains after the returned
+// entry. Any line type that may not appear at entry level is reported as a
+// syntax error.
+func (p *Parser) parseEntry() (entry Entry, reachedEOF bool, err error) {
+	keywordFuncs := map[string]func(parseContext) error{
+		"LOCUS":      p.parseLocus,
+		"DEFINITION": p.parseDefinition,
+		"ACCESSION":  p.parseAccession,
+		"VERSION":    p.parseVersion,
+		"DBLINK":     p.parseDBLink,
+		"KEYWORDS":   p.parseKeywords,
+		"SEGMENT":    p.parseSegment,
+		"SOURCE":     p.parseSource,
+		"REFERENCE":  p.parseReference,
+		"COMMENT":    p.parseComment,
+		"FEATURES":   p.parseFeatures,
+	}
+
+	res := Entry{}
+	pCtx := parseContext{entry: &res}
+	for {
+		linetype, err := p.peekNextLineType()
+		if err != nil {
+			return res, false, fmt.Errorf("could not parse entry: %w", err)
+		}
+
+		switch linetype {
+		case keyword:
+			err = p.dispatchByPrefix(pCtx, keywordFuncs)
+			if err == io.EOF {
+				return res, true, nil
+			} else if err != nil {
+				return res, false, err
+			} else if _, err := p.peekLine(); err == io.EOF {
+				return res, true, nil
+			}
+
+		case empty:
+			// We need to read the empty line to advance the reader.
+			_, _ = p.readLine()
+		case eof:
+			return res, true, nil
+		case entryEnd:
+			// The terminator was only peeked. It must be consumed here,
+			// otherwise the next parseEntry call sees the same "//" again
+			// and the caller's loop never makes progress.
+			if _, err := p.readLine(); err != nil {
+				return res, false, err
+			}
+			// Nothing after the terminator means the input is exhausted.
+			if _, err := p.peekLine(); err == io.EOF {
+				return res, true, nil
+			}
+			return res, false, nil
+		default:
+			return res, false, p.makeWrongContextError(linetype)
+		}
+	}
+}
+
+/* KEYWORD PARSING FUNCTIONS */
+
+// parseFeatures consumes the FEATURES keyword line and then appends one
+// Feature to the entry for every feature key that follows, stopping
+// (without consuming it) at the next keyword line. Any other line type is
+// a syntax error.
+//
+// See Genbank spec 3.4.12, which redirects to http://www.insdc.org/documents/feature-table
+func (p *Parser) parseFeatures(pCtx parseContext) error {
+	if _, err := p.readLine(); err != nil {
+		return err
+	}
+
+	for {
+		upcoming, err := p.peekNextLineType()
+		if err != nil {
+			return err
+		}
+
+		if upcoming == keyword {
+			return nil
+		}
+		if upcoming != featureKey {
+			return p.makeWrongContextError(upcoming)
+		}
+
+		parsed, err := p.parseFeatureKey(pCtx)
+		if err != nil {
+			return err
+		}
+		pCtx.entry.Features = append(pCtx.entry.Features, parsed)
+	}
+}
+
+// Column ranges of the tokens on a feature key line.
+var (
+	// featureKeyRange locates the feature key.
+	featureKeyRange = tokenRange{start: 6, end: 20}
+	// featureValueRange locates the value that follows the key; its start
+	// is where the feature location begins.
+	featureValueRange = tokenRange{start: 22, end: 80}
+)
+
+// parseFeatureKey consumes one feature key line and returns the Feature it
+// describes: the key taken from featureKeyRange and the location parsed
+// from everything at or after featureValueRange.start. It then inspects the
+// following lines and returns when the next feature key or keyword line is
+// reached (that line is left unconsumed).
+func (p *Parser) parseFeatureKey(pCtx parseContext) (Feature, error) {
+ res := Feature{}
+ line, err := p.readLine()
+ if err != nil {
+ return res, err
+ }
+
+ // The line must extend past the start of the value column, which also
+ // guarantees the key slice below is in bounds.
+ if len(line) <= featureValueRange.start {
+ return res, p.makeSyntaxError("feature key line not long enough")
+ }
+ res.Key = strings.TrimSpace(line[featureKeyRange.start:featureKeyRange.end])
+
+ res.Location, err = ParseLocation(strings.TrimSpace(line[featureValueRange.start:]))
+ if err != nil {
+ return res, p.makeSyntaxError("could not parse location", err)
+ }
+
+ for {
+ lType, err := p.peekNextLineType()
+ if err != nil {
+ return res, err
+ }
+
+ switch lType {
+ case sequence:
+ // NOTE(review): parseQualifier takes no arguments and nothing in this
+ // branch calls readLine, so it is unclear how the loop advances past
+ // the peeked line — confirm against parseQualifier.
+ // NOTE(review): res.Qualifiers is never initialised in this function;
+ // if it is a map, the assignment below would panic on the zero-value
+ // Feature — confirm.
+ key, value := parseQualifier()
+ prevValues, _ := res.Qualifiers[key]
+ res.Qualifiers[key] = append(prevValues, value)
+ case featureKey:
+ // err is the (nil) result of the successful peek above.
+ return res, err
+ case keyword:
+ // err is the (nil) result of the successful peek above.
+ return res, err
+ default:
+ return res, p.makeWrongContextError(lType)
+ }
+ }
+}
+
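+// ParseLocation converts a GENBANK feature location string to a Location. It currently returns the string unchanged, without validation, and never returns an error. See https://www.insdc.org/submitting-standards/feature-table/#3.4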
+func ParseLocation(str string) (Location, error) {
+ return Location(str), nil
+}
+
+const locusDateLayout = "02-Jan-2006"
+
+// parseLocus parses the LOCUS line into the entry's name, length, molecule, topology, division and date (see Genbank spec 3.4.4).
+func (p *Parser) parseLocus(pCtx parseContext) error {
+	line, err := p.readLine()
+	if err != nil {
+		return fmt.Errorf("failed to read LOCUS line: %w", err)
+	}
+
+	// strings.Fields splits on whitespace runs without compiling a regexp per call and ignores trailing whitespace.
+	tokens := strings.Fields(line)
+	if len(tokens) != 8 {
+		return fmt.Errorf("LOCUS line should have 8 tokens, got %v", len(tokens))
+	}
+
+	pCtx.entry.Name = tokens[1]
+
+	length, err := strconv.Atoi(tokens[2])
+	if err != nil {
+		return p.makeSyntaxError("failed to parse sequence length", err)
+	}
+	pCtx.entry.Length = length
+
+	molecule := strings.Split(tokens[4], "-") // e.g. "ds-DNA" carries strandedness, plain "DNA" does not
+	if len(molecule) == 2 {
+		pCtx.entry.Strandedness = Strandedness(molecule[0])
+		pCtx.entry.MoleculeType = MoleculeType(molecule[1])
+	} else {
+		pCtx.entry.Strandedness = Unknown
+		pCtx.entry.MoleculeType = MoleculeType(molecule[0])
+	}
+
+	pCtx.entry.MoleculeToplogy = MoleculeToplogy(tokens[5])
+
+	pCtx.entry.DivisionCode = DivisionCode(tokens[6])
+
+	date, err := time.Parse(locusDateLayout, tokens[7])
+	if err != nil {
+		return p.makeSyntaxError("failed to parse entry date", err)
+	}
+	pCtx.entry.UpdateDate = date
+
+	return nil
+}
+
+func (p *Parser) parseComment(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ comment, _ := strings.CutPrefix(line, "COMMENT")
+ comment = strings.TrimSpace(comment)
+
+ for {
+ linetype, err := p.peekNextLineType()
+ if err != nil {
+ return fmt.Errorf("failed to read COMMENT line: %w", err)
+ }
+
+ switch linetype {
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ comment = appendLine(comment, strings.TrimSpace(line))
+ case empty:
+ p.readLine()
+ comment = appendLine(comment, "")
+ default:
+ pCtx.entry.Comment = comment
+ return nil
+ }
+ }
+}
+
+// See Genbank spec 3.4.5
+func (p *Parser) parseDefinition(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read DEFINITION line: %w", err)
+ }
+
+ after, _ := strings.CutPrefix(line, "DEFINITION")
+ after = strings.TrimSpace(after)
+ pCtx.entry.Definition = appendLine(pCtx.entry.Definition, after)
+
+ for {
+ linetype, err := p.peekNextLineType()
+ if err != nil {
+ return err
+ }
+
+ switch linetype {
+ case empty:
+ _, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ pCtx.entry.Definition = appendLine(pCtx.entry.Definition, "\n")
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ pCtx.entry.Definition = appendLine(pCtx.entry.Definition, line[10:])
+ default:
+ return nil
+ }
+ }
+}
+
+// See Genbank spec 3.4.6
+func (p *Parser) parseAccession(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read ACCESSION line: %w", err)
+ }
+
+ after, _ := strings.CutPrefix(line, "ACCESSION")
+ after = strings.TrimSpace(after)
+ pCtx.entry.Accession += after
+
+ return nil
+}
+
+// parseVersion parses a VERSION line of the form "ACCESSION.n" and stores n as the entry's AccessionVersion (see Genbank spec 3.4.7.1).
+func (p *Parser) parseVersion(pCtx parseContext) error {
+	line, err := p.readLine()
+	if err != nil {
+		return fmt.Errorf("failed to read VERSION line: %w", err)
+	}
+
+	after, _ := strings.CutPrefix(line, "VERSION")
+	accessionVersion := strings.Split(strings.TrimSpace(after), ".")
+	if len(accessionVersion) != 2 {
+		return p.makeSyntaxError("could not parse accession version")
+	}
+	version, err := strconv.Atoi(accessionVersion[1])
+	if err != nil {
+		return p.makeSyntaxError("accession version should be an integer", err) // pass the cause along, as other call sites do
+	}
+	pCtx.entry.AccessionVersion = version
+
+	return nil
+}
+
+// See Genbank spec 3.4.7.2
+func (p *Parser) parseDBLink(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read DBLINK line: %w", err)
+ }
+
+ if pCtx.entry.DatabaseLinks == nil {
+ pCtx.entry.DatabaseLinks = make(map[string][]string)
+ }
+
+ after, _ := strings.CutPrefix(line, "DBLINK")
+ crossRefType, crossRefs, err := parseRawDBLink(after)
+ if err != nil {
+ return p.makeSyntaxError("failed to parse DBLINK entry", err)
+ }
+ pCtx.entry.DatabaseLinks[crossRefType] = append(pCtx.entry.DatabaseLinks[crossRefType], crossRefs...)
+
+ for {
+ lineType, err := p.peekNextLineType()
+ if err == io.EOF {
+ return nil
+ } else if err != nil {
+ return err
+ }
+
+ switch lineType {
+ case empty:
+ _, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ crossRefType, crossRefs, err := parseRawDBLink(line)
+ if err != nil {
+ return p.makeSyntaxError("failed to parse DBLINK entry", err)
+ }
+ pCtx.entry.DatabaseLinks[crossRefType] = append(pCtx.entry.DatabaseLinks[crossRefType], crossRefs...)
+ default:
+ return nil
+ }
+ }
+}
+
+func parseRawDBLink(data string) (crossRefType string, crossRefs []string, err error) {
+ before, after, found := strings.Cut(data, ":")
+ if !found {
+ return "", nil, fmt.Errorf("should be in the format TYPE:REFERENCE")
+ }
+
+ crossRefType = strings.TrimSpace(before)
+ for _, crossRef := range strings.Split(after, ",") {
+ crossRefs = append(crossRefs, strings.TrimSpace(crossRef))
+ }
+
+ return
+}
+
+// See Genbank spec section 3.4.8
+func (p *Parser) parseKeywords(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read KEYWORDS line: %w", err)
+ }
+
+ after, _ := strings.CutPrefix(line, "KEYWORDS")
+ after, endsWithPeriod := strings.CutSuffix(after, ".")
+ if !endsWithPeriod {
+ return p.makeSyntaxError("KEYWORDS line must end with a period")
+ }
+ keywords := strings.Split(after, ";")
+ for _, keyword := range keywords {
+ pCtx.entry.Keywords = append(pCtx.entry.Keywords, strings.TrimSpace(keyword))
+ }
+
+ return nil
+}
+
+// parseSegment parses a SEGMENT line of the form "SEGMENT n of m" into the
+// entry's Segment and TotalSegments fields (see Genbank spec section 3.4.9).
+func (p *Parser) parseSegment(pCtx parseContext) error {
+	line, err := p.readLine()
+	if err != nil {
+		return fmt.Errorf("failed to read SEGMENT line: %w", err)
+	}
+
+	after, _ := strings.CutPrefix(line, "SEGMENT")
+	segmentStr, totalSegmentsStr, found := strings.Cut(after, " of ")
+	if !found {
+		return p.makeSyntaxError("malformed SEGMENT line, should be in the form 'n of m'")
+	}
+	segment, err := strconv.Atoi(strings.TrimSpace(segmentStr))
+	if err != nil {
+		return p.makeSyntaxError("could not parse segment number", err)
+	}
+	totalSegments, err := strconv.Atoi(strings.TrimSpace(totalSegmentsStr))
+	if err != nil {
+		return p.makeSyntaxError("could not parse total number of segments", err)
+	}
+
+	pCtx.entry.Segment = segment
+	pCtx.entry.TotalSegments = totalSegments
+
+	return nil
+}
+
+// See Genbank spec section 3.4.10
+func (p *Parser) parseSource(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read SOURCE line: %w", err)
+ }
+
+ after, _ := strings.CutPrefix(line, "SOURCE")
+ after, _ = strings.CutSuffix(strings.TrimSpace(after), ".")
+ //if !endsWithPeriod {
+ // return p.makeSyntaxError("SOURCE line must end with a period")
+ //}
+ pCtx.entry.Source.Name = strings.TrimSpace(after)
+
+ return p.parseOrganism(pCtx)
+
+}
+
+// See Genbank spec section 3.4.10
+func (p *Parser) parseOrganism(pCtx parseContext) error {
+ organismLine, err := p.readLine()
+ if err != nil {
+ return fmt.Errorf("failed to read ORGANISM line: %w", err)
+ }
+
+ after, organismKeyword := strings.CutPrefix(organismLine, " ORGANISM")
+ if !organismKeyword {
+ return p.makeSyntaxError("SOURCE keyword must be followed by ORGANISM subkeyword")
+ }
+ after, _ = strings.CutSuffix(strings.TrimSpace(after), ".")
+ //if !endsWithPeriod {
+ // return p.makeSyntaxError("ORGANISM line must end with a period")
+ //}
+
+ pCtx.entry.Source.ScientificName = strings.TrimSpace(after)
+
+ return p.parseTaxonomy(pCtx)
+}
+
+// See Genbank spec section 3.4.10
+func (p *Parser) parseTaxonomy(pCtx parseContext) error {
+ for {
+ lineType, err := p.peekNextLineType()
+ if err != nil {
+ return err
+ }
+
+ switch lineType {
+ case empty:
+ p.readLine() // skip empty lines
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ lineType, err := p.peekNextLineType()
+ if err != nil {
+ return err
+ }
+ if lineType != continuation && !strings.HasSuffix(line, ".") {
+ return p.makeSyntaxError("final line of ORGANISM subkeyword taxonomy must end in a period")
+ } else if lineType == continuation && !strings.HasSuffix(line, ";") {
+ return p.makeSyntaxError("ORGANISM subkeyword taxonomy lines must end in a semicolon")
+ }
+
+ taxa := strings.Split(strings.TrimSpace(line[:len(line)-1]), ";")
+ for _, taxon := range taxa {
+ pCtx.entry.Source.Taxonomy = append(pCtx.entry.Source.Taxonomy, strings.TrimSpace(taxon))
+ }
+ default:
+ return nil
+ }
+
+ }
+}
+
+// parseReference parses a "REFERENCE n (bases x to y)" line and the subkeywords that follow it (see Genbank spec section 3.4.11).
+func (p *Parser) parseReference(pCtx parseContext) error {
+	line, err := p.readLine()
+	if err != nil {
+		return err
+	}
+
+	after, _ := strings.CutPrefix(line, "REFERENCE")
+	after = strings.TrimSpace(after)
+
+	re := regexp.MustCompile(`(\d+)\s+\(bases (\d+) to (\d+)`) // each group captures a whole number
+	matches := re.FindStringSubmatch(after)
+	if matches == nil {
+		return p.makeSyntaxError("malformed REFERENCE line")
+	}
+
+	refNum, _ := strconv.Atoi(matches[1]) // matches[0] is the whole match; capture groups start at index 1
+	basesFrom, _ := strconv.ParseUint(matches[2], 10, 0)
+	basesTo, _ := strconv.ParseUint(matches[3], 10, 0) // the regexp guarantees digits, so only out-of-range values can fail
+
+	pCtx.entry.References = append(pCtx.entry.References, Reference{
+		Number: refNum,
+		BaseRange: Range{
+			From: uint(basesFrom),
+			To:   uint(basesTo),
+		},
+	})
+	pCtx.reference = &pCtx.entry.References[len(pCtx.entry.References)-1] // subkeyword parsers write into this reference
+
+	subKeywordFuncs := map[string]func(parseContext) error{
+		" AUTHORS": p.parseAuthors,
+		" TITLE":   p.parseTitle,
+		" JOURNAL": p.parseJournal,
+		" MEDLINE": p.parseMedline,
+		" PUBMED":  p.parsePubMed,
+		" CONSRTM": p.parseConsortium,
+		" REMARK":  p.parseRemark,
+	}
+
+	for {
+		lineType, err := p.peekNextLineType()
+		if err != nil {
+			return fmt.Errorf("could not parse REFERENCE keyword: %w", err)
+		}
+
+		switch lineType {
+		case subKeyword:
+			err = p.dispatchByPrefix(pCtx, subKeywordFuncs)
+			if err != nil {
+				return err
+			}
+		case empty:
+			p.readLine() // We need to read the empty line to advance the reader
+		default:
+			pCtx.reference = nil
+			return nil
+		}
+	}
+}
+
+// parseAuthors parses an AUTHORS subkeyword and its continuation lines into the current reference (see Genbank spec section 3.4.11).
+func (p *Parser) parseAuthors(pCtx parseContext) error {
+	line, err := p.readLine()
+	if err != nil {
+		return err
+	}
+
+	authors, _ := strings.CutPrefix(line, " AUTHORS")
+	authors = strings.TrimSpace(authors)
+	for {
+		lineType, err := p.peekNextLineType()
+		if err != nil {
+			return fmt.Errorf("could not parse AUTHORS subkeyword: %w", err)
+		}
+		switch lineType {
+		case continuation:
+			line, err := p.readLine()
+			if err != nil {
+				return err
+			}
+			authors += " " + strings.TrimSpace(line) // join like parseTitle/parseJournal; keep the line's last character
+		case empty:
+			p.readLine() // We need to read the empty line to advance the reader
+		default:
+			pCtx.reference.Authors = strings.TrimSpace(authors) // store the result; it was previously discarded
+			return nil
+		}
+	}
+}
+
+func (p *Parser) parseTitle(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ title, _ := strings.CutPrefix(line, " TITLE")
+ title = strings.TrimSpace(title)
+ for {
+ lineType, err := p.peekNextLineType()
+ if err != nil {
+ return fmt.Errorf("could not parse TITLE subkeyword: %w", err)
+ }
+ switch lineType {
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ title += " "
+ title += strings.TrimSpace(line)
+ case empty:
+ p.readLine() // Skip empty lines
+ default:
+ pCtx.reference.Title = strings.TrimSpace(title)
+ return nil
+ }
+ }
+}
+
+func (p *Parser) parseJournal(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ journal, _ := strings.CutPrefix(line, " JOURNAL")
+ journal = strings.TrimSpace(journal)
+
+ // Detect if JOURNAL entry refers to a book.
+ if strings.HasPrefix(journal, "(in)") {
+ pCtx.reference.IsBook = true
+ }
+
+ for {
+ lineType, err := p.peekNextLineType()
+ if err != nil {
+ return fmt.Errorf("could not parse JOURNAL subkeyword: %w", err)
+ }
+ switch lineType {
+ case continuation:
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+ journal += " "
+ journal += strings.TrimSpace(line)
+ case empty:
+ p.readLine()
+ default:
+ pCtx.reference.Journal = strings.TrimSpace(journal)
+ return nil
+ }
+ }
+}
+
+func (p *Parser) parseMedline(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ medline, _ := strings.CutPrefix(line, " MEDLINE")
+ pCtx.reference.Medline = strings.TrimSpace(medline)
+ return nil
+}
+
+func (p *Parser) parsePubMed(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ pubMed, _ := strings.CutPrefix(line, " PUBMED")
+ pCtx.reference.PubMed = strings.TrimSpace(pubMed)
+ return nil
+}
+
+func (p *Parser) parseConsortium(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ consortium, _ := strings.CutPrefix(line, " CONSRTM")
+ pCtx.reference.Consortium = strings.TrimSpace(consortium)
+ return nil
+
+}
+
+func (p *Parser) parseRemark(pCtx parseContext) error {
+ line, err := p.readLine()
+ if err != nil {
+ return err
+ }
+
+ remark, _ := strings.CutPrefix(line, " REMARK")
+ pCtx.reference.Remark = strings.TrimSpace(remark)
+ return nil
+}
+
+/* OTHER UTILITY FUNCTIONS */
+
+// appendLine appends a line to s. If s is empty, simply
+// returns append. If s is not empty, ensures that s is followed
+// by a newline and then what is contained in append.
+func appendLine(s string, append string) string {
+ if s == "" {
+ return append
+ }
+
+ return strings.TrimSuffix(s, "\n") + "\n" + append
+}
diff --git a/io/genbank/parser_test.go b/io/genbank/parser_test.go
new file mode 100644
index 000000000..0b4110414
--- /dev/null
+++ b/io/genbank/parser_test.go
@@ -0,0 +1,246 @@
+package genbank
+
+import (
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/google/go-cmp/cmp"
+)
+
+func TestParseHeaderErrs(t *testing.T) {
+ testcases := []struct {
+ name string
+ data string
+ }{{
+ name: "fails on premature EOF",
+ data: `GBBCT1.SEQ Genetic Sequence Data Bank
+ October 15 2023
+
+ NCBI-GenBank Flat File Release`,
+ }}
+
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ p := NewParser(strings.NewReader(tc.data))
+ got, err := p.parseHeader()
+
+ if err == nil {
+ t.Fatalf("no error returned by parseHeader on invalid input, got: %v", got)
+ }
+ })
+ }
+
+}
+
+func TestParseHeaderRoundTrip(t *testing.T) {
+ testcases := []struct {
+ name string
+ data Header
+ }{{
+ name: "parses example header from spec",
+ data: Header{
+ FileName: "GBBCT1.SEQ",
+ Date: time.Date(2023, time.October, 15, 0, 0, 0, 0, time.UTC),
+ Title: "Bacterial Sequences (Part 1)",
+ NumEntries: 51396,
+ NumBases: 92682287,
+ NumSequences: 51396,
+ MajorRelease: 258,
+ MinorRelease: 0,
+ }}, {
+ name: "parses header with largest possible ints in stats line",
+ data: Header{
+ FileName: "GBBCT1.SEQ",
+ Date: time.Date(2023, time.October, 15, 0, 0, 0, 0, time.UTC),
+ Title: "Bacterial Sequences (Part 1)",
+ NumEntries: 99999999,
+ NumBases: 99999999999,
+ NumSequences: 99999999,
+ MajorRelease: 258,
+ MinorRelease: 0,
+ }}, {
+ name: "parses header with smallest possible ints in stats line",
+ data: Header{
+ FileName: "GBBCT1.SEQ",
+ Date: time.Date(2023, time.October, 15, 0, 0, 0, 0, time.UTC),
+ Title: "Bacterial Sequences (Part 1)",
+ NumEntries: 1,
+ NumBases: 1,
+ NumSequences: 1,
+ MajorRelease: 258,
+ MinorRelease: 0,
+ }}}
+
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ headerStr := tc.data.String()
+
+ p := NewParser(strings.NewReader(headerStr))
+ got, err := p.parseHeader()
+
+ if err != nil {
+ t.Fatalf("unexpected error from parseHeader: %v", err)
+ }
+
+ if diff := cmp.Diff(tc.data, got); diff != "" {
+ t.Fatalf("parseHeader returned incorrect header after round trip, (-want, +got): %v", diff)
+ }
+
+ })
+ }
+}
+
+func TestParseEntry(t *testing.T) {
+ testcases := []struct {
+ name string
+ data string
+ want Entry
+ wantReachedEOF bool
+ wantErr bool
+ }{{
+ name: "parses LOCUS keyword",
+ data: "LOCUS CP032762 5868661 bp DNA circular BCT 15-OCT-2018",
+ wantReachedEOF: true,
+ want: Entry{
+ Name: "CP032762",
+ Length: 5868661,
+ Strandedness: Unknown,
+ MoleculeType: DNA,
+ MoleculeToplogy: Circular,
+ DivisionCode: BacterialDivision,
+ UpdateDate: time.Date(2018, time.October, 15, 0, 0, 0, 0, time.UTC),
+ },
+ }, {
+ name: "parses DEFINITION keyword",
+ data: "DEFINITION test",
+ wantReachedEOF: true,
+ want: Entry{Definition: "test"},
+ }, {
+ name: "parses multiline DEFINITION keyword",
+ data: "DEFINITION test\n another line\n\n yet another line",
+ wantReachedEOF: true,
+ want: Entry{Definition: "test\nanother line\n\nyet another line"},
+ }, {
+ name: "parses repeated DEFINITION keywords",
+ data: "DEFINITION test\nDEFINITION another line\n\n yet another line",
+ wantReachedEOF: true,
+ want: Entry{Definition: "test\nanother line\n\nyet another line"},
+ }, {
+ name: "parses only first entry in reader",
+ data: `DEFINITION test
+//
+DEFINITION another test`,
+ want: Entry{Definition: "test"},
+ }, {
+ name: "parses VERSION keyword",
+ data: "VERSION ASDF.130",
+ want: Entry{AccessionVersion: 130},
+ wantReachedEOF: true,
+ }, {
+ name: "parses ACCESSION keyword",
+ data: "ACCESSION HIIII",
+ want: Entry{Accession: "HIIII"},
+ wantReachedEOF: true,
+ }, {
+ name: "parses simple DBLINK keyword",
+ data: "DBLINK SomeDB: someReference",
+ want: Entry{DatabaseLinks: map[string][]string{"SomeDB": {"someReference"}}},
+ wantReachedEOF: true,
+ }, {
+ name: "parses DBLINK keyword with multiple refs for a single DB",
+ data: "DBLINK SomeDB: someReference, anotherReference",
+ want: Entry{DatabaseLinks: map[string][]string{"SomeDB": {"someReference", "anotherReference"}}},
+ wantReachedEOF: true,
+ }, {
+ name: "parses DBLINK keyword with multiple DBs",
+ data: "DBLINK SomeDB: someReference, anotherReference\n AnotherDB: yetAnotherReference",
+ want: Entry{DatabaseLinks: map[string][]string{"SomeDB": {"someReference", "anotherReference"}, "AnotherDB": {"yetAnotherReference"}}},
+ wantReachedEOF: true,
+ }, {
+ name: "parses KEYWORDS keyword",
+ data: "KEYWORDS first; second; last.",
+ want: Entry{Keywords: []string{"first", "second", "last"}},
+ wantReachedEOF: true,
+ }, {
+ name: "KEYWORDS line must end with period",
+ data: "KEYWORDS first; second; last",
+ wantErr: true,
+ }, {
+ name: "parses SOURCE keyword",
+ data: `SOURCE common name (more info) molecule type.
+ ORGANISM Scientific name.
+ Taxon 1; Taxon 2; Taxon 3;
+ Taxon 4; Taxon 5.
+`,
+ want: Entry{Source: Source{
+ Name: "common name (more info) molecule type",
+ ScientificName: "Scientific name",
+ Taxonomy: []string{"Taxon 1", "Taxon 2", "Taxon 3", "Taxon 4", "Taxon 5"},
+ }},
+ wantReachedEOF: true,
+ }}
+
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ p := NewParser(strings.NewReader(tc.data))
+ got, gotReachedEOF, err := p.parseEntry()
+
+ if gotErr := err != nil; gotErr != tc.wantErr {
+ t.Fatalf("incorrect error returned from parseEntry, wantErr: %v, err: %v", tc.wantErr, err)
+ }
+
+ if gotReachedEOF != tc.wantReachedEOF {
+ t.Fatalf("incorrect reached EOF status returned from parseEntry, want: %v, got: %v", tc.wantReachedEOF, gotReachedEOF)
+ }
+
+ if diff := cmp.Diff(tc.want, got); !tc.wantErr && diff != "" {
+ t.Fatalf("parseEntry returned incorrect Entry, (-want, +got): %v", diff)
+ }
+
+ })
+ }
+}
+
+func TestRoundtrip(t *testing.T) {
+ testCases := []struct {
+ name string
+ file string
+ }{
+ {
+ name: "Saccharomyces cerevisiae S288C chromosome IX, complete sequence",
+ file: "./data/NC_001141.2.gb",
+ }, {
+ name: "Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds",
+ file: "./data/U49845.1.gb",
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ f, err := os.Open(tc.file)
+ if err != nil {
+ t.Fatalf("failed to open file %v: %v", tc.file, err)
+ }
+ p := NewParser(f)
+
+ firstParse, err := p.Parse()
+ if err != nil {
+ t.Fatalf("failed initial parsing for %v: %v", tc.file, err)
+ }
+ firstWrite := firstParse.String()
+
+ p2 := NewParser(strings.NewReader(firstWrite))
+ secondParse, err := p2.Parse()
+ if err != nil {
+ t.Fatalf("failed second parsing for %v: %v", tc.file, err)
+ }
+ secondWrite := secondParse.String()
+
+ if diff := cmp.Diff(firstWrite, secondWrite); diff != "" {
+ t.Errorf("mismatch in roundtrip parsing (-firstParse,+secondParse):\n%s", diff)
+ }
+ })
+ }
+}
diff --git a/io/genbank/types.go b/io/genbank/types.go
new file mode 100644
index 000000000..0772578af
--- /dev/null
+++ b/io/genbank/types.go
@@ -0,0 +1,163 @@
+/*
+Package genbank provides genbank parsers and writers.
+
+GenBank is a flat text file format developed in the 1980s to annotate genetic
+sequences, and has since become the standard for sharing annotated genetic
+sequences. A full specification of the GenBank flatfile format can be found at
+https://www.ncbi.nlm.nih.gov/genbank/release/current/.
+
+This package provides a parser and writer to convert between the GenBank file
+format and the more general GenBank struct.
+*/
+package genbank
+
+import "time"
+
+// Genbank is the main struct for the Genbank file format.
+type Genbank struct {
+ Header Header
+ Entries []Entry
+}
+
+// Header holds the information at the beginning of all
+// Genbank files (see Genbank spec section 3.1)
+type Header struct {
+ FileName string
+ Title string
+ Date time.Time
+ MajorRelease int
+ MinorRelease int
+ NumEntries int
+ NumBases int
+ NumSequences int
+}
+
+// Strandedness represents whether a sequence is
+// single-, double-, or mixed-stranded (see Genbank
+// spec section 3.4.4.2).
+type Strandedness string
+
+const (
+ DoubleStranded Strandedness = "ds"
+ SingleStranded Strandedness = "ss"
+ MixedStranded Strandedness = "ms"
+ Unknown Strandedness = ""
+)
+
+// MoleculeType represents what kind of molecule a sequence
+// consists of (see Genbank spec section 3.4.4.2).
+type MoleculeType string
+
+const (
+ NucleicAcid MoleculeType = "NA"
+ DNA MoleculeType = "DNA"
+ RNA MoleculeType = "RNA"
+ TransferRNA MoleculeType = "tRNA"
+ RibosomalRNA MoleculeType = "rRNA"
+ MessengerRNA MoleculeType = "mRNA"
+ SmallNuclearRNA MoleculeType = "uRNA"
+ ViralCRNA MoleculeType = "cRNA"
+)
+
+// MoleculeToplogy represents the topology of a molecule
+// (see Genbank spec section 3.4.4.2).
+type MoleculeToplogy string
+
+const (
+ Circular MoleculeToplogy = "circular"
+ Linear MoleculeToplogy = "linear"
+)
+
+// DivisionCode represents which division of Genbank an entry
+// belongs to (see Genbank spec section 3.4.4.2).
+type DivisionCode string
+
+const (
+ PrimateDivision DivisionCode = "PRI"
+ RodentDivision DivisionCode = "ROD"
+ OtherMammalianDivision DivisionCode = "MAM"
+ OtherVertebrateDivision DivisionCode = "VRT"
+ InvertebrateDivision DivisionCode = "INV"
+ PlantFungalAlgalDivision DivisionCode = "PLN"
+ BacterialDivision DivisionCode = "BCT"
+ ViralDivision DivisionCode = "VRL"
+ BacteriophageDivision DivisionCode = "PHG"
+ SyntheticDivision DivisionCode = "SYN"
+ UnnanotatedDivision DivisionCode = "UNA"
+ ExpressedSequenceTagDivision DivisionCode = "EST"
+ PatentDivision DivisionCode = "PAT"
+ SequenceTaggedSiteDivision DivisionCode = "STS"
+ GenomeSurveyDivision DivisionCode = "GSS"
+ HighThroughputGenomicDivision DivisionCode = "HTG"
+ HighThroughputCDNADivision DivisionCode = "HTC"
+ EnvironmentalSamplingDivision DivisionCode = "ENV"
+ ConstructedDivision DivisionCode = "CON"
+ TranscriptomeShotgunDivision DivisionCode = "TSA"
+)
+
+type Range struct {
+ From uint
+ To uint
+}
+
+// Reference is a reference to a publication (see
+// Genbank spec section 3.4.11).
+type Reference struct {
+ Number int
+ BaseRange Range
+ Authors string
+ Title string
+ Medline string
+ PubMed string
+ PageNumbers string
+ Remark string
+ Year string
+ Journal string
+ IsBook bool
+ Consortium string
+}
+
+// Source describes the source of an entry (see
+// Genbank spec section 3.4.10).
+type Source struct {
+ Name string
+ ScientificName string
+ Taxonomy []string
+}
+
+// A Feature represents genes, gene products, and
+// other areas of significance in an entry (see
+// Genbank spec section 3.4.12).
+type Feature struct {
+ Key string
+ Location Location
+ Qualifiers map[string][]string
+}
+
+type Location string
+
+// Entry represents a single entry in the Genbank file
+// (see Genbank spec section 3.1).
+type Entry struct {
+ Name string
+ Length int
+ Strandedness Strandedness
+ MoleculeType MoleculeType
+ MoleculeToplogy MoleculeToplogy
+ DivisionCode DivisionCode
+ UpdateDate time.Time
+ Definition string
+ Accession string
+ AccessionVersion int
+ CrossReferences map[string][]string
+ References []Reference
+ Source Source
+ Features []Feature
+ Origin string
+ Sequence string
+ DatabaseLinks map[string][]string
+ Keywords []string
+ Segment int
+ TotalSegments int
+ Comment string
+}
diff --git a/io/genbank/write.go b/io/genbank/write.go
new file mode 100644
index 000000000..1f6bd05dd
--- /dev/null
+++ b/io/genbank/write.go
@@ -0,0 +1,119 @@
+package genbank
+
+import (
+ "fmt"
+ "strings"
+)
+
+const (
+ dbNameIdx = 21
+ dateIdx = 26
+ titleIdx = 24
+)
+
+// String converts a Genbank to its Genbank flatfile representation.
+func (g Genbank) String() string {
+ builder := strings.Builder{}
+
+ builder.WriteString(g.Header.String())
+
+ for _, entry := range g.Entries {
+ builder.WriteString(entry.String())
+ }
+
+ return builder.String()
+}
+
+// String converts an Entry to its Genbank flatfile representation, writing each keyword record on its own line.
+func (e Entry) String() string {
+	builder := strings.Builder{}
+
+	// Write LOCUS entry.
+	builder.WriteString(fmt.Sprintf("%-12s%-17s", "LOCUS", e.Name))
+	if len(e.Name) >= 17 { // Ensure space after long names.
+		builder.WriteRune(' ')
+	}
+	builder.WriteString(fmt.Sprintf(
+		"%11d bp %3s%-4s %8s %3s %s\n",
+		e.Length,
+		e.Strandedness,
+		e.MoleculeType,
+		e.MoleculeToplogy,
+		e.DivisionCode,
+		strings.ToUpper(e.UpdateDate.Format("02-Jan-2006")),
+	))
+
+	builder.WriteString(fmt.Sprintf("%-12s%s\n", "DEFINITION", e.Definition))
+	builder.WriteString(fmt.Sprintf("%-12s%s\n", "ACCESSION", e.Accession))
+	builder.WriteString(fmt.Sprintf("%-12s%s.%d\n", "VERSION", e.Accession, e.AccessionVersion))
+
+	wroteDBLinkKeyword := false
+	for refType, refs := range e.DatabaseLinks { // NOTE(review): map iteration order is random, so DBLINK line order varies between calls
+		if !wroteDBLinkKeyword {
+			builder.WriteString(fmt.Sprintf("%-12s%s:%s\n", "DBLINK", refType, strings.Join(refs, ",")))
+			wroteDBLinkKeyword = true
+		} else {
+			builder.WriteString(fmt.Sprintf("%-12s%s:%s\n", "", refType, strings.Join(refs, ",")))
+		}
+	}
+
+	builder.WriteString(fmt.Sprintf("%-12s", "KEYWORDS"))
+	width := 0
+	for i, keyword := range e.Keywords {
+		width += len(keyword)
+		if width > 80 { // If we exceed line width, newline before the keyword.
+			builder.WriteString(fmt.Sprintf(";\n%-12s%s", "", keyword))
+			width = 0
+		} else if i > 0 {
+			builder.WriteString(fmt.Sprintf("; %s", keyword))
+		} else {
+			builder.WriteString(keyword)
+		}
+	}
+	builder.WriteString(".\n") // the keyword list ends with a period and its own line break
+	return builder.String()
+}
+
+// String converts a Header to its Genbank flatfile representation (eight lines, each newline-terminated).
+func (h Header) String() string {
+	builder := strings.Builder{}
+
+	// Line 1: File name and database name
+	// %-*s pads FileName to dbNameIdx columns and, unlike strings.Repeat, cannot panic when the name is longer.
+	builder.WriteString(fmt.Sprintf("%-*s", dbNameIdx, h.FileName))
+	builder.WriteString("Genetic Sequence Data Bank")
+	builder.WriteRune('\n')
+
+	// Line 2: Date
+	builder.WriteString(strings.Repeat(" ", dateIdx))
+	builder.WriteString(h.Date.Format(headerDateLayout))
+	builder.WriteRune('\n')
+
+	// Line 3: Blank
+	builder.WriteRune('\n')
+
+	// Line 4: Genbank release number
+	builder.WriteString(fmt.Sprintf(" NCBI-GenBank Flat File Release %v.%v", h.MajorRelease, h.MinorRelease))
+	builder.WriteRune('\n')
+
+	// Line 5: Blank
+	builder.WriteRune('\n')
+
+	// Line 6: File title
+	builder.WriteString(strings.Repeat(" ", titleIdx))
+	builder.WriteString(h.Title)
+	builder.WriteRune('\n')
+
+	// Line 7: Blank
+	builder.WriteRune('\n')
+
+	// Line 8: File statistics
+	builder.WriteString(fmt.Sprintf(
+		"%8v loci, %11v bases, from %8v reported sequences",
+		h.NumEntries,
+		h.NumBases,
+		h.NumSequences))
+	builder.WriteRune('\n')
+
+	return builder.String()
+}
diff --git a/io/slow5/slow5_test.go b/io/slow5/slow5_test.go
index 950faf740..8a85c167f 100644
--- a/io/slow5/slow5_test.go
+++ b/io/slow5/slow5_test.go
@@ -103,7 +103,7 @@ func testParseReadsHelper(t *testing.T, fileTarget string, errorMessage string)
}
}
if len(targetErr) == 0 {
- t.Errorf(errorMessage)
+ t.Errorf("Test failed: %s", errorMessage)
}
}
diff --git a/primers/pcr/pcr.go b/primers/pcr/pcr.go
index 648bc8ac6..ce574e853 100644
--- a/primers/pcr/pcr.go
+++ b/primers/pcr/pcr.go
@@ -32,7 +32,10 @@ import (
)
// https://doi.org/10.1089/dna.1994.13.75
-var minimalPrimerLength int = 15
+const minimalPrimerLength int = 7
+
+// what we want for designs
+const designedMinimalPrimerLength int = 15
// DesignPrimersWithOverhangs designs two primers to amplify a target sequence,
// adding on an overhang to the forward and reverse strand. This overhang can
@@ -40,13 +43,13 @@ var minimalPrimerLength int = 15
// or GoldenGate restriction enzyme sites.
func DesignPrimersWithOverhangs(sequence, forwardOverhang, reverseOverhang string, targetTm float64) (string, string) {
sequence = strings.ToUpper(sequence)
- forwardPrimer := sequence[0:minimalPrimerLength]
+ forwardPrimer := sequence[0:designedMinimalPrimerLength]
for additionalNucleotides := 0; primers.MeltingTemp(forwardPrimer) < targetTm; additionalNucleotides++ {
- forwardPrimer = sequence[0 : minimalPrimerLength+additionalNucleotides]
+ forwardPrimer = sequence[0 : designedMinimalPrimerLength+additionalNucleotides]
}
- reversePrimer := transform.ReverseComplement(sequence[len(sequence)-minimalPrimerLength:])
+ reversePrimer := transform.ReverseComplement(sequence[len(sequence)-designedMinimalPrimerLength:])
for additionalNucleotides := 0; primers.MeltingTemp(reversePrimer) < targetTm; additionalNucleotides++ {
- reversePrimer = transform.ReverseComplement(sequence[len(sequence)-(minimalPrimerLength+additionalNucleotides):])
+ reversePrimer = transform.ReverseComplement(sequence[len(sequence)-(designedMinimalPrimerLength+additionalNucleotides):])
}
// Add overhangs to primer
@@ -168,6 +171,12 @@ func SimulateSimple(sequences []string, targetTm float64, circular bool, primerL
// in your reaction, which can lead to confusing results. The variable
// `circular` is for if the target template is circular, like a plasmid.
func Simulate(sequences []string, targetTm float64, circular bool, primerList []string) ([]string, error) {
+ // make sure no primers are too short
+ for _, primer := range primerList {
+ if len(primer) < minimalPrimerLength {
+ return nil, errors.New("Primers are too short.")
+ }
+ }
initialAmplification := SimulateSimple(sequences, targetTm, circular, primerList)
subsequentAmplification := SimulateSimple(sequences, targetTm, circular, append(primerList, initialAmplification...))
if len(initialAmplification) != len(subsequentAmplification) {
diff --git a/align/align.go b/search/align/align.go
similarity index 99%
rename from align/align.go
rename to search/align/align.go
index 0eaddae70..ff94f9956 100644
--- a/align/align.go
+++ b/search/align/align.go
@@ -66,7 +66,7 @@ Tim
package align
import (
- "github.com/bebop/poly/align/matrix"
+ "github.com/bebop/poly/search/align/matrix"
)
// Scoring is a struct that holds the scoring matrix for match, mismatch, and gap penalties.
diff --git a/align/align_test.go b/search/align/align_test.go
similarity index 98%
rename from align/align_test.go
rename to search/align/align_test.go
index 547d1c086..b96ed9b27 100644
--- a/align/align_test.go
+++ b/search/align/align_test.go
@@ -3,9 +3,9 @@ package align_test
import (
"testing"
- "github.com/bebop/poly/align"
- "github.com/bebop/poly/align/matrix"
"github.com/bebop/poly/alphabet"
+ "github.com/bebop/poly/search/align"
+ "github.com/bebop/poly/search/align/matrix"
)
func TestNeedlemanWunsch(t *testing.T) {
diff --git a/align/example_test.go b/search/align/example_test.go
similarity index 96%
rename from align/example_test.go
rename to search/align/example_test.go
index 3d455a108..82ce02a54 100644
--- a/align/example_test.go
+++ b/search/align/example_test.go
@@ -4,9 +4,9 @@ package align_test
import (
"fmt"
- "github.com/bebop/poly/align"
- "github.com/bebop/poly/align/matrix"
"github.com/bebop/poly/alphabet"
+ "github.com/bebop/poly/search/align"
+ "github.com/bebop/poly/search/align/matrix"
)
func ExampleNeedlemanWunsch() {
diff --git a/align/matrix/matrices.go b/search/align/matrix/matrices.go
similarity index 100%
rename from align/matrix/matrices.go
rename to search/align/matrix/matrices.go
diff --git a/align/matrix/matrix.go b/search/align/matrix/matrix.go
similarity index 100%
rename from align/matrix/matrix.go
rename to search/align/matrix/matrix.go
diff --git a/align/matrix/matrix_test.go b/search/align/matrix/matrix_test.go
similarity index 96%
rename from align/matrix/matrix_test.go
rename to search/align/matrix/matrix_test.go
index d1e5d0027..88eba1584 100644
--- a/align/matrix/matrix_test.go
+++ b/search/align/matrix/matrix_test.go
@@ -3,8 +3,8 @@ package matrix_test
import (
"testing"
- "github.com/bebop/poly/align/matrix"
"github.com/bebop/poly/alphabet"
+ "github.com/bebop/poly/search/align/matrix"
"github.com/stretchr/testify/assert"
)
diff --git a/search/bwt/bitvector.go b/search/bwt/bitvector.go
new file mode 100644
index 000000000..d6cbf8144
--- /dev/null
+++ b/search/bwt/bitvector.go
@@ -0,0 +1,77 @@
+package bwt
+
+import (
+ "fmt"
+ "math"
+)
+
+const wordSize = 64
+
+// bitvector is a sequence of 1's and 0's. You can also think
+// of this as an array of bits. This allows us to encode
+// data in a memory efficient manner.
+type bitvector struct {
+ bits []uint64 // backing words; each word stores wordSize bits
+ numberOfBits int // logical length of the vector, as reported by len()
+}
+
+// newBitVector will return an initialized bitvector with
+// the specified number of zeroed bits.
+func newBitVector(initialNumberOfBits int) bitvector {
+ capacity := getNumOfBitSetsNeededForNumOfBits(initialNumberOfBits)
+ bits := make([]uint64, capacity)
+ return bitvector{
+ bits: bits,
+ numberOfBits: initialNumberOfBits,
+ }
+}
+
+// getBitSet gets the whole word at some offset from the
+// bitvector. Useful if you'd prefer to work with the
+// word rather than with individual bits.
+func (b bitvector) getBitSet(bitSetPos int) uint64 {
+ return b.bits[bitSetPos]
+}
+
+// getBit returns the value of the bit at a given offset; it panics if the offset is out of bounds.
+// True represents 1
+// False represents 0
+func (b bitvector) getBit(i int) bool {
+ b.checkBounds(i)
+
+ chunkStart := i / wordSize // index of the word that holds bit i
+ offset := i % wordSize // position of bit i inside that word
+
+ return (b.bits[chunkStart] & (uint64(1) << (63 - offset))) != 0 // offset 0 is the most significant bit of the word
+}
+
+// setBit sets the value of the bit at a given offset; it panics if the offset is out of bounds.
+// True represents 1
+// False represents 0
+func (b bitvector) setBit(i int, val bool) {
+ b.checkBounds(i)
+
+ chunkStart := i / wordSize // index of the word that holds bit i
+ offset := i % wordSize // position of bit i inside that word
+
+ if val {
+ b.bits[chunkStart] |= uint64(1) << (63 - offset) // turn the bit on; offset 0 is the most significant bit
+ } else {
+ b.bits[chunkStart] &= ^(uint64(1) << (63 - offset)) // clear only this bit, leave the rest of the word untouched
+ }
+}
+
+func (b bitvector) checkBounds(i int) {
+ if i >= b.len() || i < 0 {
+ msg := fmt.Sprintf("access of %d is out of bounds for bitvector with length %d", i, b.len())
+ panic(msg)
+ }
+}
+
+func (b bitvector) len() int {
+ return b.numberOfBits
+}
+
+func getNumOfBitSetsNeededForNumOfBits(n int) int {
+ return int(math.Ceil(float64(n) / wordSize))
+}
diff --git a/search/bwt/bitvector_test.go b/search/bwt/bitvector_test.go
new file mode 100644
index 000000000..44a1ac118
--- /dev/null
+++ b/search/bwt/bitvector_test.go
@@ -0,0 +1,119 @@
+package bwt
+
+import (
+ "testing"
+)
+
+type GetBitTestCase struct {
+ position int
+ expected bool
+}
+
+func TestBitVector(t *testing.T) {
+ initialNumberOfBits := wordSize*10 + 1
+
+ bv := newBitVector(initialNumberOfBits)
+
+ if bv.len() != initialNumberOfBits {
+ t.Fatalf("expected len to be %d but got %d", initialNumberOfBits, bv.len())
+ }
+
+ for i := 0; i < initialNumberOfBits; i++ {
+ bv.setBit(i, true)
+ }
+
+ bv.setBit(3, false)
+ bv.setBit(11, false)
+ bv.setBit(13, false)
+ bv.setBit(23, false)
+ bv.setBit(24, false)
+ bv.setBit(25, false)
+ bv.setBit(42, false)
+ bv.setBit(63, false)
+ bv.setBit(64, false)
+ bv.setBit(255, false)
+ bv.setBit(256, false)
+
+ getBitTestCases := []GetBitTestCase{
+ {0, true},
+ {1, true},
+ {2, true},
+ {3, false},
+ {4, true},
+ {7, true},
+ {8, true},
+ {9, true},
+ {10, true},
+ {11, false},
+ {12, true},
+ {13, false},
+ {23, false},
+ {24, false},
+ {25, false},
+ {42, false},
+ {15, true},
+ {16, true},
+ {62, true},
+ {63, false},
+ {64, false},
+ // Test past the first word
+ {65, true},
+ {72, true},
+ {79, true},
+ {80, true},
+ {255, false},
+ {256, false},
+ {511, true},
+ {512, true},
+ }
+
+ for _, v := range getBitTestCases {
+ actual := bv.getBit(v.position)
+ if actual != v.expected {
+ t.Fatalf("expected %dth bit to be %t but got %t", v.position, v.expected, actual)
+ }
+ }
+}
+
+func TestBitVectorBoundPanic_GetBit_Lower(t *testing.T) {
+ defer func() { _ = recover() }()
+
+ initialNumberOfBits := wordSize*10 + 1
+ bv := newBitVector(initialNumberOfBits)
+ bv.getBit(-1)
+
+ t.Fatalf("expected get bit lower bound panic")
+}
+
+func TestBitVectorBoundPanic_GetBit_Upper(t *testing.T) {
+ defer func() { _ = recover() }()
+ initialNumberOfBits := wordSize*10 + 1
+ bv := newBitVector(initialNumberOfBits)
+ bv.getBit(initialNumberOfBits)
+
+ t.Fatalf("expected get bit upper bound panic")
+}
+
+func TestBitVectorBoundPanic_SetBit_Lower(t *testing.T) {
+ defer func() {
+ if r := recover(); r != nil {
+ return
+ }
+ t.Fatalf("expected set bit lower bound panic")
+ }()
+ initialNumberOfBits := wordSize*10 + 1
+ bv := newBitVector(initialNumberOfBits)
+ bv.setBit(-1, true)
+}
+
+func TestBitVectorBoundPanic_SetBit_Upper(t *testing.T) {
+ defer func() {
+ if r := recover(); r != nil {
+ return
+ }
+ t.Fatalf("expected set bit upper bound panic")
+ }()
+ initialNumberOfBits := wordSize*10 + 1
+ bv := newBitVector(initialNumberOfBits)
+ bv.setBit(initialNumberOfBits, true)
+}
diff --git a/search/bwt/bwt.go b/search/bwt/bwt.go
new file mode 100644
index 000000000..fedce4002
--- /dev/null
+++ b/search/bwt/bwt.go
@@ -0,0 +1,699 @@
+/*
+Package bwt is a package for performing burrows-wheeler transforms on sequences.
+
+The BWT is a lossless compression algorithm that can be used to reduce the memory
+footprint of a sequence while still maintaining the ability to search, align, and
+extract the original sequence. This is useful for sequences so large that it would
+be beneficial to reduce its memory footprint while also maintaining a way to analyze
+and work with the sequence. BWT is used in both bioinformatics(burrows wheeler alignment)
+and data compression (bzip2).
+*/
+package bwt
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "strings"
+
+ "golang.org/x/exp/slices"
+)
+
+/*
+
+For the BWT usage, please read the BWT methods
+below. To understand what the BWT is and how it works,
+for either curiosity or maintenance, read on.
+
+# BWT
+
+BWT Stands for (B)urrows-(W)heeler (T)ransform. The BWT aids in
+text compression and acts as a search index for any arbitrary
+sequence of characters. With the BWT and some auxiliary data
+structures, we can analyze a sequence in a memory and run time
+efficient manner.
+
+## BWT Transform
+
+The first step to build the BWT is to get the BWT itself.
+
+This is done by:
+1. Appending a null terminating character to the end of a sequence
+2. Rotate the sequence so that the last character is now the first
+3. Repeat 2. N times where N is the length of the sequence
+4. Lexicographically sort the NxN matrix of rotated sequences where
+ the null termination character is always the least-valued
+5. Store the first and last column of the matrix. The last column
+ is the output of the BWT. The first column is needed to run queries
+ on the BWT of the original sequence.
+
+Lets use banana as an example.
+
+banana$ $banana
+$banana a$banan
+a$banan ana$ban
+na$bana => anana$b
+ana$ban banana$
+nana$ba na$bana
+anana$b nana$ba
+
+Output:
+
+Last Column (BWT): annb$aa
+First Column: $aaabnn
+
+## LF Mapping Properties
+
+From now on we will refer to the Last Column as L and the First as F
+
+There are a few special properties here to be aware of. First, notice
+how the characters of the same rank show up in the same order for each
+column:
+
+L: a0 n0 n1 b0 $0 a1 a2
+
+F: $0 a0 a1 a2 b0 n0 n1
+
+That is to say the characters' rank for each column appear in ascending
+order. For example: a0 < a1 < a2. This is true for all BWTs
+
+The other important property to observe is that since the BWT is the
+result of rotating each string, each character in the L column precedes
+the corresponding character in the F column.
+
+The best way to show this is to rebuild the original sequence
+using the F and L columns. We do this by rebuilding the original string
+in reverse order starting with the nullChar.
+
+Original string: ______$0
+F($0) -> L(a0) -> _____a0$0
+F(a0) -> L(n0) -> ____n0a0$0
+F(n0) -> L(a1) -> ___a1n0a0$0
+F(a1) -> L(n1) -> __n1a1n0a0$0
+F(n1) -> L(a2) -> _a2n1a1n0a0$0
+F(a2) -> L(b0) -> b0a2n1a1n0a0$0
+F(b0) -> L($0) -> Complete
+
+If we take the rank subscripts away from: b0a2n1a1n0a0$0
+We get... "banana$" !
+
+## LF Mapping Usage
+
+From these properties, the most important concept emerges- the LF Mapping.
+The LF mapping is what enables us to query and analyze the BWT to gain
+insight about the original sequence.
+
+For example, let's say we wanted to count the number of occurrences of the
+pattern "ana" in "banana". We can do this by:
+
+1. Lookup the last char of the pattern, a, in the F column
+2. Find that range of a's, [1, 4)
+3. Take the next previous character in the pattern, n
+4. Find the rank of n before the range from 2. [0, 1) = 0
+5. Find the rank of n in the range from 2. [1, 4) = 2
+6. Look up the start range of the n's in the F column, 5
+7. Add the result from 4 and 5 respectively to form the next
+ L search range: [5+0, 5+2) = [5, 7)
+8. Take next previous character in the pattern, a
+9. Take the rank of "a" before position 5, which is 1
+10. Take the rank of "a" before position 7, which is 3
+11. Lookup the a's in the F column again, but add the results
+ from 9 and 10 to the start range to get the next search
+ range = [1+1, 1+3) = [2, 4)
+12. That is the beginning of our pattern, so we subtract the start from the end
+ of the search range to get our count, 4-2=2
+
+Another way to look at this is that we are constantly refining our search
+range for each character of the pattern we are searching for. Once we
+reach the end of the pattern, our final range represents the a's which
+start our pattern. If the range is empty (end - start <= 0), then at some point our search range
+has collapsed and we can conclude that there is no matching pattern.
+
+## Suffix Array
+
+For other operations such as Locate and Extract, we need another auxiliary
+data structure, the suffix array. Since rows of the BWT can map to any
+position within the original sequence, we need some kind of reference as to
+which BWT rows map to which positions in the original sequence. We can do this by storing
+the positions of each character from the original sequence to each of the corresponding
+rows in the BWT column. With our banana example:
+
+F: $0 a0 a1 a2 b0 n0 n1
+SA: [6 5 3 1 0 4 2]
+
+If we take our count example for the pattern "ana" above, you'll remember
+that our final search range was [2, 4). You'll also remember that we counted
+2 occurrences of "ana" by subtracting the start of the range from the end, 4-2=2.
+If we iterate from 2 to 4, we can lookup the corresponding SA entry for the BWT rows 2 and 3.
+If we look up 2 in the SA, we'll find that our first offset is at position 3 in the original sequence ban"ana"
+If we look up 3 in the SA, we'll find that our second offset is at position 1 in the original sequence b"ana"na
+
+## Notes on Performance
+
+The explanation above leads to a very naive implementation. For example,
+having the full SA would take way more memory than the BWT itself. Assuming
+int64, that would be 8 times the amount of memory of the BWT in its plain text
+representation! In the implementation below, we may instead sample the SA
+and do additional look ups as needed to find the offsets we need.
+
+Similarly, storing both the F and L column as plain text would take double the
+amount of memory to store the original sequence... BWT is used for text
+compression, not expansion! That's why in the below implementation, you
+will see other data structures that lower the amount of memory
+needed. You will also notice that we can make huge improvements by
+compressing sequences by runs of characters like with the F column.
+
+Instead of:
+
+F: $0 a0 a1 a2 b0 n0 n1
+
+Since F is lexicographically sorted, we can have:
+
+F: {$: [0, 1)}, {a: [1, 4)}, {b: [4, 5)} {n: [5, 7)}
+
+Although these performance enhancements may lead to a different implementation to what is
+described above, any implementation will just be an LF mapping- just with a few more steps.
+
+
+NOTE: The above is just to explain what is happening at a high level. Please
+reference the implementation below to see how the BWT is actually currently
+working
+
+Many of the Ideas come from Ben Langmead.
+He has a whole YouTube playlist about BWT Indexing: https://www.youtube.com/watch?v=5G2Db41pSHE&list=PL2mpR0RYFQsADmYpW2YWBrXJZ_6EL_3nu
+*/
+
+const nullChar = "$"
+
+// BWT Burrows-Wheeler Transform
+// Compresses and Indexes a given sequence so that it can
+// be used for search, alignment, and text extraction. This is
+// useful for sequences so large that it would be beneficial
+// to reduce its memory footprint while also maintaining a way
+// to analyze and work with the sequence.
+type BWT struct {
+ // firstColumnSkipList is the first column of the BWT. It is
+ // represented as a list of skipEntries because the first column of
+ // the BWT is always lexicographically ordered. This saves time and memory.
+ firstColumnSkipList []skipEntry
+ // suffixArray is an array that allows us to map a position in the first
+ // column to a position in the original sequence. This is needed to be
+ // able to extract text from the BWT.
+ suffixArray []int
+ // runBWTCompression is the compressed version of the BWT. The compression
+ // is for each run. For Example:
+ // the sequence "banana" has BWT "annb$aa"
+ // the run length compression of "annb$aa" is "anb$a"
+ // This helps us save a lot of memory while still having a search index we can
+ // use to align the original sequence. This allows us to understand how many
+ // runs of a certain character there are and where a run of a certain rank exists.
+ runBWTCompression waveletTree
+ // runStartPositions are the starting positions of each run in the BWT (last column).
+ // For example:
+ // "annb$aa" will have the runStartPositions [0, 1, 3, 4, 5]
+ // This helps us map our search range from "uncompressed BWT Space" to its
+ // "compressed BWT Run Space". With this, we can understand which runs we need
+ // to consider during LF mapping.
+ runStartPositions runInfo
+ // runCumulativeCounts is the cumulative count of characters for each run.
+ // This helps us efficiently lookup the number of occurrences of a given
+ // character before a given offset in "uncompressed BWT Space"
+ // For Example:
+ // "annb$aa" will have the runCumulativeCounts:
+ // "a": [0, 1, 3],
+ // "n": [0, 2],
+ // "b": [0, 1],
+ // "$": [0, 1],
+ runCumulativeCounts map[string]runInfo
+
+ // flag for turning on BWT debugging
+ debug bool
+}
+
+// Count represents the number of times the provided pattern
+// shows up in the original sequence.
+func (bwt BWT) Count(pattern string) (count int, err error) {
+ defer bwtRecovery("Count", &err)
+ err = isValidPattern(pattern)
+ if err != nil {
+ return 0, err
+ }
+
+ searchRange := bwt.lfSearch(pattern)
+ return searchRange.end - searchRange.start, nil
+}
+
+// Locate returns a list of offsets at which the beginning
+// of the provided pattern occurs in the original
+// sequence.
+func (bwt BWT) Locate(pattern string) (offsets []int, err error) {
+ defer bwtRecovery("Locate", &err)
+ err = isValidPattern(pattern)
+ if err != nil {
+ return nil, err
+ }
+
+ searchRange := bwt.lfSearch(pattern)
+ if searchRange.start >= searchRange.end {
+ return nil, nil
+ }
+
+ numOfOffsets := searchRange.end - searchRange.start
+ offsets = make([]int, numOfOffsets)
+ for i := 0; i < numOfOffsets; i++ {
+ offsets[i] = bwt.suffixArray[searchRange.start+i]
+ }
+
+ return offsets, nil
+}
+
+// Extract this allows us to extract parts of the original
+// sequence from the BWT.
+// start is the beginning of the range of text to extract inclusive.
+// end is the end of the range of text to extract exclusive.
+// If start >= end or either bound is out of range, Extract returns an error.
+func (bwt BWT) Extract(start, end int) (extracted string, err error) {
+ defer bwtRecovery("Extract", &err) // internal panics are converted into the returned err
+ err = validateRange(start, end)
+ if err != nil {
+ return "", err
+ }
+
+ if end > bwt.getLenOfOriginalStringWithNullChar()-1 {
+ return "", fmt.Errorf("end [%d] exceeds the max range of the BWT [%d]", end, bwt.getLenOfOriginalStringWithNullChar()-1)
+ }
+
+ if start < 0 {
+ return "", fmt.Errorf("start [%d] exceeds the min range of the BWT [0]", start)
+ }
+
+ strB := strings.Builder{}
+ for i := start; i < end; i++ {
+ fPos := bwt.getFCharPosFromOriginalSequenceCharPos(i) // row of the first column that corresponds to original position i
+ skip := bwt.lookupSkipByOffset(fPos) // the skip entry covering that row carries the character
+ strB.WriteByte(skip.char)
+ }
+
+ return strB.String(), nil
+}
+
+// Len return the length of the sequence used to build the BWT
+func (bwt BWT) Len() int {
+ return bwt.getLenOfOriginalStringWithNullChar() - 1
+}
+
+// GetTransform returns the last column of the BWT transform of the original sequence.
+func (bwt BWT) GetTransform() string {
+ lastColumn := strings.Builder{}
+ lastColumn.Grow(bwt.getLenOfOriginalStringWithNullChar())
+ for i := 0; i < bwt.runBWTCompression.length; i++ {
+ currChar := bwt.runBWTCompression.Access(i)
+ var currCharEnd int
+ if i+1 >= len(bwt.runStartPositions) {
+ currCharEnd = bwt.getLenOfOriginalStringWithNullChar()
+ } else {
+ currCharEnd = bwt.runStartPositions[i+1]
+ }
+ for lastColumn.Len() < currCharEnd {
+ lastColumn.WriteByte(currChar)
+ }
+ }
+ return lastColumn.String()
+}
+
+//lint:ignore U1000 Ignore unused function. This is valuable for future debugging
+func (bwt BWT) getFirstColumnStr() string {
+ firstColumn := strings.Builder{}
+ firstColumn.Grow(bwt.getLenOfOriginalStringWithNullChar())
+ for i := 0; i < len(bwt.firstColumnSkipList); i++ {
+ e := bwt.firstColumnSkipList[i]
+ for j := e.openEndedInterval.start; j < e.openEndedInterval.end; j++ {
+ firstColumn.WriteByte(e.char)
+ }
+ }
+ return firstColumn.String()
+}
+
+// getFCharPosFromOriginalSequenceCharPos looks up mapping from the original position
+// of the sequence to its corresponding position in the First Column of the BWT
+// NOTE: This clearly isn't ideal. Instead of improving this implementation, this will be replaced with
+// something like r-index in the near future.
+func (bwt BWT) getFCharPosFromOriginalSequenceCharPos(originalPos int) int {
+ for i := range bwt.suffixArray {
+ if bwt.suffixArray[i] == originalPos {
+ return i
+ }
+ }
+ panic("Unable to find the corresponding original position for a character in the original sequence in the suffix array. This should not be possible and indicates a malformed BWT.")
+}
+
+// lfSearch LF Search- Last First Search.
+// Finds the valid range within the BWT index where the provided pattern is possible.
+// If the width of the returned range (end - start) is <= 0, then the pattern does not exist in the original sequence.
+func (bwt BWT) lfSearch(pattern string) interval {
+ searchRange := interval{start: 0, end: bwt.getLenOfOriginalStringWithNullChar()}
+ for i := 0; i < len(pattern); i++ {
+ if bwt.debug {
+ printLFDebug(bwt, searchRange, i)
+ }
+ if searchRange.end-searchRange.start <= 0 {
+ return interval{} // the range collapsed, so the pattern cannot match
+ }
+
+ c := pattern[len(pattern)-1-i] // walk the pattern from its last character to its first
+ nextStart := bwt.getNextLfSearchOffset(c, searchRange.start)
+ nextEnd := bwt.getNextLfSearchOffset(c, searchRange.end)
+ searchRange.start = nextStart
+ searchRange.end = nextEnd
+ }
+ return searchRange
+}
+
+func (bwt BWT) getNextLfSearchOffset(c byte, offset int) int {
+ nearestRunStart := bwt.runStartPositions.FindNearestRunStartPosition(offset + 1)
+ maxRunInCompressedSpace := bwt.runBWTCompression.Rank(c, nearestRunStart)
+
+ skip, ok := bwt.lookupSkipByChar(c)
+ if !ok {
+ return 0
+ }
+
+ cumulativeCounts, ok := bwt.runCumulativeCounts[string(c)]
+ if !ok {
+ return 0
+ }
+
+ cumulativeCountBeforeMaxRun := cumulativeCounts[maxRunInCompressedSpace]
+
+ currRunStart := bwt.runStartPositions.FindNearestRunStartPosition(offset)
+ currentRunChar := string(bwt.runBWTCompression.Access(currRunStart))
+ extraOffset := 0
+ // It is possible that an offset currently lies within a run of the same
+ // character we are inspecting. In this case, cumulativeCountBeforeMaxRun
+ // is not enough since the Max Run in this case does not include the run
+ // the offset is currently in. To adjust for this, we must count the number
+ // of character occurrences since the beginning of the run that the offset
+ // is currently in.
+ if c == currentRunChar[0] {
+ o := bwt.runStartPositions[nearestRunStart]
+ extraOffset += offset - o
+ }
+
+ return skip.openEndedInterval.start + cumulativeCountBeforeMaxRun + extraOffset
+}
+
+// lookupSkipByChar looks up a skipEntry by its character in the First Column
+func (bwt BWT) lookupSkipByChar(c byte) (entry skipEntry, ok bool) {
+ for i := range bwt.firstColumnSkipList {
+ if bwt.firstColumnSkipList[i].char == c {
+ return bwt.firstColumnSkipList[i], true
+ }
+ }
+ return skipEntry{}, false
+}
+
+// lookupSkipByOffset looks up a skipEntry based off of an
+// offset of the First Column of the BWT. It panics if the offset is out of bounds.
+func (bwt BWT) lookupSkipByOffset(offset int) skipEntry {
+ if offset > bwt.getLenOfOriginalStringWithNullChar()-1 {
+ msg := fmt.Sprintf("offset [%d] exceeds the max bound of the BWT [%d]", offset, bwt.getLenOfOriginalStringWithNullChar()-1)
+ panic(msg)
+ }
+ if offset < 0 {
+ msg := fmt.Sprintf("offset [%d] exceeds the min bound of the BWT [0]", offset)
+ panic(msg)
+ }
+
+ for skipIndex := range bwt.firstColumnSkipList {
+ if bwt.firstColumnSkipList[skipIndex].openEndedInterval.start <= offset && offset < bwt.firstColumnSkipList[skipIndex].openEndedInterval.end {
+ return bwt.firstColumnSkipList[skipIndex]
+ }
+ }
+ msg := fmt.Sprintf("could not find the skip entry that falls within the range of the skip column at a given offset. range: [0, %d) offset: %d", bwt.getLenOfOriginalStringWithNullChar(), offset)
+ panic(msg)
+}
+
+func (bwt BWT) getLenOfOriginalStringWithNullChar() int {
+ return bwt.firstColumnSkipList[len(bwt.firstColumnSkipList)-1].openEndedInterval.end
+}
+
+type interval struct {
+ start int
+ end int
+}
+
+type skipEntry struct {
+ char byte
+ // openEndedInterval start is inclusive and end is exclusive
+ openEndedInterval interval
+}
+
+// New returns a BWT of the provided sequence.
+// The provided sequence must be non-empty and must not contain
+// the nullChar defined in this package. Otherwise, New will return
+// an error.
+func New(sequence string) (BWT, error) {
+ err := validateSequenceBeforeTransforming(&sequence)
+ if err != nil {
+ return BWT{}, err
+ }
+
+ sequence += nullChar
+
+ prefixArray := make([]string, len(sequence)) // despite the name, this holds every suffix of sequence
+ for i := 0; i < len(sequence); i++ {
+ prefixArray[i] = sequence[len(sequence)-i-1:]
+ }
+
+ sortPrefixArray(prefixArray)
+
+ suffixArray := make([]int, len(sequence))
+ charCount := 0 // length of the run currently being scanned
+ runBWTCompressionBuilder := strings.Builder{}
+ var runStartPositions runInfo
+ runCumulativeCounts := make(map[string]runInfo)
+
+ var prevChar *byte
+ for i := 0; i < len(prefixArray); i++ {
+ currChar := sequence[getBWTIndex(len(sequence), len(prefixArray[i]))]
+ if prevChar == nil {
+ prevChar = &currChar
+ }
+
+ if currChar != *prevChar {
+ runBWTCompressionBuilder.WriteByte(*prevChar)
+ runStartPositions = append(runStartPositions, i-charCount)
+ addRunCumulativeCountEntry(runCumulativeCounts, *prevChar, charCount)
+
+ charCount = 0
+ prevChar = &currChar
+ }
+
+ charCount++
+ suffixArray[i] = len(sequence) - len(prefixArray[i])
+ }
+ runBWTCompressionBuilder.WriteByte(*prevChar) // flush the final run, which the loop never closes
+ runStartPositions = append(runStartPositions, len(prefixArray)-charCount)
+ addRunCumulativeCountEntry(runCumulativeCounts, *prevChar, charCount)
+
+ fb := strings.Builder{} // NOTE(review): fb is filled below but never read afterwards; looks like dead code
+ for i := 0; i < len(prefixArray); i++ {
+ fb.WriteByte(prefixArray[i][0])
+ }
+
+ skipList := buildSkipList(prefixArray)
+
+ wt, err := newWaveletTreeFromString(runBWTCompressionBuilder.String())
+ if err != nil {
+ return BWT{}, err
+ }
+ return BWT{
+ firstColumnSkipList: skipList,
+ suffixArray: suffixArray,
+ runBWTCompression: wt,
+ runStartPositions: runStartPositions,
+ runCumulativeCounts: runCumulativeCounts,
+ }, nil
+}
+
+func addRunCumulativeCountEntry(rumCumulativeCounts map[string]runInfo, char byte, charCount int) {
+ cumulativeCountsOfChar, ok := rumCumulativeCounts[string(char)]
+ if ok {
+ cumulativeCountsOfChar = append(cumulativeCountsOfChar, charCount+cumulativeCountsOfChar[len(cumulativeCountsOfChar)-1])
+ } else {
+ cumulativeCountsOfChar = runInfo{0, charCount}
+ }
+ rumCumulativeCounts[string(char)] = cumulativeCountsOfChar
+}
+
+// buildSkipList compresses the First Column of the BWT into a skip list
+func buildSkipList(prefixArray []string) []skipEntry {
+ prevChar := prefixArray[0][0]
+ skipList := []skipEntry{{char: prevChar, openEndedInterval: interval{start: 0}}}
+ for i := 1; i < len(prefixArray); i++ {
+ currChar := prefixArray[i][0]
+ if currChar != prevChar {
+ skipList[len(skipList)-1].openEndedInterval.end = i // close the previous character's interval
+ skipList = append(skipList, skipEntry{
+ char: currChar,
+ openEndedInterval: interval{start: i},
+ })
+ prevChar = currChar
+ }
+ }
+ skipList[len(skipList)-1].openEndedInterval.end = len(prefixArray) // the last interval ends at the end of the column
+ return skipList
+}
+
+// getBWTIndex helps us calculate the corresponding character that would
+// be in the L column without having to rotate the full string.
+// For example:
+// Original string: banana$
+// Rotation: ana$___
+// Position: 7-4-1= 2
+// Original[2]: n
+func getBWTIndex(lenOfSequenceBeingBuilt, lenOfSuffixArrayVisited int) int {
+ bwtCharIndex := lenOfSequenceBeingBuilt - lenOfSuffixArrayVisited - 1
+ if bwtCharIndex == -1 { // the full-length rotation wraps around to the last character
+ bwtCharIndex = lenOfSequenceBeingBuilt - 1
+ }
+ return bwtCharIndex
+}
+
+func sortPrefixArray(prefixArray []string) {
+ slices.SortFunc(prefixArray, func(a, b string) bool {
+ minLen := int(math.Min(float64(len(a)), float64(len(b))))
+ for i := 0; i < minLen; i++ {
+ if a[i] == b[i] {
+ continue
+ }
+ if a[i] == nullChar[0] {
+ return true
+ }
+ if b[i] == nullChar[0] {
+ return false
+ }
+ return a[i] < b[i]
+ }
+
+ return len(a) < len(b)
+ })
+}
+
+func bwtRecovery(operation string, err *error) {
+ if r := recover(); r != nil {
+ rErr := fmt.Errorf("BWT %s InternalError=%s", operation, r)
+ *err = rErr
+ }
+}
+
+// runInfo each element of runInfo should represent an offset i where i
+// corresponds to the start of a run in a given sequence. For example,
+// aaaabbccc would have the run info [0, 4, 6]
+type runInfo []int
+
+// FindNearestRunStartPosition given some offset, find the nearest starting position for the
+// beginning of a run. Another way of saying this is give me the max i where runStartPositions[i] <= offset.
+// This is needed so we can understand which run an offset is a part of.
+func (r runInfo) FindNearestRunStartPosition(offset int) int {
+ start := 0
+ end := len(r) - 1
+ for start < end {
+ mid := start + (end-start)/2
+ if r[mid] < offset {
+ start = mid + 1
+ continue
+ }
+ if r[mid] > offset {
+ end = mid - 1
+ continue
+ }
+
+ return mid // exact hit: offset is itself the start of run mid
+ }
+
+ if r[start] > offset {
+ return start - 1 // r[start] overshoots, so the previous run is the one containing offset
+ }
+
+ return start
+}
+
+func isValidPattern(s string) (err error) {
+ if len(s) == 0 {
+ return errors.New("Pattern can not be empty")
+ }
+ return nil
+}
+
+func validateRange(start, end int) (err error) {
+ if start >= end {
+ return errors.New("Start must be strictly less than end")
+ }
+ return nil
+}
+
+func validateSequenceBeforeTransforming(sequence *string) (err error) {
+ if len(*sequence) == 0 {
+ return fmt.Errorf("Provided sequence must not by empty. BWT cannot be constructed")
+ }
+ if strings.Contains(*sequence, nullChar) {
+ return fmt.Errorf("Provided sequence contains the nullChar %s. BWT cannot be constructed", nullChar)
+ }
+ return nil
+}
+
+// printLFDebug this will print the first column and last column of the BWT along with some ascii visualizations.
+// This is very helpful for debugging the LF mapping. For example, lets say you're in the middle of making some changes to the LF
+// mapping and the test for counting starts to fail. To understand where the LF search is going wrong, you
+// can do something like the below to outline which parts of the BWT are being searched some given iteration.
+//
+// For Example, if you had the BWT of:
+// "rowrowrowyourboat"
+// and wanted to Count the number of occurrences of "row"
+// Then the iterations of the LF search would look something like:
+//
+// BWT Debug Begin Iteration: 0
+// torbyrrru$wwaoooow
+// $abooooorrrrtuwwwy
+// ^^^^^^^^^^^^^^^^^^X
+//
+// BWT Debug Begin Iteration: 1
+// torbyrrru$wwaoooow
+// $abooooorrrrtuwwwy
+// ______________^^^X
+//
+// BWT Debug Begin Iteration: 2
+// torbyrrru$wwaoooow
+// $abooooorrrrtuwwwy
+// _____^^^X
+//
+// Where:
+// * '^' denotes the active search range
+// * 'X' denotes one character after the end of the active search range
+// * '_' is visual padding to help align the active search range
+//
+// NOTE: It can also be helpful to include the other auxiliary data structures. For example, it can be very helpful to include
+// a similar visualization for the run length compression to help debug and understand which runs were used to compute the active
+// search window during each iteration.
+func printLFDebug(bwt BWT, searchRange interval, iteration int) {
+ first := bwt.getFirstColumnStr()
+ last := bwt.GetTransform()
+ lastRunCompression := bwt.runBWTCompression.reconstruct()
+
+ fullASCIIRange := strings.Builder{}
+ fullASCIIRange.Grow(searchRange.end + 1)
+ for i := 0; i < searchRange.start; i++ {
+ fullASCIIRange.WriteRune('_')
+ }
+ for i := searchRange.start; i < searchRange.end; i++ {
+ fullASCIIRange.WriteRune('^')
+ }
+ fullASCIIRange.WriteRune('X')
+
+ fmt.Println("BWT Debug Begin Iteration:", iteration)
+ fmt.Println(last)
+ fmt.Println(first)
+ fmt.Println(fullASCIIRange.String())
+ fmt.Println(lastRunCompression)
+}
diff --git a/search/bwt/bwt_test.go b/search/bwt/bwt_test.go
new file mode 100644
index 000000000..15703a28f
--- /dev/null
+++ b/search/bwt/bwt_test.go
@@ -0,0 +1,580 @@
+package bwt
+
+import (
+ "bytes"
+ "io"
+ "os"
+ "strings"
+ "testing"
+
+ "golang.org/x/exp/slices"
+)
+
+type BWTCountTestCase struct {
+ seq string // pattern passed to Count
+ expected int // number of occurrences Count must report
+}
+
+func TestBWT_Count(t *testing.T) {
+	baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown"
+	// Three copies back to back, so matches may also straddle a copy boundary.
+	index, err := New(strings.Repeat(baseTestStr, 3))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cases := []BWTCountTestCase{
+		{"uick", 3},
+		{"over", 6},
+		{"own", 12},
+		{"ana", 6},
+		{"an", 9},
+		{"na", 9},
+		{"rown", 6},
+		{"frown", 3},
+		{"brown", 3},
+		{"all", 6},
+		{"alle", 3},
+		{"alla", 3},
+		{"l", 21},
+		{"the", 6},
+		{"town", 3},
+		{"townthe", 2},
+		{"nt", 5},
+		// patterns that should not exist
+		{"@", 0},
+		{"zzz", 0},
+		{"clown", 0},
+		{"crown", 0},
+		{"spark", 0},
+		{"brawn", 0},
+		{"overtly", 0},
+	}
+
+	// Every pattern must be counted without error and match exactly.
+	for _, tc := range cases {
+		got, err := index.Count(tc.seq)
+		if err != nil {
+			t.Fatalf("seq=%s unexpectedError=%s", tc.seq, err)
+		}
+		if got != tc.expected {
+			t.Fatalf("seq=%s expectedCount=%v actualCount=%v", tc.seq, tc.expected, got)
+		}
+	}
+}
+
+func TestBWT_Count_EmptyPattern(t *testing.T) {
+	// Count must reject an empty pattern with an error.
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err = index.Count(""); err == nil {
+		t.Fatal("Expected error for empty pattern but got nil")
+	}
+}
+
+type BWTLocateTestCase struct {
+ seq string // pattern passed to Locate
+ expected []int // offsets of every match in ascending order; nil when there is none
+}
+
+// TestBWT_Locate checks the offsets Locate reports for patterns in a sequence
+// made of three back-to-back copies of baseTestStr, including patterns that
+// span a copy boundary and patterns that never occur.
+func TestBWT_Locate(t *testing.T) {
+	baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown" // len == 113
+	testStr := strings.Join([]string{baseTestStr, baseTestStr, baseTestStr}, "")
+
+	bwt, err := New(testStr)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	testTable := []BWTLocateTestCase{
+		{"uick", []int{4, 117, 230}},
+		{"over", []int{21, 41, 134, 154, 247, 267}},
+		{"own", []int{10, 48, 106, 110, 123, 161, 219, 223, 236, 274, 332, 336}},
+		{"ana", []int{87, 89, 200, 202, 313, 315}},
+		{"an", []int{39, 87, 89, 152, 200, 202, 265, 313, 315}},
+		{"na", []int{50, 88, 90, 163, 201, 203, 276, 314, 316}},
+		{"rown", []int{9, 47, 122, 160, 235, 273}},
+		{"frown", []int{46, 159, 272}},
+		{"brown", []int{8, 121, 234}},
+		{"all", []int{70, 96, 183, 209, 296, 322}},
+		{"alle", []int{70, 183, 296}},
+		{"alla", []int{96, 209, 322}},
+		{"l", []int{28, 60, 71, 72, 74, 97, 98, 141, 173, 184, 185, 187, 210, 211, 254, 286, 297, 298, 300, 323, 324}},
+		{"the", []int{0, 25, 113, 138, 226, 251}},
+		{"town", []int{109, 222, 335}},
+		{"townthe", []int{109, 222}},
+		{"nt", []int{108, 112, 221, 225, 334}},
+
+		// patterns that should not exist
+		{"overtly", nil},
+		{"zzz", nil},
+		{"@", nil},
+		{"clown", nil},
+		{"crown", nil},
+		{"spark", nil},
+		{"brawn", nil},
+	}
+
+	for _, v := range testTable {
+		offsets, err := bwt.Locate(v.seq)
+		if err != nil {
+			t.Fatalf("seq=%s unexpectedError=%s", v.seq, err)
+		}
+		// The order in which Locate returns offsets is not relied upon, so sort first.
+		slices.Sort(offsets)
+		// Equal checks length and elements; a nil and an empty slice compare equal.
+		if !slices.Equal(offsets, v.expected) {
+			t.Fatalf("seq=%s expectedOffsets=%v actualOffsets=%v", v.seq, v.expected, offsets)
+		}
+	}
+}
+
+func TestBWT_Locate_EmptyPattern(t *testing.T) {
+	// Locate must reject an empty pattern with an error.
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err = index.Locate(""); err == nil {
+		t.Fatal("Expected error for empty pattern but got nil")
+	}
+}
+
+type BWTExtractTestCase struct {
+ start int // first offset to extract (inclusive)
+ end int // offset at which extraction stops (exclusive)
+ expected string // substring Extract must return for [start, end)
+}
+
+func TestBWT_Extract(t *testing.T) {
+ baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown" // len == 113
+ testStr := strings.Join([]string{baseTestStr, baseTestStr, baseTestStr}, "")
+
+ bwt, err := New(testStr)
+ if err != nil {
+ t.Fatal(err)
+ }
+ // Each case is a half-open range [start, end) and the substring expected there.
+ testTable := []BWTExtractTestCase{
+ {4, 8, "uick"},
+ {117, 121, "uick"},
+ {230, 234, "uick"},
+ {0, 3, "the"},
+ {25, 28, "the"},
+ {113, 116, "the"},
+ {138, 141, "the"},
+ {226, 229, "the"},
+ {251, 254, "the"},
+ {21, 25, "over"},
+ {41, 45, "over"},
+ {134, 138, "over"},
+ {154, 158, "over"},
+ {247, 251, "over"},
+ {267, 271, "over"},
+ {10, 13, "own"},
+ {48, 51, "own"},
+ {106, 109, "own"},
+ {123, 126, "own"},
+ {161, 164, "own"},
+ {219, 222, "own"},
+ {223, 226, "own"},
+ {236, 239, "own"},
+ {274, 277, "own"},
+ {332, 335, "own"},
+ {336, 339, "own"},
+ {87, 90, "ana"},
+ {89, 92, "ana"},
+ {200, 203, "ana"},
+ {202, 205, "ana"},
+ {313, 316, "ana"},
+ {315, 318, "ana"},
+ {39, 41, "an"},
+ {87, 89, "an"},
+ {152, 154, "an"},
+ {200, 202, "an"},
+ {202, 204, "an"},
+ {265, 267, "an"},
+ {313, 315, "an"},
+ {50, 52, "na"},
+ {88, 90, "na"},
+ {163, 165, "na"},
+ {201, 203, "na"},
+ {203, 205, "na"},
+ {276, 278, "na"},
+ {314, 316, "na"},
+ {316, 318, "na"},
+ {9, 13, "rown"},
+ {47, 51, "rown"},
+ {122, 126, "rown"},
+ {160, 164, "rown"},
+ {235, 239, "rown"},
+ {273, 277, "rown"},
+ {109, 116, "townthe"}, // spans the boundary between the first and second copy
+ {222, 229, "townthe"},
+ }
+
+ for _, v := range testTable {
+ str, err := bwt.Extract(v.start, v.end)
+ if err != nil {
+ t.Fatalf("extractRange=(%d, %d) unexpectedError=%s", v.start, v.end, err)
+ }
+ if str != v.expected {
+ t.Fatalf("extractRange=(%d, %d) expected=%s actual=%s", v.start, v.end, v.expected, str)
+ }
+ }
+}
+
+func TestBWT_Extract_InvalidRanges(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Extract must refuse a range whose start is past its end as well as an
+	// empty range whose start equals its end.
+	reversed, empty := [2]int{5, 4}, [2]int{4, 4}
+	for _, r := range [][2]int{reversed, empty} {
+		if _, err := index.Extract(r[0], r[1]); err == nil {
+			t.Fatal("Expected error for invalid range but got nil")
+		}
+	}
+}
+
+func TestBWT_Extract_DoNotAllowExtractionOfLastNullChar(t *testing.T) {
+	const seq = "banana"
+	index, err := New(seq)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The full original sequence, [0, 6), can be extracted.
+	got, err := index.Extract(0, 6)
+	switch {
+	case err != nil:
+		t.Fatalf("extractRange=(%d, %d) unexpectedError=%s", 0, 6, err)
+	case got != seq:
+		t.Fatalf("extractRange=(%d, %d) expected=%s actual=%s", 0, 6, seq, got)
+	}
+
+	// One position further reaches past the original sequence and must fail.
+	_, err = index.Extract(0, 7)
+	if err == nil {
+		t.Fatalf("extractRange=(%d, %d) expected err but was nil", 0, 7)
+	}
+
+	if msg := err.Error(); !strings.Contains(msg, "exceeds the max range") {
+		t.Fatalf("expected error to contain \"exceeds the max range\" but received \"%s\"", err)
+	}
+}
+
+func TestBWT_GetTransform(t *testing.T) {
+ baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown" // len == 113
+ testStr := strings.Join([]string{baseTestStr, baseTestStr, baseTestStr}, "")
+
+ bwt, err := New(testStr)
+ if err != nil {
+ t.Fatal(err)
+ }
+ // Expected transform of the tripled sequence; the single '$' is the null character.
+ expected := "nnnnnnnmmmrrrrrrrrrnnnbbbhhhhhhppplllllldddmmmkkkiiieeennnyyydddppphhhlllhhhtttvvvvvvnnntttaaarrrnnnaaaooooootttsssttttttuuulllwwwgggxxxcccllleeelllbbbaaaaaaeeeaaauuuuuuaaawwwwaaaaaauuuwwwiiiaaawwwwwllldddrrrnnnssstrrrrrrttdddfffsssaaammmeeeaaaggggggeeeaaafffbbbeeeeeemmmppptttfffrrriiirrrnn$nnniiiqqqfffjjjooooooooogggooooooooooooooozzzaaa"
+ actual := bwt.GetTransform()
+ if expected != actual {
+ t.Fatalf("expected did not match actual\nexpected:\t%s\nactual:\t%s", expected, actual)
+ }
+}
+
+func TestBWT_Len(t *testing.T) {
+	const seq = "banana"
+	index, err := New(seq)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Len must equal the length of the sequence that was passed to New.
+	want := len(seq)
+	if got := index.Len(); got != want {
+		t.Fatalf("expected Len to be %d but got %d", want, got)
+	}
+}
+
+type sparseOnesTestCase struct {
+ pos int // position passed to FindNearestRunStartPosition
+ expected int // run index the lookup must return for pos
+}
+
+func TestRunInfo_FindNearestRunStartPosition(t *testing.T) {
+	runs := runInfo{
+		0,
+		6,
+		12,
+		33,
+		99,
+		204,
+		205,
+		300,
+		302,
+		305,
+		306,
+		999,
+	}
+	// Each position must map to the index of the last run start that is <= pos.
+	testCases := []sparseOnesTestCase{
+		{0, 0},
+		{4, 0},
+		{5, 0},
+		{6, 1},
+
+		{7, 1},
+		{8, 1},
+		{9, 1},
+		{11, 1},
+		{12, 2},
+
+		{13, 2},
+		{15, 2},
+		{22, 2},
+		{32, 2},
+		{33, 3},
+
+		{56, 3},
+		{64, 3},
+		{65, 3},
+		{79, 3},
+		{98, 3},
+		{99, 4},
+
+		{100, 4},
+		{112, 4},
+		{168, 4},
+		{197, 4},
+		{199, 4},
+		{203, 4},
+		{204, 5},
+
+		{205, 6},
+
+		{206, 6},
+		{271, 6},
+		{299, 6},
+		{300, 7},
+
+		{301, 7},
+		{302, 8},
+
+		{303, 8},
+		{304, 8},
+		{305, 9},
+
+		{306, 10},
+		{307, 10},
+		{999, 11},
+
+		{1000, 11},
+	}
+
+	for _, v := range testCases {
+		actual := runs.FindNearestRunStartPosition(v.pos)
+		if actual != v.expected {
+			t.Fatalf("expected FindNearestRunStartPosition(%d) to be %d but got %d", v.pos, v.expected, actual)
+		}
+	}
+}
+
+func TestNewBWTWithSequenceContainingNullChar(t *testing.T) {
+	// New is expected to return an error when the input sequence already
+	// contains nullChar.
+	withNull := "banana" + nullChar
+
+	if _, err := New(withNull); err == nil {
+		t.Fatal("expected error but got nil")
+	}
+}
+
+func TestNewBWTEmptySequence(t *testing.T) {
+	// New must report an error when it is given an empty sequence.
+	const empty = ""
+
+	if _, err := New(empty); err == nil {
+		t.Fatal("expected error but got nil")
+	}
+}
+
+// TestBWTReconstruction helps us ensure that the LF mapping is correct and that the suffix array lookup
+// is well formed. Otherwise, we would not be able to recreate the original sequence.
+func TestBWTReconstruction(t *testing.T) {
+	baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown"
+	testStr := strings.Join([]string{baseTestStr, baseTestStr, baseTestStr}, "")
+
+	bwt, err := New(testStr)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	extracted, err := bwt.Extract(0, bwt.Len())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if extracted != testStr {
+		t.Log("Reconstruction failed")
+		t.Log("Expected:\t", testStr)
+		t.Log("Actual:\t", extracted)
+		t.Fail()
+	}
+
+	// One extra, previously unseen character flips the alphabet size between even and odd. The alphabet matters.
+	testStrWithOneMoreAlpha := testStr + "!"
+	bwt, err = New(testStrWithOneMoreAlpha)
+	if err != nil {
+		t.Fatal(err)
+	}
+	extracted, err = bwt.Extract(0, bwt.Len())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if extracted != testStrWithOneMoreAlpha {
+		t.Log("Reconstruction failed with extra alpha character")
+		t.Log("Expected:\t", testStrWithOneMoreAlpha)
+		t.Log("Actual:\t", extracted)
+		t.Fail()
+	}
+}
+
+func TestBWTStartError(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Extract must fail when the start offset is negative.
+	const start, end = -1, 6
+
+	if _, err := index.Extract(start, end); err == nil {
+		t.Fatal("expected error but got nil")
+	}
+}
+func TestBWT_GetFCharPosFromOriginalSequenceCharPos_Panic(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The deferred check runs as the function unwinds and fails the test
+	// unless there is a panic to recover from.
+	defer func() {
+		if recover() == nil {
+			t.Errorf("Expected panic, but it did not occur")
+		}
+	}()
+	const invalidPos = -1
+	index.getFCharPosFromOriginalSequenceCharPos(invalidPos)
+}
+func TestBWT_LFSearch_InvalidChar(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// 'x' does not occur in "banana", so the search must come back with the
+	// zero interval.
+	const absent = "x"
+	got := index.lfSearch(absent)
+
+	if !(got.start == 0 && got.end == 0) {
+		t.Fatalf("Expected search range to be (0, 0), but got (%d, %d)", got.start, got.end)
+	}
+}
+func TestBWT_LookupSkipByOffset_PanicOffsetExceedsMaxBound(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The length including the null character serves as an offset beyond the maximum bound.
+	pastEnd := index.getLenOfOriginalStringWithNullChar()
+	defer func() {
+		if recover() == nil {
+			t.Errorf("Expected panic, but it did not occur")
+		}
+	}()
+	index.lookupSkipByOffset(pastEnd)
+}
+
+func TestBWT_LookupSkipByOffset_PanicOffsetExceedsMinBound(t *testing.T) {
+	index, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// A negative offset lies below the minimum bound and must trigger a panic.
+	const belowMin = -1
+	defer func() {
+		if recover() == nil {
+			t.Errorf("Expected panic, but it did not occur")
+		}
+	}()
+	index.lookupSkipByOffset(belowMin)
+}
+
+func TestBWTRecovery(t *testing.T) {
+ // Deferred calls run last-in-first-out: bwtRecovery runs first on the panic raised by doPanic, then the check below inspects err.
+ var err error
+ operation := "test operation"
+
+ defer func() {
+ if err == nil {
+ t.Fatal("expected bwtRecovery to recover from the panic and set an error message, but got nil")
+ }
+ }()
+ defer bwtRecovery(operation, &err)
+ doPanic()
+}
+
+func doPanic() {
+ panic("test panic") // always panics; TestBWTRecovery calls this to trigger bwtRecovery
+}
+func TestPrintLFDebug(t *testing.T) {
+	bwt, err := New("banana")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	searchRange := interval{start: 2, end: 5}
+	iteration := 1
+	expectedOutput := "BWT Debug Begin Iteration: 1" + "\n"
+	expectedOutput += "annb$aa" + "\n"
+	expectedOutput += "$aaabnn" + "\n"
+	expectedOutput += "__^^^X" + "\n"
+	expectedOutput += "anb$a" + "\n"
+
+	// Redirect stdout to a pipe so the printed output can be captured.
+	old := os.Stdout
+	r, w, err := os.Pipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer r.Close()
+	os.Stdout = w
+	func() {
+		// Restore stdout even if printLFDebug panics.
+		defer func() { os.Stdout = old }()
+		printLFDebug(bwt, searchRange, iteration)
+	}()
+	// Close the write end so the read below sees EOF.
+	if err := w.Close(); err != nil {
+		t.Fatal(err)
+	}
+	// Read the captured output and compare it with the expected value.
+	var buf bytes.Buffer
+	if _, err := io.Copy(&buf, r); err != nil {
+		t.Fatal(err)
+	}
+	if actualOutput := buf.String(); actualOutput != expectedOutput {
+		t.Errorf("Unexpected output:\nExpected:\n%s\nActual:\n%s", expectedOutput, actualOutput)
+	}
+}
diff --git a/search/bwt/example_test.go b/search/bwt/example_test.go
new file mode 100644
index 000000000..4d1ec7390
--- /dev/null
+++ b/search/bwt/example_test.go
@@ -0,0 +1,92 @@
+package bwt_test
+
+import (
+ "fmt"
+ "log"
+
+ "github.com/bebop/poly/search/bwt"
+ "golang.org/x/exp/slices"
+)
+
+// This example shows how BWT can be used for exact pattern
+// matching by returning the offsets at which the pattern exists.
+// This can be useful for alignment when you need to reduce
+// the memory footprint of a reference sequence without losing
+// any data since BWT is a lossless compression.
+func ExampleBWT_basic() {
+	inputSequence := "AACCTGCCGTCGGGGCTGCCCGTCGCGGGACGTCGAAACGTGGGGCGAAACGTG"
+
+	index, err := bwt.New(inputSequence)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	matches, err := index.Locate("GCC")
+	if err != nil {
+		log.Fatal(err)
+	}
+	slices.Sort(matches)
+	fmt.Println(matches)
+	// Output: [5 17]
+}
+
+func ExampleBWT_Count() {
+	inputSequence := "AACCTGCCGTCGGGGCTGCCCGTCGCGGGACGTCGAAACGTGGGGCGAAACGTG"
+
+	index, err := bwt.New(inputSequence)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Count reports how many times the pattern occurs in the sequence.
+	occurrences, err := index.Count("CG")
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(occurrences)
+	// Output: 10
+}
+
+func ExampleBWT_Locate() {
+	inputSequence := "AACCTGCCGTCGGGGCTGCCCGTCGCGGGACGTCGAAACGTGGGGCGAAACGTG"
+
+	index, err := bwt.New(inputSequence)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Locate returns the offset of every match; sort them for stable output.
+	matches, err := index.Locate("CG")
+	if err != nil {
+		log.Fatal(err)
+	}
+	slices.Sort(matches)
+	fmt.Println(matches)
+	// Output: [7 10 20 23 25 30 33 38 45 50]
+}
+
+func ExampleBWT_Extract() {
+	inputSequence := "AACCTGCCGTCGGGGCTGCCCGTCGCGGGACGTCGAAACGTGGGGCGAAACGTG"
+
+	index, err := bwt.New(inputSequence)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Extract returns the part of the original sequence in the range [48, 54).
+	tail, err := index.Extract(48, 54)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(tail)
+	// Output: AACGTG
+}
+
+func ExampleBWT_GetTransform() {
+	inputSequence := "banana"
+
+	index, err := bwt.New(inputSequence)
+	if err != nil {
+		log.Fatal(err)
+	}
+	transform := index.GetTransform()
+	fmt.Println(transform)
+	// Output: annb$aa
+}
diff --git a/search/bwt/rsa_bitvector.go b/search/bwt/rsa_bitvector.go
new file mode 100644
index 000000000..fc2e9ceb8
--- /dev/null
+++ b/search/bwt/rsa_bitvector.go
@@ -0,0 +1,192 @@
+package bwt
+
+import "math/bits"
+
+// rsaBitVector allows us to perform RSA: (R)ank, (S)elect, and (A)ccess
+// queries in a performant and memory compact way.
+// To learn about how Rank, Select, and Access work, take a look at the
+// examples in each respective method.
+type rsaBitVector struct {
+ bv bitvector // underlying bits; must not be modified once the auxiliary data is built
+ totalOnesRank int // total ones count accumulated over bv's words by buildJacobsonRank
+ jrc []chunk // Jacobson rank chunks built by buildJacobsonRank
+ jrSubChunksPerChunk int // sub chunks per chunk; the final chunk may hold fewer
+ jrBitsPerChunk int // jrSubChunksPerChunk * jrBitsPerSubChunk
+ jrBitsPerSubChunk int // bits covered by one sub chunk
+ oneSelectMap map[int]int // rank -> position lookup for set bits
+ zeroSelectMap map[int]int // rank -> position lookup for unset bits
+}
+
+// newRSABitVectorFromBitVector precomputes the Jacobson rank chunks and the
+// select lookup maps for bv and bundles them with bv in an rsaBitVector.
+// WARNING: Do not modify the underlying bitvector afterwards. The precomputed
+// structures would get out of sync with it.
+func newRSABitVectorFromBitVector(bv bitvector) rsaBitVector {
+	chunks, subChunksPerChunk, bitsPerSubChunk, onesTotal := buildJacobsonRank(bv)
+	oneSelect, zeroSelect := buildSelectMaps(bv)
+
+	var rsa rsaBitVector
+	rsa.bv = bv
+	rsa.totalOnesRank = onesTotal
+	rsa.jrc = chunks
+	rsa.jrSubChunksPerChunk = subChunksPerChunk
+	rsa.jrBitsPerSubChunk = bitsPerSubChunk
+	rsa.jrBitsPerChunk = subChunksPerChunk * bitsPerSubChunk
+	rsa.oneSelectMap = oneSelect
+	rsa.zeroSelectMap = zeroSelect
+	return rsa
+}
+
+// Rank returns the rank of the given value up to, but not including
+// the ith bit.
+// For Example:
+// Given the bitvector 001000100001
+// Rank(true, 1) = 0
+// Rank(true, 2) = 0
+// Rank(true, 3) = 1
+// Rank(true, 8) = 2
+// Rank(false, 8) = 6
+func (rsa rsaBitVector) Rank(val bool, i int) int {
+ if i == rsa.bv.len() { // the whole bitvector: answer from the stored total instead of the chunk lookup
+ if val {
+ return rsa.totalOnesRank
+ }
+ return rsa.bv.len() - rsa.totalOnesRank
+ }
+ // Locate the chunk, and then the sub chunk within it, that contains bit i.
+ chunkPos := (i / rsa.jrBitsPerChunk)
+ chunk := rsa.jrc[chunkPos]
+
+ subChunkPos := (i % rsa.jrBitsPerChunk) / rsa.jrBitsPerSubChunk
+ subChunk := chunk.subChunks[subChunkPos]
+
+ bitOffset := i % rsa.jrBitsPerSubChunk
+ // Fetch the word that backs that sub chunk.
+ bitSet := rsa.bv.getBitSet(chunkPos*rsa.jrSubChunksPerChunk + subChunkPos)
+ // Shifting right keeps only the bitOffset most significant bits of the word, which are counted as the bits preceding i.
+ shiftRightAmount := uint64(rsa.jrBitsPerSubChunk - bitOffset)
+ if val {
+ remaining := bitSet >> shiftRightAmount
+ return chunk.onesCumulativeRank + subChunk.onesCumulativeRank + bits.OnesCount64(remaining)
+ }
+ remaining := ^bitSet >> shiftRightAmount
+
+ // cumulative ranks for 0 should just be the sum of the complement of cumulative ranks for 1
+ return (chunkPos*rsa.jrBitsPerChunk - chunk.onesCumulativeRank) + (subChunkPos*rsa.jrBitsPerSubChunk - subChunk.onesCumulativeRank) + bits.OnesCount64(remaining)
+}
+
+// Select returns the position of the bit equal to val that has exactly rank
+// bits equal to val before it; a rank equal to the total count of such bits
+// yields the bitvector length. ok is false when rank has no entry.
+// For Example:
+// Given the bitvector 001000100001
+// Select(true, 0) = 2
+// Select(true, 1) = 6
+// Select(false, 2) = 3
+func (rsa rsaBitVector) Select(val bool, rank int) (i int, ok bool) {
+	if val {
+		i, ok = rsa.oneSelectMap[rank]
+		return i, ok
+	}
+	i, ok = rsa.zeroSelectMap[rank]
+	return i, ok
+}
+
+// Access returns the value of the bit at offset i, as reported by the underlying bitvector's getBit.
+func (rsa rsaBitVector) Access(i int) bool {
+ return rsa.bv.getBit(i)
+}
+
+type chunk struct {
+ subChunks []subChunk // sub chunks of this chunk, in bitvector order
+ onesCumulativeRank int // set bits counted in all chunks before this one
+}
+
+type subChunk struct {
+ onesCumulativeRank int // set bits counted in the earlier sub chunks of the same chunk
+}
+
+/*
+buildJacobsonRank Jacobson rank is a succinct data structure. This allows us to represent something
+that would normally require O(N) worth of memory with less than N memory. Jacobson Rank allows for
+sub linear growth. Jacobson rank also allows us to lookup rank for some value of a bitvector in O(1)
+time. Theoretically, Jacobson Rank Requires:
+1. Creating log(N) "Chunks"
+2. Creating 2log(N) "Sub Chunks"
+3. Having "Sub Chunks" be 0.5log(N) in length
+4. For each "Chunk", store the cumulative rank of set bits relative to the overall bitvector
+5. For each "Sub Chunk", store the cumulative rank of set bits relative to the parent "Chunk"
+6. We can One's count the N bit word if possible. We will only consider this possibility :)
+
+For simplicity and all around decent results, we just have "Sub Chunks" of size 64 bits.
+
+It is O(1) because given some offset i, all we have to do to calculate rank is:
+rank = CumulativeRank(ChunkOfi(i)) + CumulativeRank(SubChunkOfi(i)) + OnesCount(SubChunkOfi(i))
+
+To understand why it is sub linear in space, you can refer to Ben Langmead and other literature that
+describes the space complexity.
+https://www.youtube.com/watch?v=M1sUZxXVjG8&list=PL2mpR0RYFQsADmYpW2YWBrXJZ_6EL_3nu&index=7
+*/
+func buildJacobsonRank(inBv bitvector) (jacobsonRankChunks []chunk, numOfSubChunksPerChunk, numOfBitsPerSubChunk, totalRank int) {
+ numOfSubChunksPerChunk = 4
+
+ totalRank = 0
+ chunkCumulativeRank := 0
+ subChunkCumulativeRank := 0
+
+ var currSubChunks []subChunk
+ for i := range inBv.bits {
+ if len(currSubChunks) == numOfSubChunksPerChunk {
+ jacobsonRankChunks = append(jacobsonRankChunks, chunk{
+ subChunks: currSubChunks,
+ onesCumulativeRank: chunkCumulativeRank,
+ })
+ // The next chunk starts after every set bit counted in the chunk just stored.
+ chunkCumulativeRank += subChunkCumulativeRank
+
+ currSubChunks = nil
+ subChunkCumulativeRank = 0
+ }
+ currSubChunks = append(currSubChunks, subChunk{
+ onesCumulativeRank: subChunkCumulativeRank,
+ })
+ // Count the set bits of word i and add them to the running totals.
+ onesCount := bits.OnesCount64(inBv.getBitSet(i))
+ subChunkCumulativeRank += onesCount
+ totalRank += onesCount
+ }
+ // Store the trailing chunk, which may hold fewer sub chunks than the others.
+ if currSubChunks != nil {
+ jacobsonRankChunks = append(jacobsonRankChunks, chunk{
+ subChunks: currSubChunks,
+ onesCumulativeRank: chunkCumulativeRank,
+ })
+ }
+
+ return jacobsonRankChunks, numOfSubChunksPerChunk, wordSize, totalRank // wordSize is returned as the number of bits per sub chunk
+}
+
+// buildSelectMaps records, for every bit of inBv, the position at which each
+// rank of ones and of zeros occurs. This is not good. We should find a better
+// means of select- like Clark's Select
+func buildSelectMaps(inBv bitvector) (oneSelectMap, zeroSelectMap map[int]int) {
+	oneSelectMap, zeroSelectMap = map[int]int{}, map[int]int{}
+	total := inBv.len()
+	var onesSeen, zerosSeen int
+	for pos := 0; pos < total; pos++ {
+		if !inBv.getBit(pos) {
+			zeroSelectMap[zerosSeen] = pos
+			zerosSeen++
+			continue
+		}
+		oneSelectMap[onesSeen] = pos
+		onesSeen++
+	}
+
+	// The rank one past the last occurrence of each value maps to the length
+	// of the bitvector, so a select for the maximum rank still resolves.
+	oneSelectMap[onesSeen] = total
+	zeroSelectMap[zerosSeen] = total
+
+	return oneSelectMap, zeroSelectMap
+}
diff --git a/search/bwt/rsa_bitvector_test.go b/search/bwt/rsa_bitvector_test.go
new file mode 100644
index 000000000..d09a9eb2b
--- /dev/null
+++ b/search/bwt/rsa_bitvector_test.go
@@ -0,0 +1,353 @@
+package bwt
+
+import (
+ "testing"
+)
+
+type rsaRankTestCase struct {
+ val bool // bit value whose rank is queried
+ bitPosition int // position passed to Rank
+ expectedRank int // rank the test requires Rank to return
+}
+
+func TestRSARank_singlePartialChunk(t *testing.T) {
+	// Skipped unless words are 64 bits wide, which the literals below assume.
+	if wordSize != 64 {
+		t.Skip()
+	}
+
+	// Two words with the last 22 bits cut off, leaving 106 usable bits.
+	const truncatedBits = 22
+	rsa := newTestRSAFromWords(wordSize*2-truncatedBits,
+		0xffffffff00000000,
+		0x00000000ffc00000,
+	)
+
+	cases := []rsaRankTestCase{
+		// start of the bitvector
+		{true, 0, 0}, {false, 0, 0},
+		// end of the first word: 32 ones followed by 32 zeros
+		{true, 64, 32}, {false, 64, 32},
+		// the upper half of the second word is all zeros
+		{true, 96, 32}, {false, 96, 64},
+		// nine of the ten ones starting at bit 96 precede bit 105
+		{true, 105, 41}, {false, 105, 64},
+	}
+
+	for _, tc := range cases {
+		if got := rsa.Rank(tc.val, tc.bitPosition); got != tc.expectedRank {
+			t.Fatalf("expected rank(%t, %d) to be %d but got %d", tc.val, tc.bitPosition, tc.expectedRank, got)
+		}
+	}
+}
+
+func TestRSARank_singleCompleteChunk_PastBounds_Ones(t *testing.T) {
+	// One complete chunk of four words that ends in a word of all ones.
+	rsa := newTestRSAFromWords(64*4,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+	)
+
+	cases := []rsaRankTestCase{
+		{true, 0, 0}, {false, 0, 0},
+		{true, 255, 127}, {false, 255, 128},
+		{true, 256, 128}, {false, 256, 128},
+	}
+
+	for _, tc := range cases {
+		if got := rsa.Rank(tc.val, tc.bitPosition); got != tc.expectedRank {
+			t.Fatalf("expected rank(%t, %d) to be %d but got %d", tc.val, tc.bitPosition, tc.expectedRank, got)
+		}
+	}
+}
+
+func TestRSARank_singleCompleteChunk_PastBounds_Zeros(t *testing.T) {
+	// One complete chunk of four words that ends in a word of all zeros.
+	rsa := newTestRSAFromWords(64*4,
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+	)
+
+	cases := []rsaRankTestCase{
+		{true, 0, 0}, {false, 0, 0},
+		{true, 255, 128}, {false, 255, 127},
+		{true, 256, 128}, {false, 256, 128},
+	}
+
+	for _, tc := range cases {
+		if got := rsa.Rank(tc.val, tc.bitPosition); got != tc.expectedRank {
+			t.Fatalf("expected rank(%t, %d) to be %d but got %d", tc.val, tc.bitPosition, tc.expectedRank, got)
+		}
+	}
+}
+
+func TestRSARank_singleCompleteChunk(t *testing.T) {
+ initialNumberOfBits := wordSize * 4
+
+ rsa := newTestRSAFromWords(initialNumberOfBits,
+ 0x8000000000000001,
+ 0xff0f30fffacea80d,
+ 0x90e0a0e0b0e0cf0c,
+ 0x3d0f064f7206f717,
+ )
+
+ testCases := []rsaRankTestCase{
+ {true, 0, 0}, {false, 0, 0},
+ {true, 1, 1}, {false, 1, 0},
+ {true, 2, 1}, {false, 2, 1},
+ {true, 3, 1}, {false, 3, 2},
+ {true, 62, 1}, {false, 62, 61},
+ {true, 63, 1}, {false, 63, 62},
+ // positions 64-127: second word
+ {true, 64, 2}, {false, 64, 62},
+ {true, 65, 3}, {false, 65, 62},
+ {true, 72, 10}, {false, 72, 62},
+ {true, 127, 40}, {false, 127, 87},
+ // positions 128-191: third word
+ {true, 128, 41}, {false, 128, 87},
+ {true, 129, 42}, {false, 129, 87},
+ {true, 130, 42}, {false, 130, 88},
+ {true, 131, 42}, {false, 131, 89},
+ {true, 132, 43}, {false, 132, 89},
+ {true, 133, 43}, {false, 133, 90},
+ {true, 159, 51}, {false, 159, 108},
+ {true, 160, 51}, {false, 160, 109},
+ {true, 161, 52}, {false, 161, 109},
+ {true, 162, 52}, {false, 162, 110},
+ {true, 163, 53}, {false, 163, 110},
+ {true, 164, 54}, {false, 164, 110},
+ {true, 165, 54}, {false, 165, 111},
+ {true, 176, 57}, {false, 176, 119},
+ {true, 177, 58}, {false, 177, 119},
+ {true, 178, 59}, {false, 178, 119},
+ {true, 179, 59}, {false, 179, 120},
+ {true, 180, 59}, {false, 180, 121},
+ {true, 183, 62}, {false, 183, 121},
+ {true, 184, 63}, {false, 184, 121},
+ {true, 185, 63}, {false, 185, 122},
+ {true, 186, 63}, {false, 186, 123},
+ {true, 187, 63}, {false, 187, 124},
+ {true, 188, 63}, {false, 188, 125},
+ {true, 189, 64}, {false, 189, 125},
+ {true, 190, 65}, {false, 190, 125},
+ {true, 191, 65}, {false, 191, 126},
+ // positions 192-255: fourth word
+ {true, 192, 65}, {false, 192, 127},
+ {true, 193, 65}, {false, 193, 128},
+ {true, 194, 65}, {false, 194, 129},
+ {true, 195, 66}, {false, 195, 129},
+ {true, 196, 67}, {false, 196, 129},
+ {true, 248, 94}, {false, 248, 154},
+ {true, 249, 94}, {false, 249, 155},
+ {true, 250, 94}, {false, 250, 156},
+ {true, 251, 94}, {false, 251, 157},
+ {true, 252, 95}, {false, 252, 157},
+ {true, 253, 95}, {false, 253, 158},
+ {true, 254, 96}, {false, 254, 158},
+ {true, 255, 97}, {false, 255, 158},
+ }
+
+ for _, tc := range testCases {
+ rank := rsa.Rank(tc.val, tc.bitPosition)
+ if rank != tc.expectedRank {
+ t.Fatalf("expected rank(%t, %d) to be %d but got %d", tc.val, tc.bitPosition, tc.expectedRank, rank)
+ }
+ }
+}
+
+func TestRSARank_multipleChunks(t *testing.T) {
+	rsa := newTestRSAFromWords((8*4+3)*64,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		// If Jacobson rank is still there, this should go past the first
+		// chunk
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+		0x0000000000000000,
+
+		// If Jacobson rank is still there, this should go past the second
+		// chunk
+		0xffffffffffffffff,
+		0x0000000000000000,
+		0xffffffffffffffff,
+	)
+
+	testCases := []rsaRankTestCase{
+		{true, 0, 0}, {false, 0, 0},
+
+		{true, 64, 0}, {false, 64, 64},
+		{true, 128, 64}, {false, 128, 64},
+		{true, 192, 64}, {false, 192, 128},
+		{true, 256, 128}, {false, 256, 128},
+
+		{true, 320, 192}, {false, 320, 128},
+		{true, 384, 192}, {false, 384, 192},
+		{true, 448, 256}, {false, 448, 192},
+		{true, 512, 256}, {false, 512, 256},
+
+		{true, 576, 256}, {false, 576, 320},
+		{true, 640, 320}, {false, 640, 320},
+		{true, 704, 320}, {false, 704, 384},
+		{true, 768, 384}, {false, 768, 384},
+
+		{true, 832, 448}, {false, 832, 384},
+		{true, 896, 448}, {false, 896, 448},
+
+		{true, 1024, 512}, {false, 1024, 512},
+
+		{true, 2048, 1024}, {false, 2048, 1024},
+	}
+
+	for _, tc := range testCases {
+		rank := rsa.Rank(tc.val, tc.bitPosition)
+		if rank != tc.expectedRank {
+			t.Fatalf("expected rank(%t, %d) to be %d but got %d", tc.val, tc.bitPosition, tc.expectedRank, rank)
+		}
+	}
+}
+
+type rsaSelectTestCase struct {
+ val bool // bit value to select
+ rank int // rank passed to Select
+ expectedPosition int // position the test requires Select to return
+}
+
+func TestRSASelect(t *testing.T) {
+ bitsToTruncate := 17
+ initialNumberOfBits := wordSize*4 - bitsToTruncate
+ rsa := newTestRSAFromWords(initialNumberOfBits,
+ 0x8010000000010000, // 1Count = 3
+ 0xfff1ffffffffffff, // 1Count = 61
+ 0x0000010000000000, // 1Count = 1
+ 0xffffffffffffffff, // Possible 1Count = 47
+ )
+
+ testCases := []rsaSelectTestCase{
+ {true, 0, 0},
+ {true, 1, 11},
+ {true, 2, 47},
+ {false, 0, 1},
+ {false, 1, 2},
+ {false, 3, 4},
+ {false, 8, 9},
+ {false, 9, 10},
+ {false, 10, 12},
+ {false, 11, 13},
+ {false, 60, 63},
+
+ {true, 3, 64},
+ {true, 9, 70},
+ {true, 13, 74},
+ {true, 14, 75},
+ {true, 15, 79},
+ {true, 16, 80},
+ {true, 63, 127},
+ {false, 61, 76},
+ {false, 62, 77},
+ {false, 63, 78},
+
+ {true, 64, 151},
+ {true, 65, 192},
+ {true, 111, 238},
+ {false, 64, 128},
+
+ {false, 126, 191},
+
+ // Select of penultimate ranks should be the positions at which they appear.
+ {true, 111, rsa.bv.len() - 1},
+ {false, 126, 191},
+
+ // Max bitvector positions for the max rank should be at the ends of the bitvector
+ {true, 112, rsa.bv.len()},
+ {false, 127, rsa.bv.len()},
+ }
+
+ for _, tc := range testCases {
+ position, ok := rsa.Select(tc.val, tc.rank)
+
+ if !ok {
+ t.Fatalf("expected select(%t, %d) to be %d but went out of range", tc.val, tc.rank, tc.expectedPosition)
+ }
+
+ if position != tc.expectedPosition {
+ t.Fatalf("expected select(%t, %d) to be %d but got %d", tc.val, tc.rank, tc.expectedPosition, position)
+ }
+ }
+}
+
+func TestRSASelect_notOk(t *testing.T) {
+	const truncatedBits = 17
+	rsa := newTestRSAFromWords(wordSize*4-truncatedBits,
+		0x8010000000010000,
+		0xfff1ffffffffffff,
+		0x0000010000000000,
+		0xffffffffffffffff,
+	)
+
+	// Ranks that have no entry must come back as not ok.
+	if _, found := rsa.Select(true, -1); found {
+		t.Fatalf("expected select(true, -1) to be not ok but somehow returned a value")
+	}
+
+	// Rank 111 is still resolvable and must be reported at position 238.
+	pos, found := rsa.Select(true, 111)
+	switch {
+	case !found:
+		t.Fatalf("expected select(true, 111) to be ok but somehow got not ok")
+	case pos != 238:
+		t.Fatalf("expected select(true, 111) to be 238 but got %d", pos)
+	}
+
+	if _, found := rsa.Select(true, 239); found {
+		t.Fatalf("expected select(true, 239) to be not ok but somehow returned a value")
+	}
+}
+
+func newTestRSAFromWords(sizeInBits int, wordsToCopy ...uint64) rsaBitVector {
+	// Bit i of the vector is taken from word i/64, counting from that word's
+	// most significant bit.
+	bv := newBitVector(sizeInBits)
+	for i := 0; i < sizeInBits; i++ {
+		word, shift := wordsToCopy[i/64], uint(63-i%64)
+		bv.setBit(i, (word>>shift)&1 == 1)
+	}
+	return newRSABitVectorFromBitVector(bv)
+}
diff --git a/search/bwt/wavelet.go b/search/bwt/wavelet.go
new file mode 100644
index 000000000..7978fc8e7
--- /dev/null
+++ b/search/bwt/wavelet.go
@@ -0,0 +1,438 @@
+package bwt
+
+import (
+ "errors"
+ "fmt"
+ "math"
+
+ "golang.org/x/exp/slices"
+)
+
+/*
+
+For the waveletTree's usage, please read its
+method documentation. To understand what it is and how
+it works for either curiosity or maintenance, then read below.
+
+# WaveletTree
+
+The Wavelet Tree allows us to conduct RSA queries on strings in
+a memory and run time efficient manner.
+RSA stands for (R)ank, (S)elect, (A)ccess.
+
+See this blog post by Alex Bowe for an additional explanation:
+https://www.alexbowe.com/wavelet-trees/
+
+## The Character's Path Encoding
+
+Each character from a sequence's alphabet will be assigned a path.
+This path encoding represents a path from the Wavelet Tree's root to some
+leaf node that represents a character.
+For example, given the alphabet A B C D E F G H, a possible encoding is:
+
+A: 000
+B: 001
+C: 010
+D: 011
+E: 100
+F: 101
+G: 110
+H: 111
+
+If we wanted to get to the leaf that represents the character D, we'd have
+to use D's path encoding to traverse the tree.
+Consider 0 as the left and 1 as the right.
+If we follow D's encoding, 011, then we'd take a path that looks like:
+
+    root
+    /
+  left
+    \
+    right
+      \
+      right
+
+## The Data Represented at each node
+
+Let us consider the sequence "bananas"
+It has the alphabet b, a, n, s
+Let's say it has the encoding:
+a: 00
+b: 01
+n: 10
+s: 11
+and that 0 is left and 1 is right
+We can represent this tree with bitvectors:
+
+        0010101
+        bananas
+       /       \
+    1000       001
+    baaa       nns
+    /  \       /  \
+   a    b     n    s
+
+If we translate each bit vector to its corresponding string, then it becomes:
+
+        bananas
+       /       \
+    baaa       nns
+    /  \       /  \
+   a    b     n    s
+
+Each node of the tree consists of a bitvector whose values indicate whether
+the character at a particular index is in the left (0) or right (1) child of the
+tree.
+
+## RSA
+
+At this point, we can talk about RSA. RSA stands for (R)ank, (S)elect, (A)ccess.
+
+### Rank Example
+
+WaveletTree.Rank(c, n) returns the rank of character c at index n in a sequence, i.e. how many
+times c has occurred in a sequence before index n.
+
+To get WaveletTree.Rank(a, 4) of bananas where a's encoding is 00
+1. root.Rank(0, 4) of 0010101 is 3
+2. Visit Left Child
+3. child.Rank(0, 3) of 1000 is 2
+4. Visit Left Child
+5. We are at a leaf node, so return our last recorded rank: 2
+
+### Select Example
+
+Ranks are 0-indexed, so rank 0 refers to the first occurrence of a character.
+To get WaveletTree.Select(n, 0) of bananas where n's encoding is 10
+1. Go down to n's leaf using the path encoding 10
+2. Go back to n's leaf's parent
+3. parent.Select(0, 0) of 001 is 0
+4. Go to the next parent
+5. parent.Select(1, 0) of 0010101 is 2
+6. return 2 since we are at the root.
+
+### Access Example
+
+Take the tree we constructed earlier to represent the sequence "bananas".
+
+        0010101
+       /       \
+    1000       001
+    /  \       /  \
+   a    b     n    s
+
+To access the 4th character of the sequence, we would call WaveletTree.Access(3),
+which performs the following operations:
+
+1. root[3] is 0 and root.Rank(0, 3) is 2
+2. Since root[3] is 0, visit left child
+3. child[2] is 0 and child.Rank(0, 2) is 1
+4. Since child[2] is 0, visit left child
+5. Left child is a leaf, so we've found our value (a)!
+
+NOTE: The waveletTree does not literally have to be a tree. There are other forms that it may
+exist in, for example a concatenation of the level-order representation of all of its nodes'
+bitvectors. Please reference the implementation if you'd like to understand how this
+specific waveletTree works.
+
+*/
+
+// waveletTree is a data structure that indexes a sequence in a memory efficient
+// way and answers RSA, (R)ank (S)elect (A)ccess, queries on it. This is very
+// useful in situations where you'd like to understand certain aspects of a
+// sequence like:
+// * the number of times a character appears
+// * the frequency of a character up to a certain offset
+// * the location of the character with a certain rank within the sequence
+// * the character at a given position
+type waveletTree struct {
+ root *node // top of the tree; a lone leaf when the sequence has a single distinct character
+ alpha []charInfo // metadata for each distinct byte of the sequence, as produced by getCharInfoDescByRank
+ length int // number of bytes in the sequence the tree was built from
+}
+
+// Access returns the byte found at index i of the string the waveletTree was
+// built from. It follows the bit stored at i in each node from the root until
+// it reaches a leaf. No bounds checking is done here on i.
+func (wt waveletTree) Access(i int) byte {
+	current := wt.root
+	for !current.isLeaf() {
+		goRight := current.data.Access(i)
+		// The rank of this bit at i is the index to look at inside the chosen child.
+		i = current.data.Rank(goRight, i)
+
+		next := current.left
+		if goRight {
+			next = current.right
+		}
+		current = next
+	}
+	return *current.char
+}
+
+// Rank allows us to get the rank of a specified character in
+// the original string
+func (wt waveletTree) Rank(char byte, i int) int {
+ if wt.root.isLeaf() {
+ return wt.root.data.Rank(true, i)
+ }
+
+ curr := wt.root
+ ci, ok := wt.lookupCharInfo(char)
+ if !ok {
+ return 0
+ }
+ level := 0
+ var rank int
+ for !curr.isLeaf() {
+ pathBit := ci.path.getBit(ci.path.len() - 1 - level)
+ rank = curr.data.Rank(pathBit, i)
+ if pathBit {
+ curr = curr.right
+ } else {
+ curr = curr.left
+ }
+ level++
+ i = rank
+ }
+ return rank
+}
+
+// Select returns the position in the original string of the occurrence of
+// char that has the given rank. Ranks are 0-based: rank 0 is the first
+// occurrence.
+//
+// A char that does not occur in the string yields 0, which is
+// indistinguishable from a real match at position 0; callers that need to
+// tell the two apart must check membership themselves. This also holds when
+// the string consists of a single distinct character.
+//
+// Select panics when char occurs in the string but an underlying bitvector
+// has no bit with the requested rank.
+func (wt waveletTree) Select(char byte, rank int) int {
+	// Resolve the character first so that the single character shortcut below
+	// cannot answer (or panic) for a character that is not in the alphabet.
+	ci, ok := wt.lookupCharInfo(char)
+	if !ok {
+		return 0
+	}
+
+	if wt.root.isLeaf() {
+		s, ok := wt.root.data.Select(true, rank)
+		if !ok {
+			msg := fmt.Sprintf("could not find a corresponding bit for node.Select(true, %d) root as leaf node", rank)
+			panic(msg)
+		}
+		return s
+	}
+
+	// Walk down to the leaf of char, counting the levels on the way.
+	curr := wt.root
+	level := 0
+	for !curr.isLeaf() {
+		if ci.path.getBit(ci.path.len() - 1 - level) {
+			curr = curr.right
+		} else {
+			curr = curr.left
+		}
+		level++
+	}
+
+	// Climb back to the root. The position found in a node is the rank to
+	// look up in its parent.
+	for curr.parent != nil {
+		curr = curr.parent
+		level--
+		pathBit := ci.path.getBit(ci.path.len() - 1 - level)
+		position, ok := curr.data.Select(pathBit, rank)
+		if !ok {
+			msg := fmt.Sprintf("could not find a corresponding bit for node.Select(%t, %d) for characterInfo %+v", pathBit, rank, ci)
+			panic(msg)
+		}
+		rank = position
+	}
+
+	return rank
+}
+
+// reconstruct rebuilds the string the waveletTree was created from by calling
+// Access for every index in [0, wt.length).
+func (wt waveletTree) reconstruct() string {
+	// Collect raw bytes and convert once: string(b) on a single byte yields the
+	// UTF-8 encoding of the code point b, which would turn every byte >= 0x80
+	// into two bytes and corrupt non-ASCII input.
+	buf := make([]byte, wt.length)
+	for i := range buf {
+		buf[i] = wt.Access(i)
+	}
+	return string(buf)
+}
+
+// lookupCharInfo scans the alphabet for char and returns a copy of its
+// metadata. The boolean is false, and the charInfo is the zero value, when
+// char is not in the alphabet.
+func (wt waveletTree) lookupCharInfo(char byte) (charInfo, bool) {
+	for _, info := range wt.alpha {
+		if info.char != char {
+			continue
+		}
+		return info, true
+	}
+	return charInfo{}, false
+}
+
+// node is one vertex of the wavelet tree: either an internal node carrying a
+// bitvector, or a leaf carrying a character.
+type node struct {
+ data rsaBitVector // on internal nodes, one bit per byte: false sends it to the left child, true to the right
+ char *byte // non-nil only on leaves (see isLeaf)
+ parent *node // nil on the root; Select follows these links upwards
+ left *node // subtree for the bytes whose bit is false
+ right *node // subtree for the bytes whose bit is true
+}
+
+// isLeaf reports whether n is a leaf, i.e. whether it has a character attached.
+func (n node) isLeaf() bool {
+ return n.char != nil
+}
+
+// charInfo is the metadata kept for one distinct byte of the indexed sequence.
+type charInfo struct {
+ char byte // the byte this entry describes
+ maxRank int // number of occurrences of char minus one, as counted by getCharInfoDescByRank
+ path bitvector // root-to-leaf path; the bit for tree level L is read at index len()-1-L, true meaning right
+}
+
+// newWaveletTreeFromString builds a waveletTree that indexes the bytes of str.
+// It returns an error when str fails validateWaveletTreeBuildInput, which
+// rejects the empty string.
+func newWaveletTreeFromString(str string) (waveletTree, error) {
+	if err := validateWaveletTreeBuildInput(&str); err != nil {
+		return waveletTree{}, err
+	}
+
+	sequence := []byte(str)
+	alphabet := getCharInfoDescByRank(sequence)
+	tree := waveletTree{
+		root:   buildWaveletTree(0, alphabet, sequence),
+		alpha:  alphabet,
+		length: len(str),
+	}
+
+	// An alphabet of size 1 produces a bare leaf as root. Give that leaf a
+	// bitvector with every position set so Rank and Select can query it.
+	if tree.root.isLeaf() {
+		ones := newBitVector(len(sequence))
+		for i := 0; i < ones.len(); i++ {
+			ones.setBit(i, true)
+		}
+		tree.root.data = newRSABitVectorFromBitVector(ones)
+	}
+
+	return tree, nil
+}
+
+// buildWaveletTree recursively builds the subtree for the characters in alpha
+// over the given bytes. currentLevel is the depth of the node being built and
+// selects which path bit splits the alphabet.
+//
+// It returns nil for an empty alphabet and a leaf for an alphabet of one
+// character. Otherwise it returns an internal node whose bitvector marks, per
+// byte, whether that byte belongs to the right half of the alphabet.
+func buildWaveletTree(currentLevel int, alpha []charInfo, bytes []byte) *node {
+	switch len(alpha) {
+	case 0:
+		return nil
+	case 1:
+		return &node{char: &alpha[0].char}
+	}
+
+	leftAlpha, rightAlpha := partitionAlpha(currentLevel, alpha)
+
+	var leftBytes, rightBytes []byte
+	bv := newBitVector(len(bytes))
+	for i, b := range bytes {
+		if !isInAlpha(rightAlpha, b) {
+			leftBytes = append(leftBytes, b)
+			continue
+		}
+		bv.setBit(i, true)
+		rightBytes = append(rightBytes, b)
+	}
+
+	parent := &node{data: newRSABitVectorFromBitVector(bv)}
+	parent.left = buildWaveletTree(currentLevel+1, leftAlpha, leftBytes)
+	parent.right = buildWaveletTree(currentLevel+1, rightAlpha, rightBytes)
+
+	// Link the children back so Select can climb from a leaf to the root.
+	if parent.left != nil {
+		parent.left.parent = parent
+	}
+	if parent.right != nil {
+		parent.right.parent = parent
+	}
+
+	return parent
+}
+
+// isInAlpha reports whether b is the character of any entry in alpha.
+func isInAlpha(alpha []charInfo, b byte) bool {
+	for i := range alpha {
+		if alpha[i].char == b {
+			return true
+		}
+	}
+	return false
+}
+
+// partitionAlpha splits the alphabet in two according to each character's path
+// bit for currentLevel: characters whose bit is 0 go to left, those whose bit
+// is 1 go to right. The relative order of the characters is kept. The alphabet
+// should be sorted in such a way that the largest number of characters is
+// removed nearest to the root of the tree, to reduce the memory footprint as
+// much as possible.
+func partitionAlpha(currentLevel int, alpha []charInfo) (left []charInfo, right []charInfo) {
+	for i := range alpha {
+		info := alpha[i]
+		bitIndex := info.path.len() - 1 - currentLevel
+		if !info.path.getBit(bitIndex) {
+			left = append(left, info)
+			continue
+		}
+		right = append(right, info)
+	}
+
+	return left, right
+}
+
+// getCharInfoDescByRank takes the bytes of the original string and returns one
+// charInfo per distinct byte, sorted by maxRank descending with ties broken by
+// ascending byte value. Each entry also receives its path encoding, derived
+// from its index in the sorted list. The metadata is needed both to build the
+// tree and to query it later on; the ordering decides which characters are
+// split off at which level of the tree.
+// NOTE: alphabets are expected to be small for real usecases
+func getCharInfoDescByRank(b []byte) []charInfo {
+	occurrences := make(map[byte]int)
+	for _, c := range b {
+		occurrences[c]++
+	}
+
+	var infos []charInfo
+	for c, count := range occurrences {
+		// maxRank is 0-based: a character seen once has maxRank 0.
+		infos = append(infos, charInfo{char: c, maxRank: count - 1})
+	}
+
+	slices.SortFunc(infos, func(x, y charInfo) bool {
+		if x.maxRank != y.maxRank {
+			return x.maxRank > y.maxRank
+		}
+		return x.char < y.char
+	})
+
+	pathBits := getTreeHeight(infos)
+	for i := range infos {
+		path := newBitVector(pathBits)
+		encodeCharPathIntoBitVector(path, uint64(i))
+		infos[i].path = path
+	}
+
+	return infos
+}
+
+// encodeCharPathIntoBitVector writes the binary representation of n into bv,
+// with the least significant bit of n stored at the last index of bv. Bits of
+// bv above the highest set bit of n are not touched.
+func encodeCharPathIntoBitVector(bv bitvector, n uint64) {
+	for shift := 0; n>>shift > 0; shift++ {
+		bit := (n>>shift)&1 == 1
+		bv.setBit(bv.len()-1-shift, bit)
+	}
+}
+
+// getTreeHeight returns floor(log2(len(alpha))) + 1, which
+// getCharInfoDescByRank uses as the number of bits in every character path.
+func getTreeHeight(alpha []charInfo) int {
+	alphabetSize := float64(len(alpha))
+	return int(math.Log2(alphabetSize)) + 1
+}
+
+// validateWaveletTreeBuildInput checks that sequence can be used to build a
+// waveletTree. It returns an error when the sequence is empty; a nil pointer
+// is treated as an empty sequence instead of being dereferenced.
+func validateWaveletTreeBuildInput(sequence *string) error {
+	if sequence == nil || len(*sequence) == 0 {
+		return errors.New("Sequence can not be empty")
+	}
+	return nil
+}
diff --git a/search/bwt/wavelet_test.go b/search/bwt/wavelet_test.go
new file mode 100644
index 000000000..f9471f079
--- /dev/null
+++ b/search/bwt/wavelet_test.go
@@ -0,0 +1,276 @@
+package bwt
+
+import (
+ "strings"
+ "testing"
+)
+
+// WaveletTreeAccessTestCase pairs an index into the test string with the
+// character Access is expected to return for it.
+type WaveletTreeAccessTestCase struct {
+ pos int // index passed to Access
+ expected string // expected character, as a one character string
+}
+
+// TestWaveletTree_Access checks Access against hand picked positions of a
+// 48 character DNA string.
+func TestWaveletTree_Access(t *testing.T) {
+	testStr := "AAAACCCCTTTTGGGG" + "ACTG" + "TGCA" + "TTAA" + "CCGG" + "GGGGTTTTCCCCAAAA"
+	tree, buildErr := newWaveletTreeFromString(testStr)
+	if buildErr != nil {
+		t.Fatal(buildErr)
+	}
+
+	testCases := []WaveletTreeAccessTestCase{
+		// "AAAACCCCTTTTGGGG"
+		{0, "A"},
+		{3, "A"},
+		{4, "C"},
+		{7, "C"},
+		{8, "T"},
+		{9, "T"},
+		{11, "T"},
+		{12, "G"},
+		{13, "G"},
+		{15, "G"},
+
+		// "ACTG"
+		{16, "A"},
+		{17, "C"},
+		{18, "T"},
+		{19, "G"},
+
+		// "TGCA"
+		{20, "T"},
+		{21, "G"},
+		{22, "C"},
+		{23, "A"},
+
+		// "TTAA"
+		{24, "T"},
+		{25, "T"},
+		{26, "A"},
+		{27, "A"},
+
+		// "CCGG"
+		{28, "C"},
+		{29, "C"},
+		{30, "G"},
+		{31, "G"},
+
+		// "GGGGTTTTCCCCAAAA"
+		{32, "G"},
+		{35, "G"},
+		{36, "T"},
+		{39, "T"},
+		{40, "C"},
+		{41, "C"},
+		{43, "C"},
+		{44, "A"},
+		{46, "A"},
+		{47, "A"},
+	}
+
+	for _, tc := range testCases {
+		if got := string(tree.Access(tc.pos)); got != tc.expected {
+			t.Fatalf("expected access(%d) to be %s but got %s", tc.pos, tc.expected, got)
+		}
+	}
+}
+
+// WaveletTreeRankTestCase describes one Rank query and its expected result.
+type WaveletTreeRankTestCase struct {
+ char string // character to rank; only its first byte is passed to Rank
+ pos int // offset passed to Rank
+ expected int // expected rank
+}
+
+// TestWaveletTree_Rank_Genomic checks Rank for every nucleotide against hand
+// picked offsets of a 48 character DNA string.
+func TestWaveletTree_Rank_Genomic(t *testing.T) {
+	testStr := "AAAACCCCTTTTGGGG" + "ACTG" + "TGCA" + "TTAA" + "CCGG" + "GGGGTTTTCCCCAAAA"
+	tree, buildErr := newWaveletTreeFromString(testStr)
+	if buildErr != nil {
+		t.Fatal(buildErr)
+	}
+
+	testCases := []WaveletTreeRankTestCase{
+		// offsets inside "AAAACCCCTTTTGGGG"
+		{"A", 0, 0},
+		{"A", 2, 2},
+		{"A", 3, 3},
+		{"A", 8, 4},
+		{"C", 4, 0},
+		{"C", 6, 2},
+		{"C", 12, 4},
+		{"T", 2, 0},
+		{"T", 8, 0},
+		{"T", 12, 4},
+		{"T", 15, 4},
+		{"G", 15, 3},
+
+		// offsets inside "ACTG"
+		{"A", 16, 4},
+		{"A", 17, 5},
+		{"G", 16, 4},
+
+		// offsets inside "TGCA"
+		{"T", 20, 5},
+		{"A", 23, 5},
+
+		// offsets inside "TTAA"
+		{"T", 24, 6},
+		{"T", 27, 8},
+
+		// offsets inside "CCGG"
+		{"C", 28, 6},
+		{"G", 31, 7},
+
+		// offsets inside "GGGGTTTTCCCCAAAA"
+		{"G", 32, 8},
+		{"G", 33, 9},
+		{"T", 36, 8},
+		{"T", 38, 10},
+		{"C", 40, 8},
+		{"C", 43, 11},
+		{"A", 44, 8},
+		{"A", 47, 11},
+	}
+
+	for _, tc := range testCases {
+		if got := tree.Rank(tc.char[0], tc.pos); got != tc.expected {
+			t.Fatalf("expected rank(%s, %d) to be %d but got %d", tc.char, tc.pos, tc.expected, got)
+		}
+	}
+}
+
+// WaveletTreeSelectTestCase describes one Select query and its expected result.
+type WaveletTreeSelectTestCase struct {
+ char string // character to select; only its first byte is passed to Select
+ rank int // rank passed to Select
+ expected int // expected position in the test string
+}
+
+// TestWaveletTree_Select checks Select for every nucleotide of a 48 character
+// DNA string. The "@" cases cover a character that is not in the string, for
+// which 0 is expected whatever the rank.
+func TestWaveletTree_Select(t *testing.T) {
+	testStr := "AAAACCCCTTTTGGGG" + "ACTG" + "TGCA" + "TTAA" + "CCGG" + "GGGGTTTTCCCCAAAA"
+	tree, buildErr := newWaveletTreeFromString(testStr)
+	if buildErr != nil {
+		t.Fatal(buildErr)
+	}
+
+	testCases := []WaveletTreeSelectTestCase{
+		// positions inside "AAAACCCCTTTTGGGG"
+		{"@", 0, 0},
+		{"A", 0, 0},
+		{"A", 1, 1},
+		{"A", 2, 2},
+		{"A", 3, 3},
+		{"C", 0, 4},
+		{"C", 3, 7},
+
+		// positions inside "ACTG"
+		{"A", 4, 16},
+		{"C", 4, 17},
+		{"T", 4, 18},
+		{"G", 4, 19},
+
+		// positions inside "TGCA"
+		{"@", 5, 0},
+		{"T", 5, 20},
+		{"G", 5, 21},
+		{"C", 5, 22},
+		{"A", 5, 23},
+
+		// positions inside "TTAA"
+		{"T", 6, 24},
+		{"T", 7, 25},
+		{"A", 6, 26},
+
+		// positions inside "CCGG"
+		{"C", 6, 28},
+		{"G", 6, 30},
+		{"G", 7, 31},
+
+		// positions inside "GGGGTTTTCCCCAAAA"
+		{"G", 8, 32},
+		{"A", 11, 47},
+
+		{"@", 200, 0},
+	}
+
+	for _, tc := range testCases {
+		if got := tree.Select(tc.char[0], tc.rank); got != tc.expected {
+			t.Fatalf("expected select(%s, %d) to be %d but got %d", tc.char, tc.rank, tc.expected, got)
+		}
+	}
+}
+
+// TestWaveletTree_Access_Reconstruction ensures that the wavelet tree is formed correctly. If we can reconstruct
+// every input string through Access, we can be fairly confident that the waveletTree is well formed.
+func TestWaveletTree_Access_Reconstruction(t *testing.T) {
+	// Build with a fair sized alphabet
+	enhancedQuickBrownFox := "the quick brown fox jumps over the lazy dog with an overt frown after fumbling its parallelogram shaped bananagram all around downtown"
+	enhancedQuickBrownFoxRepeated := strings.Join([]string{enhancedQuickBrownFox, enhancedQuickBrownFox, enhancedQuickBrownFox, enhancedQuickBrownFox, enhancedQuickBrownFox}, " ")
+	// Make it very large to account for any succinct data structures being used under the hood. For example, this
+	// helped uncover and diagnose errors in the Jacobson's Rank used under the hood.
+	// strings.Repeat builds the same string as appending in a loop, without the repeated reallocation.
+	enhancedQuickBrownFoxSuperLarge := strings.Repeat(enhancedQuickBrownFoxRepeated, 100)
+
+	testCases := []string{
+		"the quick brown fox jumped over the lazy dog",
+		"the quick brown fox jumped over the lazy dog!", // odd numbered alphabet
+		enhancedQuickBrownFox,
+		enhancedQuickBrownFoxRepeated,
+		enhancedQuickBrownFoxSuperLarge,
+	}
+
+	for _, str := range testCases {
+		wt, err := newWaveletTreeFromString(str)
+		if err != nil {
+			t.Fatal(err)
+		}
+		actual := wt.reconstruct()
+		if actual != str {
+			t.Fatalf("expected to rebuild:\n%s\nbut instead got:\n%s", str, actual)
+		}
+	}
+}
+
+// TestWaveletTreeEmptyStr checks that building a tree from the empty string
+// is rejected with an error.
+func TestWaveletTreeEmptyStr(t *testing.T) {
+	if _, err := newWaveletTreeFromString(""); err == nil {
+		t.Fatal("expected error but got nil")
+	}
+}
+
+// TestWaveletTreeSingleChar checks Rank, Select and Access on a tree built
+// from a one character string, where the root of the tree is itself a leaf.
+func TestWaveletTreeSingleChar(t *testing.T) {
+	char := "l"
+	wt, err := newWaveletTreeFromString(char)
+	if err != nil {
+		t.Fatal(err)
+	}
+	r := wt.Rank(char[0], 1)
+	s := wt.Select(char[0], 0)
+	a := wt.Access(0)
+
+	if r != 1 {
+		t.Fatalf("expected Rank(%s, %d) to be %d but got %d", char, 1, 1, r)
+	}
+	if s != 0 {
+		t.Fatalf("expected Select(%s, %d) to be %d but got %d", char, 0, 0, s)
+	}
+	if a != char[0] {
+		// Report the index that was queried and the characters that were compared.
+		t.Fatalf("expected Access(%d) to be %c but got %c", 0, char[0], a)
+	}
+}
+
+// TestWaveletTreeSingleAlpha checks Rank, Select and Access on a tree built
+// from a string that repeats a single character, where the root of the tree
+// is itself a leaf.
+func TestWaveletTreeSingleAlpha(t *testing.T) {
+	str := "lll"
+	wt, err := newWaveletTreeFromString(str)
+	if err != nil {
+		t.Fatal(err)
+	}
+	r := wt.Rank(str[0], 1)
+	s := wt.Select(str[0], 1)
+	a := wt.Access(0)
+
+	if r != 1 {
+		t.Fatalf("expected Rank(%s, %d) to be %d but got %d", str, 1, 1, r)
+	}
+	if s != 1 {
+		t.Fatalf("expected Select(%s, %d) to be %d but got %d", str, 1, 1, s)
+	}
+	if a != str[0] {
+		// Report the index that was queried and the characters that were compared.
+		t.Fatalf("expected Access(%d) to be %c but got %c", 0, str[0], a)
+	}
+}
+// TestBuildWaveletTree_ZeroAlpha checks that buildWaveletTree returns nil when
+// it is handed an empty alphabet, even if there are bytes to index.
+func TestBuildWaveletTree_ZeroAlpha(t *testing.T) {
+	sequence := []byte("AAAACCCCTTTTGGGG")
+
+	got := buildWaveletTree(0, []charInfo{}, sequence)
+	if got != nil {
+		t.Fatalf("expected root to be nil but got %v", got)
+	}
+}
diff --git a/mash/example_test.go b/search/mash/example_test.go
similarity index 91%
rename from mash/example_test.go
rename to search/mash/example_test.go
index 76a5db618..e307054cf 100644
--- a/mash/example_test.go
+++ b/search/mash/example_test.go
@@ -3,7 +3,7 @@ package mash_test
import (
"fmt"
- "github.com/bebop/poly/mash"
+ "github.com/bebop/poly/search/mash"
)
func ExampleMash() {
diff --git a/mash/mash.go b/search/mash/mash.go
similarity index 100%
rename from mash/mash.go
rename to search/mash/mash.go
diff --git a/mash/mash_test.go b/search/mash/mash_test.go
similarity index 98%
rename from mash/mash_test.go
rename to search/mash/mash_test.go
index f22f4715a..b5810a9d4 100644
--- a/mash/mash_test.go
+++ b/search/mash/mash_test.go
@@ -3,7 +3,7 @@ package mash_test
import (
"testing"
- "github.com/bebop/poly/mash"
+ "github.com/bebop/poly/search/mash"
)
func TestMash(t *testing.T) {
diff --git a/search/search.go b/search/search.go
new file mode 100644
index 000000000..7388d1324
--- /dev/null
+++ b/search/search.go
@@ -0,0 +1,8 @@
+/*
+Package search provides utilities for searching sequence data.
+*/
+package search
+
+/*
+This package is supposed to be empty and only exists to provide a doc string.
+*/
diff --git a/seqhash/seqhash_test.go b/seqhash/seqhash_test.go
index 1abd279d7..41989d6e9 100644
--- a/seqhash/seqhash_test.go
+++ b/seqhash/seqhash_test.go
@@ -35,33 +35,33 @@ func TestHash(t *testing.T) {
// Test circular double stranded hashing
seqhash, _ := Hash("TTAGCCCAT", "DNA", true, true)
if seqhash != "v1_DCD_a376845b679740014f3eb501429b45e592ecc32a6ba8ba922cbe99217f6e9287" {
- t.Errorf("Circular double stranded hashing failed. Expected v1_DCD_a376845b679740014f3eb501429b45e592ecc32a6ba8ba922cbe99217f6e9287, got: " + seqhash)
+ t.Errorf("Circular double stranded hashing failed. Expected v1_DCD_a376845b679740014f3eb501429b45e592ecc32a6ba8ba922cbe99217f6e9287, got: %s", seqhash)
}
// Test circular single stranded hashing
seqhash, _ = Hash("TTAGCCCAT", "DNA", true, false)
if seqhash != "v1_DCS_ef79b6e62394e22a176942dfc6a5e62eeef7b5281ffcb2686ecde208ec836ba4" {
- t.Errorf("Circular single stranded hashing failed. Expected v1_DCS_ef79b6e62394e22a176942dfc6a5e62eeef7b5281ffcb2686ecde208ec836ba4, got: " + seqhash)
+ t.Errorf("Circular single stranded hashing failed. Expected v1_DCS_ef79b6e62394e22a176942dfc6a5e62eeef7b5281ffcb2686ecde208ec836ba4, got: %s", seqhash)
}
// Test linear double stranded hashing
seqhash, _ = Hash("TTAGCCCAT", "DNA", false, true)
if seqhash != "v1_DLD_c2c9fc44df72035082a152e94b04492182331bc3be2f62729d203e072211bdbf" {
- t.Errorf("Linear double stranded hashing failed. Expected v1_DLD_c2c9fc44df72035082a152e94b04492182331bc3be2f62729d203e072211bdbf, got: " + seqhash)
+ t.Errorf("Linear double stranded hashing failed. Expected v1_DLD_c2c9fc44df72035082a152e94b04492182331bc3be2f62729d203e072211bdbf, got: %s", seqhash)
}
// Test linear single stranded hashing
seqhash, _ = Hash("TTAGCCCAT", "DNA", false, false)
if seqhash != "v1_DLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967" {
- t.Errorf("Linear single stranded hashing failed. Expected v1_DLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967, got: " + seqhash)
+ t.Errorf("Linear single stranded hashing failed. Expected v1_DLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967, got: %s", seqhash)
}
// Test RNA Seqhash
seqhash, _ = Hash("TTAGCCCAT", "RNA", false, false)
if seqhash != "v1_RLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967" {
- t.Errorf("Linear single stranded hashing failed. Expected v1_RLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967, got: " + seqhash)
+ t.Errorf("Linear single stranded hashing failed. Expected v1_RLS_063ea37d1154351639f9a48546bdae62fd8a3c18f3d3d3061060c9a55352d967, got: %s", seqhash)
}
// Test Protein Seqhash
seqhash, _ = Hash("MGC*", "PROTEIN", false, false)
if seqhash != "v1_PLS_922ec11f5227ce77a42f07f565a7a1a479772b5cf3f1f6e93afc5ecbc0fd5955" {
- t.Errorf("Linear single stranded hashing failed. Expected v1_PLS_922ec11f5227ce77a42f07f565a7a1a479772b5cf3f1f6e93afc5ecbc0fd5955, got: " + seqhash)
+ t.Errorf(`Linear single stranded hashing failed. Expected v1_PLS_922ec11f5227ce77a42f07f565a7a1a479772b5cf3f1f6e93afc5ecbc0fd5955, got: %s`, seqhash)
}
}
diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go
index 75ae10f16..17550f473 100644
--- a/synthesis/codon/codon.go
+++ b/synthesis/codon/codon.go
@@ -117,19 +117,46 @@ type TranslationTable struct {
}
// Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another
-// process, since the tables are generated at build time.
-func (table *TranslationTable) Copy() *TranslationTable {
- return &TranslationTable{
- StartCodons: table.StartCodons,
- StopCodons: table.StopCodons,
- AminoAcids: table.AminoAcids,
+// process. Maps and slices are duplicated and the choosers are rebuilt from the copied amino acids.
+// An error is returned when the choosers cannot be rebuilt.
+func (table *TranslationTable) Copy() (*TranslationTable, error) {
+ newTranslationMap := map[string]string{}
+ newStartCodonTable := map[string]string{}
- StartCodonTable: table.StartCodonTable,
- TranslationMap: table.TranslationMap,
- Choosers: table.Choosers,
+ for k, v := range table.TranslationMap {
+ newTranslationMap[k] = v
+ }
- Stats: table.Stats,
+ for k, v := range table.StartCodonTable {
+ newStartCodonTable[k] = v
}
+
+ // Copy every amino acid with its own codon slice. The letter has to be carried over: without it
+ // neither the copied amino acids nor the choosers built from them identify which amino acid they are.
+ newAAs := []AminoAcid{}
+ for _, v := range table.AminoAcids {
+ newAAs = append(newAAs, AminoAcid{
+ Letter: v.Letter,
+ Codons: append([]Codon{}, v.Codons...),
+ })
+ }
+
+ newChoosers, err := newAminoAcidChoosers(newAAs)
+ if err != nil {
+ return nil, err
+ }
+
+ return &TranslationTable{
+ StartCodons: append([]string{}, table.StartCodons...),
+ StopCodons: append([]string{}, table.StopCodons...),
+ // Use the copies made above so the codon slices are not shared with the original table.
+ AminoAcids: newAAs,
+
+ TranslationMap: newTranslationMap,
+ StartCodonTable: newStartCodonTable,
+ Choosers: newChoosers,
+
+ // NOTE(review): the Stats fields are copied by assignment; if StartCodonCount is a map or slice
+ // it is still shared with the original table. Confirm against the Stats definition.
+ Stats: &Stats{
+ StartCodonCount: table.Stats.StartCodonCount,
+ GeneCount: table.Stats.GeneCount,
+ },
+ }, nil
}
// GetWeightedAminoAcids returns the amino acids along with their associated codon weights
@@ -385,7 +412,7 @@ Tim
******************************************************************************/
// Function to generate default codon tables from NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
-func generateCodonTable(aminoAcids, starts string) *TranslationTable {
+func generateCodonTable(aminoAcids, starts string) (*TranslationTable, error) {
base1 := "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
base2 := "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
base3 := "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"
@@ -432,7 +459,7 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable {
// This function is run at buildtime and failure here means we have an invalid codon table.
chooser, err := newAminoAcidChoosers(aminoAcidSlice)
if err != nil {
- panic(fmt.Errorf("tried to generate an invalid codon table %w", err))
+ return nil, fmt.Errorf("tried to generate an invalid codon table %w", err)
}
return &TranslationTable{
@@ -443,41 +470,42 @@ func generateCodonTable(aminoAcids, starts string) *TranslationTable {
StartCodonTable: startCodonsMap,
Choosers: chooser,
Stats: NewStats(),
- }
+ }, nil
}
// NewTranslationTable takes the index of desired NCBI codon table and returns it.
-func NewTranslationTable(index int) *TranslationTable {
- return translationTablesByNumber[index].Copy()
+// An error is returned when no table is registered under index or when the table cannot be generated.
+func NewTranslationTable(index int) (*TranslationTable, error) {
+ // Looking up an unknown index yields a nil slice; indexing it would panic, so report it as an error.
+ tableData, ok := translationTablesByNumber[index]
+ if !ok || len(tableData) < 2 {
+ return nil, fmt.Errorf("no NCBI translation table with index %d", index)
+ }
+ return generateCodonTable(tableData[0], tableData[1])
}
-// translationTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices.
-var translationTablesByNumber = map[int]*TranslationTable{
- 1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"),
- 2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"),
- 3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"),
- 4: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"),
- 5: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"),
- 6: generateCodonTable("FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"),
- 9: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"),
- 10: generateCodonTable("FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"),
- 11: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"),
- 12: generateCodonTable("FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"),
- 13: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"),
- 14: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"),
- 16: generateCodonTable("FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"),
- 21: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"),
- 22: generateCodonTable("FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"),
- 23: generateCodonTable("FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"),
- 24: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"),
- 25: generateCodonTable("FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"),
- 26: generateCodonTable("FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"),
- 27: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"),
- 28: generateCodonTable("FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"),
- 29: generateCodonTable("FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"),
- 30: generateCodonTable("FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"),
- 31: generateCodonTable("FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"),
- 33: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------")}
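+// translationTablesByNumber stores, keyed by NCBI table number, the data necessary to generate the codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi.
+// Each entry holds two strings: element 0 is passed to generateCodonTable as aminoAcids and element 1 as starts.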
+var translationTablesByNumber = map[int][]string{
+ 1: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"},
+ 2: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"},
+ 3: {"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"},
+ 4: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--MM------**-------M------------MMMM---------------M------------"},
+ 5: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M------**--------------------MMMM---------------M------------"},
+ 6: {"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"},
+ 9: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"},
+ 10: {"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"},
+ 11: {"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M------------MMMM---------------M------------"},
+ 12: {"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"},
+ 13: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------**----------------------MM---------------M------------"},
+ 14: {"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------*-----------------------M----------------------------"},
+ 16: {"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------*---*--------------------M----------------------------"},
+ 21: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "----------**-----------------------M---------------M------------"},
+ 22: {"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "------*---*---*--------------------M----------------------------"},
+ 23: {"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--*-------**--*-----------------M--M---------------M------------"},
+ 24: {"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M------**-------M---------------M---------------M------------"},
+ 25: {"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**-----------------------M---------------M------------"},
+ 26: {"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*----M---------------M----------------------------"},
+ 27: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"},
+ 28: {"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**--*--------------------M----------------------------"},
+ 29: {"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"},
+ 30: {"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------*--------------------M----------------------------"},
+ 31: {"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**-----------------------M----------------------------"},
+ 33: {"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------"},
+}
/******************************************************************************
Nov, 20, 2020
@@ -591,7 +619,10 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c
//
// this take start and stop strings from first table
// and use them as start + stops in final codonTable
- mergedTable := firstCodonTable.Copy()
+ mergedTable, err := firstCodonTable.Copy()
+ if err != nil {
+ return nil, err
+ }
// Check if cutOff is too high or low (this is converted to a percent)
if cutOff < 0 {
@@ -662,7 +693,7 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, c
finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons})
}
- err := mergedTable.UpdateWeights(finalAminoAcids)
+ err = mergedTable.UpdateWeights(finalAminoAcids)
if err != nil {
return nil, err
}
@@ -689,9 +720,12 @@ func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*Transl
finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons})
}
- mergedTable := firstCodonTable.Copy()
+ mergedTable, err := firstCodonTable.Copy()
+ if err != nil {
+ return nil, err
+ }
- err := mergedTable.UpdateWeights(finalAminoAcids)
+ err = mergedTable.UpdateWeights(finalAminoAcids)
if err != nil {
return nil, err
}
diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go
index 8d34e7cd6..57663a047 100644
--- a/synthesis/codon/codon_test.go
+++ b/synthesis/codon/codon_test.go
@@ -8,6 +8,7 @@ import (
"github.com/bebop/poly/io/genbank"
"github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
weightedRand "github.com/mroth/weightedrand"
"github.com/stretchr/testify/assert"
)
@@ -16,14 +17,23 @@ func TestTranslation(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
- if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation {
+ table, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation {
t.Errorf("TestTranslation has failed. Translate has returned %q, want %q", got, gfpTranslation)
}
}
func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) {
- nonEmptyCodonTable := NewTranslationTable(1)
- _, err := nonEmptyCodonTable.Translate("")
+ nonEmptyCodonTable, err := NewTranslationTable(1)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ _, err = nonEmptyCodonTable.Translate("")
if err != errEmptySequenceString {
t.Error("Translation should return an error if given an empty sequence string")
@@ -33,7 +43,12 @@ func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) {
func TestTranslationMixedCase(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaaTTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
- if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation {
+ table, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation {
t.Errorf("TestTranslationMixedCase has failed. Translate has returned %q, want %q", got, gfpTranslation)
}
}
@@ -41,7 +56,13 @@ func TestTranslationMixedCase(t *testing.T) {
func TestTranslationLowerCase(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgctacatacggaaagcttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgagtacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctctacaaataa"
- if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation {
+
+ table, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ if got, _ := table.Translate(gfpDnaSequence); got != gfpTranslation {
t.Errorf("TestTranslationLowerCase has failed. Translate has returned %q, want %q", got, gfpTranslation)
}
}
@@ -51,13 +72,20 @@ func TestOptimize(t *testing.T) {
sequence, _ := genbank.Read("../../data/puc19.gbk")
- table := NewTranslationTable(11)
- err := table.UpdateWeightsWithSequence(sequence)
+ table, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = table.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
- codonTable := NewTranslationTable(11)
+ codonTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
optimizedSequence, _ := table.Optimize(gfpTranslation)
optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence)
@@ -70,8 +98,12 @@ func TestOptimize(t *testing.T) {
func TestOptimizeSameSeed(t *testing.T) {
var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
var sequence, _ = genbank.Read("../../data/puc19.gbk")
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
@@ -92,8 +124,12 @@ func TestOptimizeSameSeed(t *testing.T) {
func TestOptimizeDifferentSeed(t *testing.T) {
var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
var sequence, _ = genbank.Read("../../data/puc19.gbk")
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
@@ -107,8 +143,12 @@ func TestOptimizeDifferentSeed(t *testing.T) {
}
func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) {
- nonEmptyCodonTable := NewTranslationTable(1)
- _, err := nonEmptyCodonTable.Optimize("")
+ nonEmptyCodonTable, err := NewTranslationTable(1)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ _, err = nonEmptyCodonTable.Optimize("")
if err != errEmptyAminoAcidString {
t.Error("Optimize should return an error if given an empty amino acid string")
@@ -116,14 +156,22 @@ func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) {
}
func TestOptimizeErrorsOnInvalidAminoAcid(t *testing.T) {
aminoAcids := "TOP"
- table := NewTranslationTable(1) // does not contain 'O'
+ table, err := NewTranslationTable(1)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+ // does not contain 'O'
_, optimizeErr := table.Optimize(aminoAcids)
assert.EqualError(t, optimizeErr, invalidAminoAcidError{'O'}.Error())
}
func TestGetCodonFrequency(t *testing.T) {
- translationTable := NewTranslationTable(11).TranslationMap
+ table, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+ translationTable := table.TranslationMap
var codons strings.Builder
@@ -197,14 +245,22 @@ func TestCompromiseCodonTable(t *testing.T) {
// weight our codon optimization table using the regions we collected from the genbank file above
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
sequence2, _ := genbank.Read("../../data/phix174.gb")
- optimizationTable2 := NewTranslationTable(11)
+ optimizationTable2, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
if err != nil {
t.Error(err)
@@ -239,14 +295,22 @@ func TestAddCodonTable(t *testing.T) {
// weight our codon optimization table using the regions we collected from the genbank file above
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
sequence2, _ := genbank.Read("../../data/phix174.gb")
- optimizationTable2 := NewTranslationTable(11)
+ optimizationTable2, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
if err != nil {
t.Error(err)
@@ -273,8 +337,12 @@ func TestCapitalizationRegression(t *testing.T) {
sequence, _ := genbank.Read("../../data/puc19.gbk")
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
t.Error(err)
}
@@ -350,8 +418,12 @@ func TestOptimizeSequence(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
- optimizationTable := NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith)
+ optimizationTable, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatalf("failed to initialise codon table: %s", err)
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith)
if !errors.Is(err, tt.wantUpdateWeightsErr) {
t.Errorf("got %v, want %v", err, tt.wantUpdateWeightsErr)
}
@@ -453,7 +525,8 @@ func TestUpdateWeights(t *testing.T) {
chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error)
- wantErr error
+ wantInitErr error
+ wantErr error
}{
{
name: "ok",
@@ -493,7 +566,8 @@ func TestUpdateWeights(t *testing.T) {
return nil, mockError
},
- wantErr: mockError,
+ wantInitErr: mockError,
+ wantErr: mockError,
},
}
@@ -506,12 +580,99 @@ func TestUpdateWeights(t *testing.T) {
newChooserFn = weightedRand.NewChooser
}()
- optimizationTable := NewTranslationTable(11)
+ optimizationTable, err := NewTranslationTable(11)
+ if !errors.Is(err, tt.wantInitErr) {
+ t.Fatalf("got %v, want %v", err, tt.wantInitErr)
+ return
+ }
+
+ if tt.wantInitErr != nil {
+ return
+ }
+
+ err = optimizationTable.UpdateWeights(tt.aminoAcids)
+ if !errors.Is(err, tt.wantErr) {
+ t.Errorf("got %v, want %v", err, tt.wantErr)
+ }
+ })
+ }
+}
+
+func TestCopy(t *testing.T) {
+ t.Parallel()
+
+ cmpOptions := []cmp.Option{
+ cmpopts.IgnoreUnexported(weightedRand.Chooser{}),
+ }
+
+ tests := []struct {
+ name string
+
+ wantErr error
+ }{
+ {
+ name: "ok",
+
+ wantErr: nil,
+ },
+ }
+
+ for _, tt := range tests {
+ var tt = tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
- err := optimizationTable.UpdateWeights(tt.aminoAcids)
+ original, err := NewTranslationTable(11)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // perform a deep copy (changing the copy will not change the original)
+
+ deepCopy, err := original.Copy()
if !errors.Is(err, tt.wantErr) {
t.Errorf("got %v, want %v", err, tt.wantErr)
}
+
+ // modify fields
+
+ deepCopy.StartCodons[0] = "🍌"
+ deepCopy.StopCodons[0] = "🐗"
+ deepCopy.AminoAcids = []AminoAcid{}
+ deepCopy.Choosers = map[string]weightedRand.Chooser{}
+ deepCopy.Stats = &Stats{}
+ deepCopy.TranslationMap = map[string]string{}
+
+ // this compares pointers
+ if cmp.Equal(deepCopy, original, cmpOptions...) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy, original, cmpOptions...))
+ }
+
+ // we compare the table's fields
+
+ if cmp.Equal(deepCopy.StartCodons, original.StartCodons) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StartCodons, original.StartCodons))
+ }
+
+ if cmp.Equal(deepCopy.StopCodons, original.StopCodons) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.StopCodons, original.StopCodons))
+ }
+
+ if cmp.Equal(deepCopy.AminoAcids, original.AminoAcids) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.AminoAcids, original.AminoAcids))
+ }
+
+ if cmp.Equal(deepCopy.Choosers, original.Choosers) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Choosers, original.Choosers))
+ }
+
+ if cmp.Equal(deepCopy.Stats, original.Stats) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.Stats, original.Stats))
+ }
+
+ if cmp.Equal(deepCopy.TranslationMap, original.TranslationMap) {
+ t.Errorf("deepCopy and original matched, we did not want them to %s", cmp.Diff(deepCopy.TranslationMap, original.TranslationMap))
+ }
})
}
}
diff --git a/synthesis/codon/example_test.go b/synthesis/codon/example_test.go
index 0fae9bbb9..0061c76b2 100644
--- a/synthesis/codon/example_test.go
+++ b/synthesis/codon/example_test.go
@@ -11,7 +11,13 @@ import (
func ExampleTranslationTable_Translate() {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
- testTranslation, _ := codon.NewTranslationTable(11).Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table
+ table, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
+ testTranslation, _ := table.Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table
fmt.Println(gfpTranslation == testTranslation)
// output: true
@@ -19,14 +25,19 @@ func ExampleTranslationTable_Translate() {
func ExampleTranslationTable_UpdateWeights() {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
- sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA"
- table := codon.NewTranslationTable(11)
+ sequenceWithCustomWeights := "ATGGCGAGCAAGGGCGAAGAGCTTTTTACTGGAGTGGTACCCATCCTTGTGGAGCTGGATGGGGATGTTAATGGGCACAAGTTTTCTGTGTCCGGTGAGGGGGAGGGTGACGCGACCTATGGCAAACTAACGTTGAAGTTTATCTGCACCACCGGCAAGCTCCCTGTCCCTTGGCCGACGCTGGTAACCACTTTTTCATACGGAGTGCAATGCTTTTCACGATACCCAGACCACATGAAACGGCACGACTTCTTCAAGAGCGCGATGCCAGAAGGTTATGTGCAAGAGCGTACGATCTCATTCAAGGACGACGGGAATTATAAGACAAGAGCAGAGGTGAAATTTGAGGGGGACACGTTAGTAAATCGGATTGAATTAAAGGGAATCGACTTTAAGGAGGATGGGAACATACTTGGTCACAAACTGGAATATAATTACAATTCACACAATGTTTACATCACTGCCGACAAGCAAAAAAATGGGATTAAAGCAAATTTCAAAATTCGGCATAATATTGAGGATGGTAGTGTCCAGCTCGCGGATCACTATCAGCAAAACACACCTATCGGAGACGGACCCGTTTTACTACCGGATAATCATTACTTAAGCACCCAATCAGCGTTATCCAAAGATCCGAACGAAAAACGTGACCACATGGTTCTCTTGGAGTTCGTCACCGCAGCTGGAATAACTCATGGAATGGACGAACTATACAAATAA"
+
+ table, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
// this example is using custom weights for different codons for Arginine. Use this if you would rather use your own
// codon weights, they can also be computed for you with `UpdateWeightsWithSequence`.
- err := table.UpdateWeights([]codon.AminoAcid{
+ err = table.UpdateWeights([]codon.AminoAcid{
{
Letter: "R",
Codons: []codon.Codon{
@@ -57,7 +68,11 @@ func ExampleTranslationTable_UpdateWeights() {
fmt.Println("Could not update weights in example")
}
- optimizedSequence, _ := table.Optimize(gfpTranslation, 1)
+ optimizedSequence, err := table.Optimize(gfpTranslation, 1)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
fmt.Println(optimizedSequence == sequenceWithCustomWeights)
// output: true
@@ -67,7 +82,12 @@ func ExampleTranslationTable_Optimize() {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
sequence, _ := genbank.Read("../../data/puc19.gbk")
- codonTable := codon.NewTranslationTable(11)
+ codonTable, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
_ = codonTable.UpdateWeightsWithSequence(sequence)
// Here, we double check if the number of genes is equal to the number of stop codons
@@ -122,14 +142,24 @@ func ExampleCompromiseCodonTable() {
sequence, _ := genbank.Read("../../data/puc19.gbk")
// weight our codon optimization table using the regions we collected from the genbank file above
- optimizationTable := codon.NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
panic(fmt.Errorf("got unexpected error in an example: %w", err))
}
sequence2, _ := genbank.Read("../../data/phix174.gb")
- optimizationTable2 := codon.NewTranslationTable(11)
+ optimizationTable2, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
if err != nil {
panic(fmt.Errorf("got unexpected error in an example: %w", err))
@@ -143,21 +173,31 @@ func ExampleCompromiseCodonTable() {
}
}
}
- //output: 2727
+ //output: 3863
}
func ExampleAddCodonTable() {
sequence, _ := genbank.Read("../../data/puc19.gbk")
// weight our codon optimization table using the regions we collected from the genbank file above
- optimizationTable := codon.NewTranslationTable(11)
- err := optimizationTable.UpdateWeightsWithSequence(sequence)
+ optimizationTable, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
+ err = optimizationTable.UpdateWeightsWithSequence(sequence)
if err != nil {
panic(fmt.Errorf("got unexpected error in an example: %w", err))
}
sequence2, _ := genbank.Read("../../data/phix174.gb")
- optimizationTable2 := codon.NewTranslationTable(11)
+ optimizationTable2, err := codon.NewTranslationTable(11)
+ if err != nil {
+ fmt.Printf("error running example: %s\n", err)
+ return
+ }
+
err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
if err != nil {
panic(fmt.Errorf("got unexpected error in an example: %w", err))
@@ -175,5 +215,5 @@ func ExampleAddCodonTable() {
}
}
}
- //output: 90
+ //output: 51
}
diff --git a/synthesis/fix/synthesis_test.go b/synthesis/fix/synthesis_test.go
index 6ca7ea088..de726c29d 100644
--- a/synthesis/fix/synthesis_test.go
+++ b/synthesis/fix/synthesis_test.go
@@ -48,11 +48,11 @@ func BenchmarkCds(b *testing.B) {
for _, cutSite := range []string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"} {
if strings.Contains(optimizedSeq, cutSite) {
fmt.Println(changes)
- b.Errorf("phusion" + " contains " + cutSite)
+ b.Errorf("phusion contains %s", cutSite)
}
if strings.Contains(transform.ReverseComplement(optimizedSeq), cutSite) {
fmt.Println(changes)
- b.Errorf("phusion" + " reverse complement contains " + cutSite)
+ b.Errorf("phusion reverse complement contains %s", cutSite)
}
}
}
@@ -84,10 +84,10 @@ func TestCds(t *testing.T) {
for _, cutSite := range []string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"} {
if strings.Contains(optimizedSeq, cutSite) {
- t.Errorf("phusion" + " contains " + cutSite)
+ t.Errorf("phusion contains %s", cutSite)
}
if strings.Contains(transform.ReverseComplement(optimizedSeq), cutSite) {
- t.Errorf("phusion" + " reverse complement contains " + cutSite)
+ t.Errorf("phusion reverse complement contains %s", cutSite)
}
}
diff --git a/synthesis/fragment/fragment_test.go b/synthesis/fragment/fragment_test.go
index ab3f0c153..f21b3d3d1 100644
--- a/synthesis/fragment/fragment_test.go
+++ b/synthesis/fragment/fragment_test.go
@@ -9,7 +9,7 @@ func TestFragment(t *testing.T) {
_, _, err := Fragment(gene, 90, 110, []string{})
if err != nil {
- t.Errorf(err.Error())
+ t.Error(err.Error())
}
}
@@ -18,7 +18,7 @@ func TestUnfragmentable(t *testing.T) {
polyA := "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
_, _, err := Fragment(polyA, 40, 80, []string{})
if err == nil {
- t.Errorf("polyA should fail to fragment")
+ t.Error("polyA should fail to fragment")
}
}
@@ -27,12 +27,12 @@ func TestFragmentSizes(t *testing.T) {
lacZ := "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG"
_, _, err := Fragment(lacZ, 105, 95, []string{})
if err == nil {
- t.Errorf("Fragment should fail when minFragmentSize > maxFragmentSize")
+ t.Error("Fragment should fail when minFragmentSize > maxFragmentSize")
}
_, _, err = Fragment(lacZ, 7, 95, []string{})
if err == nil {
- t.Errorf("Fragment should fail when minFragmentSize < 8")
+ t.Error("Fragment should fail when minFragmentSize < 8")
}
}
@@ -51,7 +51,7 @@ func TestLongFragment(t *testing.T) {
gene := "GGAGGGTCTCAATGCTGGACGATCGCAAATTCAGCGAACAGGAGCTGGTCCGTCGCAACAAATACAAAACGCTGGTCGAGCAAAACAAAGACCCGTACAAGATTACGAACTGGAAACGCAATACCACCCTGCTGAAACTGAATGAGAAATACAAAGACTATAGCAAGGAGGACCTGTTGAACCTGAATCAAGAACTGGTCGTTGTTGCAGGTCGTATCAAACTGTATCGTGAAGCCGGTAAAAAAGCTGCCTTTGTGAACATTGATGATCAAGACTCCTCTATTCAGTTGTACGTGCGCCTGGATGAGATCGGTGATCAGAGCTTCGAGGATTTCCGCAATTTCGACCTGGGTGACATCATTGGTGTTAAAGGTATCATGATGCGCACCGACCACGGCGAGTTGAGCATCCGTTGTAAGGAAGTCGTGCTGCTGAGCAAGGCCCTGCGTCCGCTGCCGGATAAACACGCGGGCATTCAGGATATTGAGGAAAAGTACCGCCGTCGCTATGTGGACCTGATTATGAATCACGACGTGCGCAAGACGTTCCAGGCGCGTACCAAGATCATTCGTACCTTGCAAAACTTTCTGGATAATAAGGGTTACATGGAGGTCGAAACCCCGATCCTGCATAGCCTGAAGGGTGGCGCGAGCGCGAAACCGTTTATTACCCACTACAATGTGCTGAATACGGATGTGTATCTGCGTATCGCGACCGAGCTGCACCTGAAACGCCTGATTGTTGGCGGTTTCGAGGGTGTGTATGAGATCGGTCGCATCTTTCGCAATGAAGGTATGTCCACGCGTCACAATCCGGAATTCACGTCTATCGAACTGTATGTCGCCTATGAGGACATGTTCTTTTTGATGGATCTGACCGAAGAGATTTTTCGCGTTTGTAATGCCGCAGTCAACAGCTCCAGCATCATTGAGTATAACAACGTGAAAATTGACCTGAGCAAGCCGTTTAAGCGCCTGCATATGGTTGACGGTATTAAACAGGTGACCGGCGTCGACTTCTGGCAGGAGATGACGGTCCAACAGGCTCTGGAGCTGGCCAAAAAGCATAAAGTGCACGTTGAAAAACATCAAGAGTCTGTTGGTCACATTATCAATTTGTTCTATGAGGAGTTCGTGGAGTCCACGATTGTTGAGCCGACGTTCGTGTACGGTCACCCGAAGGAAATCTCTCCGCTGGCTAAGAGCAATCCGTCTGACCCGCGTTTCACGGACCGTTTCGAGCTGTTCATTCTGGGTCGTGAGTATGCGAATGCGTTTAGCGAGCTGAATGACCCGATTGACCAGTACGAACGCTTCAAGGCTCAGATTGAGGAGGAAAGCAAGGGCAACGATGAAGCCAACGACATGGACATTGATTTCATCGAGGCTCTGGAACACGCCATGCCGCCGACCGCGGGTATTGGTATCGGCATTGATCGCTTGGTTATGCTGCTGACGAATAGCGAATCCATCAAAGACGTGCTGTTGTTCCCGCAAATGAAGCCGCGCGAATGAAGAGCTTAGAGACCCGCT"
frags, _, err := Fragment(gene, 79, 94, []string{})
if err != nil {
- t.Errorf(err.Error())
+ t.Error(err.Error())
}
for _, frag := range frags {
if len(frag) > 94 {
@@ -72,7 +72,7 @@ func TestCheckLongRegresssion(t *testing.T) {
}
}
if foundGTCT {
- t.Errorf("Should not have found GTCT since it is the reverse complement of AGAC")
+ t.Error("Should not have found GTCT since it is the reverse complement of AGAC")
}
}
@@ -92,6 +92,6 @@ func TestFragmentWithOverhangs(t *testing.T) {
_, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs)
if err != nil {
- t.Errorf(err.Error())
+ t.Error(err.Error())
}
}
diff --git a/tutorials/001_input_output_test.go b/tutorials/001_input_output_test.go
index 3c456b135..225f60029 100644
--- a/tutorials/001_input_output_test.go
+++ b/tutorials/001_input_output_test.go
@@ -47,7 +47,7 @@ Tim
******************************************************************************/
// if you're using VS-CODE you should see a DEBUG TEST button right below this
-// comment. Please set break points and use it early and often.
+// comment. Please set break points and use them early and often.
func TestFileIOTutorial(t *testing.T) {
// First we're going to read in a Genbank file for the well known plasmid
// backbone puc19. Plasmids are super small rings of "Circular DNA" that are
@@ -110,7 +110,7 @@ func TestFileIOTutorial(t *testing.T) {
}
// We'll go into more detail about features and DNA parts
- // in the next tutorial but for now know that we can also
+ // later in this tutorial series but for now know that we can also
// get the sequence of each feature using the GetSequence method.
feature := puc19.Features[1]
diff --git a/tutorials/002_dna_parts_test.go b/tutorials/002_dna_parts_test.go
deleted file mode 100644
index 9d2d4144d..000000000
--- a/tutorials/002_dna_parts_test.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package tutorials_test
-
-// TODO: Write a tutorial on DNA parts
diff --git a/tutorials/002_primer_design_test.go b/tutorials/002_primer_design_test.go
new file mode 100644
index 000000000..de8dc6fca
--- /dev/null
+++ b/tutorials/002_primer_design_test.go
@@ -0,0 +1,128 @@
+package tutorials_test
+
+import (
+ "fmt"
+ "log"
+ "testing"
+
+ "github.com/bebop/poly/io/genbank"
+ "github.com/bebop/poly/primers/pcr"
+)
+
+/******************************************************************************
+Sep, 12, 2022
+
+== Designing Primers for Just About Anything ==
+
+Now that you've learned what plasmids are you should probably know what primers
+are as well.
+
+"Primers are short sequences of DNA that can be used to amplify DNA sequences
+and they are the workhorse of modern molecular biology.
+
+Essentially primers are short pieces of single stranded DNA that can
+bind to a target sequence of single stranded DNA. These primers serve as a
+marker for polymerases (the enzyme Poly is named after!) to bind and start adding
+free floating nucleotides (ACTGs) to a single strand piece of DNA to form a
+double stranded piece of DNA.
+
+This is a crucial step in the process of PCR (polymerase chain reaction).
+https://en.wikipedia.org/wiki/Polymerase_chain_reaction
+
+Here's also a video animation from Cold Spring Harbor's DNA Learning center explaining the process.
+https://youtu.be/2KoLnIwoZKU?si=wqKs1NU5ZhU5O7Ui
+
+You can read more about that at the link above but just know that an absolute huge
+number of protocols from diagnostics to plasmid cloning use these primers so they're
+super important."
+
+- From Poly's primer design package-level documentation
+
+
+Primers are the workhorse of modern molecular biology. They're involved in almost
+every molecular biology experiment and are a crucial component in modern lab
+diagnostics.
+
+In this tutorial we're going to design a large set of primers to help us extract and
+isolate every protein coding region in the Bacillus subtilis genome so we can express
+and characterize each protein's structure individually in vitro (in the lab).
+
+We could also use these primers for RNA interference experiments to suppress
+protein expression in vivo to better understand how these proteins interact.
+
+The point is that primers are incredibly versatile tools, and by automating their design
+we can do some pretty interesting (and even potentially lucrative) experiments.
+
+TTFN,
+Tim
+******************************************************************************/
+
+// if you're using VS-CODE you should see a DEBUG TEST button right below this
+// comment. Please set break points and use them early and often.
+func TestPrimersTutorial(t *testing.T) {
+
+ // This is a struct that we'll use to store the results of our primer designs for cloning out each gene
+ type CloneOut struct {
+ CDS genbank.Feature
+ Sequence string
+ ForwardPrimer string
+ ReversePrimer string
+ }
+
+ var reactions []CloneOut // <- declaring our list of primers so we can append to it
+
+ // First let's get our annotated Bacillus subtilis genome
+ bsub, err := genbank.Read("../data/bsub.gbk")
+
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // For each feature in the genome we're going to design a primer pair if the feature is a coding sequence
+ for _, feature := range bsub.Features {
+ if feature.Type == "CDS" { // CDS stands for coding sequence (which means that it codes for a protein in this case)
+
+ var reaction CloneOut // initialize our reaction that will be appended
+
+ // store the feature and its sequence in our reaction in case we need it later
+ reaction.CDS = feature
+ reaction.Sequence, _ = feature.GetSequence()
+
+ // generate forward and reverse primers and store them in our struct
+ forward, reverse := pcr.DesignPrimers(reaction.Sequence, 56.0) // <- 56.0 is our melting temp. The temperature at which we want our primers to bind to double stranded DNA. Again. don't hardcode values like this in real life. Put it in a constant or something.
+ reaction.ForwardPrimer = forward
+ reaction.ReversePrimer = reverse
+
+ // append our reaction to our reactions slice (a slice is essentially Go's version of a list, or vector)
+ reactions = append(reactions, reaction)
+ }
+ }
+
+ fmt.Println("Total reactions:", len(reactions))
+ // We've now just generated ~5000 primers.
+ // Notice how the only numerical parameter we gave was "melting temp". This is the temperature at which the primers will anneal to denatured DNA.
+ // As mentioned in the Cold Spring Harbor video linked above, PCR reactions are conducted in ~30 cycles of heating and cooling over 3 temperature stages.
+
+ // Stage 1: We raise the temperature of our reaction to 95C to denature (split) our DNA such that our primers can bind to it.
+ // Stage 2: We lower the temperature of our reaction to 55C so that our primers can bind to complementary regions of newly accessible single stranded DNA.
+ // Stage 3: We raise the temperature of our reaction to ~72C (or whatever temperature is best for the polymerase we're using) to activate our polymerase
+ // Stage 3 (cont): to bind to our primers and begin constructing a brand new second strand to our denatured DNA. Then we go back to Stage 1.
+
+ // For each cycle of the above stages we end up doubling the number of copies of the gene we want, so after 30 cycles we have roughly 2^30 times as many copies of our desired region.
+
+ // What we've done is design a gigantic set of primers that share the same melting temp. This makes it possible to run all of these reactions
+ // concurrently within a single PCR run but we've ignored a lot of other design considerations.
+
+ // These primers aren't particularly well designed. All pcr.DesignPrimers has done is figure out how long each primer should be so that they all bind
+ // at a specific temperature while assuming that they should bind at the very beginning and end of each given sequence. This is an extremely common
+ // use case but there are several caveats.
+
+ // 1. The designed primers could be dimers (The primers could bind to each other and not to their target sequence)
+ // 2. One primer in a pair could be a "hairpin" that binds to itself (which is something the fold package can help detect)
+ // 3. Your primers may actually need a specific configuration to bind to the intended target (something the fold package may be able to help with)
+
+ // Depending on your situation you may need to get creative. Lots of scientists design their primers to bind up or downstream of their gene of
+ // interest to avoid primer dimers or hairpins. Some scientists don't care if they copy the whole gene but just want to copy enough of the
+ // gene to verify that it's there. There are probably a million different ways to design primers but I'd guess that poly itself covers about 95%
+ // of what most scientists would need on a daily basis.
+}
diff --git a/tutorials/004_codon_optimization_test.go b/tutorials/003_codon_optimization_test.go
similarity index 100%
rename from tutorials/004_codon_optimization_test.go
rename to tutorials/003_codon_optimization_test.go
diff --git a/tutorials/003_transforming_sequences_test.go b/tutorials/003_transforming_sequences_test.go
deleted file mode 100644
index d7fc2ef69..000000000
--- a/tutorials/003_transforming_sequences_test.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package tutorials_test
-
-// TODO: Write a tutorial on transforming sequences
diff --git a/tutorials/005_primer_design_test.go b/tutorials/005_primer_design_test.go
deleted file mode 100644
index 822e26d50..000000000
--- a/tutorials/005_primer_design_test.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package tutorials_test
-
-//TODO: Write a tutorial on primer design