|
1 | | -# biocommons.example |
| 1 | +# eutils -- simplified interface to NCBI E-Utilities |
2 | 2 |
|
3 | | -[](https://img.shields.io/github/v/release/biocommons/python-package) |
4 | | -[](https://github.com/biocommons/python-package/actions/workflows/main.yml?query=branch%3Amain) |
5 | | -[](https://codecov.io/gh/biocommons/python-package) |
6 | | -[](https://img.shields.io/github/commit-activity/m/biocommons/python-package) |
7 | | -[](https://img.shields.io/github/license/biocommons/python-package) |
| 3 | +[](https://img.shields.io/github/v/release/biocommons/eutils) |
| 4 | +[](https://github.com/biocommons/eutils/actions/workflows/main.yml?query=branch%3Amain) |
| 5 | +[](https://codecov.io/gh/biocommons/eutils) |
| 6 | +[](https://img.shields.io/github/commit-activity/m/biocommons/eutils) |
| 7 | +[](https://img.shields.io/github/license/biocommons/eutils) |
8 | 8 |
|
9 | | -Package Description |
| 9 | +**eutils is a Python package to simplify searching, fetching, and |
| 10 | +parsing records from NCBI using their [E-utilities](https://www.ncbi.nlm.nih.gov/books/NBK25501/) interface.** |
10 | 11 |
|
11 | | -This project is a product of the [biocommons](https://biocommons.org/) community. |
| 12 | +## Features |
12 | 13 |
|
13 | | -- **Github repository**: <https://github.com/biocommons/python-package/> |
14 | | -- **Documentation** <https://biocommons.github.io/python-package/> |
| 14 | +* simple Pythonic interface for searching and fetching |
| 15 | +* support for [NCBI API keys](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/), and rate throttling when no key is available |
| 16 | +* optional sqlite-based caching of compressed replies |
| 17 | +* "façades" that facilitate access to essential attributes in XML replies |
15 | 18 |
|
16 | | -## Python Package Installation |
| 19 | +- **GitHub repository**: <https://github.com/biocommons/eutils/> |
| 20 | +- **Documentation**: <https://eutils.readthedocs.io/en/stable/> |
17 | 21 |
|
18 | | -Install from PyPI with `pip install biocommons.example` or `uv pip install biocommons.example`, then try it: |
19 | | - |
20 | | - $ source venv/bin/activate |
21 | | - |
22 | | - $ python3 -m biocommons.example |
23 | | - Marvin says: |
24 | | - There's only one life-form as intelligent as me within thirty parsecs... |
25 | | - |
26 | | - $ marvin-quote |
27 | | - Marvin says: |
28 | | - You think you've got problems? What are you supposed to do if you... |
| 22 | +## Example Usage |
29 | 23 |
|
| 24 | + $ uv pip install eutils |
| 25 | + $ export NCBI_API_KEY=8d4b... |
30 | 26 | $ ipython |
31 | | - >>> from biocommons.example import __version__, get_quote_from_marvin |
32 | | - >>> __version__ |
33 | | - '0.1.dev8+gd5519a8.d20211123' |
34 | | - >>> get_quote() |
35 | | - "The first ten million years were the worst, ... |
| 27 | + |
| 28 | + >>> import os |
| 29 | + >>> from biocommons.eutils import Client |
| 30 | + |
| 31 | + # Initialize a client. This client handles all caching and query |
| 32 | + # throttling. For example: |
| 33 | + >>> ec = Client(api_key=os.environ.get("NCBI_API_KEY", None)) |
| 34 | + |
| 35 | + # search for tumor necrosis factor genes |
| 36 | + # any valid NCBI query may be used |
| 37 | + >>> esr = ec.esearch(db='gene',term='tumor necrosis factor') |
| 38 | + |
| 39 | + # esearch returns a list of entity IDs associated with your search. Preview some of them: |
| 40 | + >>> esr.ids[:5] |
| 41 | + [136114222, 136113226, 136112112, 136111930, 136111620] |
| 42 | + |
| 43 | + # fetch data for an ID (gene id 7157 is human TP53, a different gene from the TNF search above) |
| 44 | + >>> egs = ec.efetch(db='gene', id=7157) |
| 45 | + |
| 46 | + # One may fetch multiple genes at a time. These are returned as an |
| 47 | + # EntrezgeneSet. We'll grab the first (and only) child, which returns |
| 48 | + # an instance of the Entrezgene class. |
| 49 | + >>> eg = egs.entrezgenes[0] |
| 50 | + |
| 51 | + # Easily access some basic information about the gene |
| 52 | + >>> eg.hgnc, eg.maploc, eg.description, eg.type, eg.genus_species |
| 53 | + ('TP53', '17p13.1', 'tumor protein p53', 'protein-coding', 'Homo sapiens') |
| 54 | + |
| 55 | + # get a list of genomic references |
| 56 | + >>> sorted([(r.acv, r.label) for r in eg.references]) |
| 57 | + [('NC_000017.11', 'Chromosome 17 Reference GRCh38...'), |
| 58 | + ('NC_018928.2', 'Chromosome 17 Alternate ...'), |
| 59 | + ('NG_017013.2', 'RefSeqGene')] |
| 60 | + |
| 61 | + # Get the first three products defined on GRCh38 |
| 62 | + >>> [p.acv for p in eg.references[0].products][:3] |
| 63 | + ['NM_001126112.2', 'NM_001276761.1', 'NM_000546.5'] |
| 64 | + |
| 65 | + # As a sample, grab the first mRNA product defined on this reference (order is arbitrary) |
| 66 | + >>> mrna = [i for i in eg.references[0].products if i.type == "mRNA"][0] |
| 67 | + >>> str(mrna) |
| 68 | + 'GeneCommentary(acv=NM_001126112.2,type=mRNA,heading=Reference,label=transcript variant 2)' |
| 69 | + |
| 70 | + # mrna.genomic_coords provides access to the exon definitions on this reference |
| 71 | + >>> mrna.genomic_coords.gi, mrna.genomic_coords.strand |
| 72 | + ('568815581', -1) |
| 73 | + |
| 74 | + >>> mrna.genomic_coords.intervals |
| 75 | + [(7687376, 7687549), (7676520, 7676618), (7676381, 7676402), |
| 76 | + (7675993, 7676271), (7675052, 7675235), (7674858, 7674970), |
| 77 | + (7674180, 7674289), (7673700, 7673836), (7673534, 7673607), |
| 78 | + (7670608, 7670714), (7668401, 7669689)] |
| 79 | + |
| 80 | + # and if the mrna has a product, the resulting protein: |
| 81 | + >>> str(mrna.products[0]) |
| 82 | + 'GeneCommentary(acv=NP_001119584.1,type=peptide,heading=Reference,label=isoform a)' |
36 | 83 |
|
37 | 84 |
|
38 | 85 | ## Developer Setup |
|
0 commit comments