|
9 | 9 | "# Notebook Preamble" |
10 | 10 | ] |
11 | 11 | }, |
12 | | - { |
13 | | - "cell_type": "markdown", |
14 | | - "metadata": {}, |
15 | | - "source": [ |
16 | | - "## IPython Magic" |
17 | | - ] |
18 | | - }, |
19 | 12 | { |
20 | 13 | "cell_type": "code", |
21 | 14 | "execution_count": null, |
|
26 | 19 | "%autoreload 3" |
27 | 20 | ] |
28 | 21 | }, |
29 | | - { |
30 | | - "cell_type": "markdown", |
31 | | - "metadata": { |
32 | | - "tags": [] |
33 | | - }, |
34 | | - "source": [ |
35 | | - "## Notebook Imports" |
36 | | - ] |
37 | | - }, |
38 | 22 | { |
39 | 23 | "cell_type": "code", |
40 | 24 | "execution_count": null, |
|
47 | 31 | "import sys\n", |
48 | 32 | "from pathlib import Path\n", |
49 | 33 | "\n", |
50 | | - "# We need to set these environment variables prior to importing our intake catalog.\n", |
51 | | - "# You can also set them in your own shell environment instead.\n", |
52 | | - "os.environ[\"PUDL_INTAKE_CACHE\"] = str(Path.home() / \".cache/intake\")\n", |
53 | | - "os.environ[\"PUDL_INTAKE_PATH\"] = \"gs://intake.catalyst.coop/test\"\n", |
| 34 | + "logger = logging.getLogger()\n", |
| 35 | + "logger.setLevel(logging.INFO)\n", |
| 36 | + "handler = logging.StreamHandler(stream=sys.stdout)\n", |
| 37 | + "formatter = logging.Formatter(\"%(message)s\")\n", |
| 38 | + "handler.setFormatter(formatter)\n", |
| 39 | + "logger.handlers = [handler]\n", |
| 40 | + "\n", |
| 41 | + "# Where to cache downloaded data locally. Defaults to ~/.intake/cache\n", |
| 42 | + "# os.environ[\"PUDL_INTAKE_CACHE\"] = str(Path.home() / \".cache/intake\")\n", |
54 | 43 | "\n", |
55 | | - "# Local data if you've got it!\n", |
| 44 | + "# You can override the default path to the data in your environment if need be\n", |
56 | 45 | "# os.environ[\"PUDL_INTAKE_PATH\"] = str(Path.cwd().parent / \"data\")\n", |
| 46 | + "# os.environ[\"PUDL_INTAKE_PATH\"] = \"gs://intake.catalyst.coop/dev\"\n", |
57 | 47 | "\n", |
58 | 48 | "# 3rd Party Imports:\n", |
59 | 49 | "import intake\n", |
|
68 | 58 | "cell_type": "markdown", |
69 | 59 | "metadata": {}, |
70 | 60 | "source": [ |
71 | | - "## Set up a logger" |
72 | | - ] |
73 | | - }, |
74 | | - { |
75 | | - "cell_type": "code", |
76 | | - "execution_count": null, |
77 | | - "metadata": {}, |
78 | | - "outputs": [], |
79 | | - "source": [ |
80 | | - "logger = logging.getLogger()\n", |
81 | | - "logger.setLevel(logging.INFO)\n", |
82 | | - "handler = logging.StreamHandler(stream=sys.stdout)\n", |
83 | | - "formatter = logging.Formatter(\"%(message)s\")\n", |
84 | | - "handler.setFormatter(formatter)\n", |
85 | | - "logger.handlers = [handler]" |
86 | | - ] |
87 | | - }, |
88 | | - { |
89 | | - "cell_type": "markdown", |
90 | | - "metadata": {}, |
91 | | - "source": [ |
92 | | - "## What Intake data sources are installed?" |
| 61 | + "# Explore installed Intake catalogs" |
93 | 62 | ] |
94 | 63 | }, |
95 | 64 | { |
|
155 | 124 | "cell_type": "markdown", |
156 | 125 | "metadata": {}, |
157 | 126 | "source": [ |
158 | | - "## Normal usage" |
| 127 | + "# Reading some data from the catalog" |
159 | 128 | ] |
160 | 129 | }, |
161 | 130 | { |
|
212 | 181 | "outputs": [], |
213 | 182 | "source": [ |
214 | 183 | "%%time\n", |
215 | | - "df1 = pd.read_parquet(\"gs://intake.catalyst.coop/test/hourly_emissions_epacems/epacems-2020-ID.parquet\")" |
| 184 | + "df1 = pd.read_parquet(f\"{os.environ['PUDL_INTAKE_PATH']}/hourly_emissions_epacems/epacems-2020-ID.parquet\")" |
216 | 185 | ] |
217 | 186 | }, |
218 | 187 | { |
|
251 | 220 | "from pprint import pprint\n", |
252 | 221 | "import fsspec\n", |
253 | 222 | "epacems_pq = pq.read_table(\n", |
254 | | - " \"gs://intake.catalyst.coop/test/hourly_emissions_epacems/epacems-2020-ID.parquet\",\n", |
| 223 | + " f\"{os.environ['PUDL_INTAKE_PATH']}/hourly_emissions_epacems/epacems-2020-ID.parquet\",\n", |
255 | 224 | " filesystem=fsspec.filesystem(\"gs\"),\n", |
256 | 225 | ")\n", |
257 | 226 | "dtype_dict = {name: dtype for name, dtype in zip(epacems_pq.schema.names, epacems_pq.schema.types)}\n", |
|
0 commit comments