|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +import altair as alt |
| 4 | +import io |
| 5 | +import numpy as np |
| 6 | +import pandas as pd |
| 7 | +import random |
| 8 | + |
| 9 | + |
# One-letter type codes keyed by the full measurement-type name; the codes
# 'Q' and 'N' are the ones this module's plot helpers pass as data_type.
DATA_TYPES = dict(quantitative='Q', ordinal='O', nominal='N')
| 15 | + |
# Field separators selectable by a human-readable name; each value is the
# literal separator character.
DELIMITERS = {'comma': ',', 'semi-colon': ';', 'space': ' ', 'tab': '\t'}
| 22 | + |
# Colour scheme names offered by this module ('reds' is the default scheme of
# the plot helpers).  NOTE(review): presumably Vega scheme names -- confirm.
COLOR_SCHEMES = ('reds', 'blues', 'category10')
| 28 | + |
| 29 | + |
def create_topo_data(url, feature):
    """Build a TopoJSON feature reference via ``alt.topo_feature``.

    Both arguments are forwarded unchanged.

    Args:
        url: location of the TopoJSON document.
        feature: name of the feature to extract from that document.

    Returns:
        Whatever ``alt.topo_feature(url, feature)`` returns.
    """
    topo_source = alt.topo_feature(url, feature)
    return topo_source
| 32 | + |
| 33 | + |
| 34 | + |
def read_data(uri, encoding=None, delimiter=None):
    """Load a table from a CSV or Excel file into a DataFrame.

    The reader is chosen from the extension of ``uri``, compared
    case-insensitively.

    Args:
        uri: location of the file as a string; must end in ``.csv`` or
            ``.xlsx``.
        encoding: text encoding forwarded to ``pd.read_csv``; not used for
            Excel files.
        delimiter: field separator forwarded to ``pd.read_csv``; not used for
            Excel files.

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        ValueError: if ``uri`` ends in neither ``.csv`` nor ``.xlsx``.
    """
    # Lower-case once; the original casing is still what gets opened.
    lowered = uri.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(uri, encoding=encoding, delimiter=delimiter)
    if lowered.endswith('.xlsx'):
        return pd.read_excel(uri)
    # Quote the name once; the doubled quotes produced a malformed message.
    raise ValueError(f'file type of "{uri}" is unknown')
| 42 | + |
| 43 | + |
def read_file(name, byte_stream, encoding=None, delimiter=None):
    """Parse in-memory file content into a DataFrame.

    ``name`` is only used to pick the parser from its extension (compared
    case-insensitively); the data itself comes from ``byte_stream``.

    Args:
        name: file name as a string; must end in ``.csv`` or ``.xlsx``.
        byte_stream: raw file content as bytes; it is wrapped in
            ``io.BytesIO`` before parsing.
        encoding: text encoding forwarded to ``pd.read_csv``; not used for
            Excel content.
        delimiter: field separator forwarded to ``pd.read_csv``; not used for
            Excel content.

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        ValueError: if ``name`` ends in neither ``.csv`` nor ``.xlsx``.
    """
    lowered = name.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(io.BytesIO(byte_stream), encoding=encoding,
                           delimiter=delimiter)
    if lowered.endswith('.xlsx'):
        return pd.read_excel(io.BytesIO(byte_stream))
    # Quote the name once; the doubled quotes produced a malformed message.
    raise ValueError(f'file type of "{name}" is unknown')
| 52 | + |
| 53 | + |
def validate_data(data):
    """Check that ``data`` has a usable NIS code column and normalise its name.

    The NIS code column is the single column whose label equals ``niscode``
    when compared case-insensitively.  On success that column is renamed to
    exactly ``'niscode'`` by reassigning ``data.columns``, so the caller's
    DataFrame is modified in place.

    Args:
        data: DataFrame to validate.

    Returns:
        None.

    Raises:
        ValueError: if there are fewer than two columns, if no column or more
            than one column is named ``niscode`` (ignoring case), or if the
            NIS code column does not hold integers.
    """
    if len(data.columns) < 2:
        raise ValueError('data should have at least a NIS code and a data column')
    # str() keeps non-string labels (e.g. integer headers) from raising
    # AttributeError on .lower().
    nis_code_pos = [idx for idx, col_name in enumerate(data.columns)
                    if str(col_name).lower() == 'niscode']
    if len(nis_code_pos) > 1:
        raise ValueError('multiple columns have NIS code name')
    if not nis_code_pos:
        raise ValueError('NIS code column is missing')
    columns = list(data.columns)
    columns[nis_code_pos[0]] = 'niscode'
    data.columns = columns
    # Accept every integer dtype (int32, nullable Int64, ...), not only the
    # int64 that pandas infers by default on some platforms.
    if not pd.api.types.is_integer_dtype(data['niscode']):
        raise ValueError('data type for NIS code is incorrect, should be integer')
| 69 | + |
| 70 | + |
def create_plot(topo_data, data, column_name, data_type, tooltip_columns=None,
                stroke='lightgrey', strokeWidth=0.5, legend_title=None,
                scheme='reds'):
    """Build a geoshape chart coloured by one column of ``data``.

    Shapes from ``topo_data`` are joined to ``data`` with a lookup transform:
    the shape's ``properties.CODE_INS`` is matched against the ``niscode``
    column of ``data``, pulling in ``column_name`` and any tooltip columns.

    Args:
        topo_data: data source handed to ``alt.Chart``.
        data: table handed to ``alt.LookupData``; looked up by ``niscode``.
        column_name: column that drives the colour encoding.
        data_type: type code appended to ``column_name`` in the colour
            encoding (for example ``'Q'`` or ``'N'``).
        tooltip_columns: optional iterable of column names shown as tooltips,
            each encoded with the ``N`` type code; ``None`` means no tooltip.
        stroke: outline colour passed to ``mark_geoshape``.
        strokeWidth: outline width passed to ``mark_geoshape``.
        legend_title: legend title; defaults to ``column_name``.
        scheme: colour scheme name passed to ``alt.Scale``.

    Returns:
        The chart object produced by the Altair call chain.
    """
    # Normalise once: the None default used to reach the tooltip loop and
    # raise TypeError.
    tooltip_columns = [] if tooltip_columns is None else list(tooltip_columns)
    lookup_columns = [column_name, *tooltip_columns]
    if legend_title is None:
        legend_title = column_name
    encodings = {
        'color': alt.Color(f'{column_name}:{data_type}',
                           legend=alt.Legend(title=legend_title),
                           scale=alt.Scale(scheme=scheme)),
    }
    # Only declare a tooltip channel when there is something to show.
    if tooltip_columns:
        encodings['tooltip'] = [f'{column}:N' for column in tooltip_columns]
    return (
        alt.Chart(topo_data)
        .mark_geoshape(stroke=stroke, strokeWidth=strokeWidth)
        .encode(**encodings)
        .transform_lookup(
            lookup='properties.CODE_INS',
            from_=alt.LookupData(data, 'niscode', lookup_columns),
        )
    )
| 90 | + |
| 91 | + |
def create_quantitative_plot(topo_data, data, column_name,
                             stroke='lightgrey', strokeWidth=0.5,
                             legend_title=None, scheme='reds',
                             tooltip_columns=None):
    """Build a plot whose colour column uses the quantitative type code ``'Q'``.

    Delegates to ``create_plot``.  Styling options are forwarded by keyword:
    forwarding them positionally shifted every value one parameter to the
    left, because ``create_plot`` takes ``tooltip_columns`` before ``stroke``.

    Args:
        topo_data: forwarded to ``create_plot``.
        data: forwarded to ``create_plot``.
        column_name: column that drives the colour encoding.
        stroke: outline colour.
        strokeWidth: outline width.
        legend_title: legend title; ``None`` is forwarded unchanged.
        scheme: colour scheme name.
        tooltip_columns: optional iterable of column names to show as
            tooltips; ``None`` means none.

    Returns:
        The value returned by ``create_plot``.
    """
    # Always hand create_plot a list so it never has to iterate over None.
    tooltips = [] if tooltip_columns is None else list(tooltip_columns)
    return create_plot(
        topo_data, data, column_name, 'Q',
        tooltip_columns=tooltips,
        stroke=stroke, strokeWidth=strokeWidth,
        legend_title=legend_title, scheme=scheme,
    )
| 97 | + |
| 98 | + |
def create_nominal_plot(topo_data, data, column_name,
                        stroke='lightgrey', strokeWidth=0.5,
                        legend_title=None, scheme='reds',
                        tooltip_columns=None):
    """Build a plot whose colour column uses the nominal type code ``'N'``.

    Delegates to ``create_plot``.  Styling options are forwarded by keyword:
    forwarding them positionally shifted every value one parameter to the
    left, because ``create_plot`` takes ``tooltip_columns`` before ``stroke``.

    Args:
        topo_data: forwarded to ``create_plot``.
        data: forwarded to ``create_plot``.
        column_name: column that drives the colour encoding.
        stroke: outline colour.
        strokeWidth: outline width.
        legend_title: legend title; ``None`` is forwarded unchanged.
        scheme: colour scheme name.
        tooltip_columns: optional iterable of column names to show as
            tooltips; ``None`` means none.

    Returns:
        The value returned by ``create_plot``.
    """
    # Always hand create_plot a list so it never has to iterate over None.
    tooltips = [] if tooltip_columns is None else list(tooltip_columns)
    return create_plot(
        topo_data, data, column_name, 'N',
        tooltip_columns=tooltips,
        stroke=stroke, strokeWidth=strokeWidth,
        legend_title=legend_title, scheme=scheme,
    )
0 commit comments