diff --git a/boundaryservice/management/commands/loadshapefiles.py b/boundaryservice/management/commands/loadshapefiles.py index 79b54c8..fefa8ac 100644 --- a/boundaryservice/management/commands/loadshapefiles.py +++ b/boundaryservice/management/commands/loadshapefiles.py @@ -1,7 +1,7 @@ import logging log = logging.getLogger('boundaries.api.load_shapefiles') from optparse import make_option -import os, os.path +import os import sys from zipfile import ZipFile @@ -23,10 +23,10 @@ class Command(BaseCommand): help = 'Import boundaries described by shapefiles.' option_list = BaseCommand.option_list + ( make_option('-c', '--clear', action='store_true', dest='clear', - help='Clear all jurisdictions in the DB.'), + help='Clear all jurisdictions in the DB.'), make_option('-d', '--data-dir', action='store', dest='data_dir', - default=DEFAULT_SHAPEFILES_DIR, - help='Load shapefiles from this directory'), + default=DEFAULT_SHAPEFILES_DIR, + help='Load shapefiles from this directory'), make_option('-e', '--except', action='store', dest='except', default=False, help='Don\'t load these kinds of Areas, comma-delimited.'), @@ -48,7 +48,7 @@ def handle(self, *args, **options): if options['only']: only = options['only'].upper().split(',') - # TODO: stripping whitespace here because optparse doesn't handle + # TODO: stripping whitespace here because optparse doesn't handle # it correctly sources = [s for s in SHAPEFILES if s.replace(' ', '').upper() in only] @@ -90,7 +90,7 @@ def load_set(self, kind, config, options): 'delete' % kind) path = os.path.join(options['data_dir'], config['file']) - datasources = create_datasources(path) + datasources = create_datasources(path, config['encoding']) layer = datasources[0][0] @@ -109,7 +109,7 @@ def load_set(self, kind, config, options): metadata_fields=layer.fields ) log.info("Created with slug %s and id %s" % (bset.slug, bset.id)) - + for datasource in datasources: log.info("Loading %s from %s" % (kind, datasource.name)) # Assume only a single-layer in shapefile @@ -178,28 +178,12 @@ def add_boundaries_for_layer(self, config, layer, bset, database): metadata = {} for field in layer.fields: - - # Decode string fields using encoding specified in definitions - # config - if config['encoding'] != '': - try: - metadata[field] = feature.get(field).decode( - config['encoding']) - # Only strings will be decoded, get value in normal way if - # int etc. - except AttributeError: - metadata[field] = feature.get(field) - else: - metadata[field] = feature.get(field) + # Decoding handled by geodjango DataSource encoding + metadata[field] = feature.get(field) external_id = config['ider'](feature) feature_name = config['namer'](feature) - # If encoding is specified, decode id and feature name - if config['encoding'] != '': - external_id = external_id.decode(config['encoding']) - feature_name = feature_name.decode(config['encoding']) - if config['kind_first']: display_name = '%s %s' % (config['singular'], feature_name) else: @@ -216,23 +200,31 @@ def add_boundaries_for_layer(self, config, layer, bset, database): simple_shape=simple_geometry.wkt, centroid=geometry.geos.centroid) -def create_datasources(path): + +def create_datasources(path, encoding=None): # Optional to specify encoding for non-ASCII data sets if path.endswith('.zip'): path = temp_shapefile_from_zip(path) if path.endswith('.shp'): - return [DataSource(path)] + if encoding and encoding != '': + return [DataSource(path, False, False, encoding)] + else: + return [DataSource(path)] # assume it's a directory... sources = [] for fn in os.listdir(path): - fn = os.path.join(path,fn) + fn = os.path.join(path, fn) if fn.endswith('.zip'): fn = temp_shapefile_from_zip(fn) if fn.endswith('.shp'): - sources.append(DataSource(fn)) + if encoding and encoding != '': + sources.append(DataSource(fn, False, False, encoding)) + else: + sources.append(DataSource(fn)) return sources + def temp_shapefile_from_zip(zip_path): """ Given a path to a ZIP file, unpack it into a temp dir and return the path diff --git a/boundaryservice/resources.py b/boundaryservice/resources.py index 3452a82..462eb2d 100644 --- a/boundaryservice/resources.py +++ b/boundaryservice/resources.py @@ -2,6 +2,7 @@ from django.conf import settings from django.contrib.gis.measure import D +from django.http import HttpResponse from tastypie import fields from tastypie.serializers import Serializer from tastypie.constants import ALL, ALL_WITH_RELATIONS @@ -130,3 +131,26 @@ def build_filters(self, filters=None): orm_filters.update({'shape__intersects': bbox}) return orm_filters + + def build_content_type(self, format, encoding='utf-8'): + """ + Appends character encoding to the provided format if not already present. + + Grabbed from http://stackoverflow.com/questions/17280513/tastypie-json-header-to-use-utf-8 + """ + if 'charset' in format: + return format + + return "%s; charset=%s" % (format, encoding) + + def create_response(self, request, data, response_class=HttpResponse, **response_kwargs): + """ + Extracts the common "which-format/serialize/return-response" cycle. + + Mostly a useful shortcut/hook. + + Grabbed from http://stackoverflow.com/questions/17280513/tastypie-json-header-to-use-utf-8 + """ + desired_format = self.determine_format(request) + serialized = self.serialize(request, data, desired_format) + return response_class(content=serialized, content_type=self.build_content_type(desired_format), **response_kwargs) diff --git a/boundaryservice/utils.py b/boundaryservice/utils.py index 59cea5c..83a238b 100644 --- a/boundaryservice/utils.py +++ b/boundaryservice/utils.py @@ -1,9 +1,10 @@ from django.conf import settings + def get_site_url_root(): domain = getattr(settings, 'MY_SITE_DOMAIN', 'localhost') protocol = getattr(settings, 'MY_SITE_PROTOCOL', 'http') - port = getattr(settings, 'MY_SITE_PORT', '') + port = getattr(settings, 'MY_SITE_PORT', '') url = '%s://%s' % (protocol, domain) if port: url += ':%s' % port @@ -13,13 +14,14 @@ def get_site_url_root(): # Utility methods for transforming shapefile columns into useful representations # + class static_namer(): """ Name features with a single, static name. """ def __init__(self, name): self.name = name - + def __call__(self, feature): return self.name @@ -31,7 +33,7 @@ class index_namer(): def __init__(self, prefix): self.prefix = prefix self.i = 0 - + def __call__(self, feature): out = '%s%i' % (self.prefix, self.i) self.i += 1 @@ -40,7 +42,7 @@ def __call__(self, feature): class simple_namer(): """ - Name features with a joined combination of attributes, optionally passing + Name features with a joined combination of attributes, optionally passing the result through a normalizing function. """ def __init__(self, attribute_names, seperator=' ', normalizer=None): @@ -49,15 +51,26 @@ def __init__(self, attribute_names, seperator=' ', normalizer=None): self.normalizer = normalizer def __call__(self, feature): - attribute_values = map(str, map(feature.get, self.attribute_names)) + attribute_values = map(unicode, map(feature.get, self.attribute_names)) name = self.seperator.join(attribute_values).strip() - + if self.normalizer: normed = self.normalizer(name) if not normed: raise ValueError('Failed to normalize \"%s\".' % name) else: name = normed - + return name + +class even_simpler_namer(): + """ + Very simple naming function using one field. + """ + def __init__(self, attribute_name): + self.attribute_name = attribute_name + + def __call__(self, feature): + name = feature.get(self.attribute_name) + return name