@@ -3,16 +3,21 @@ library(openssl)
# Build the list of field descriptors ("props") for every column of `df`.
#
# Args:
#   df: data frame whose columns are described.
#   columnSpecs: optional named list of per-column overrides. It is checked by
#     validate_columnSpecs() and then passed unchanged to infer_prop().
#
# Returns:
#   A list with one element per column, in column order, each element being
#   the value returned by infer_prop() for that column.
raw_fields <- function(df, columnSpecs = list()) {
  validate_columnSpecs(columnSpecs)
  cols <- colnames(df)

  # Inference runs on at most `max_rows` randomly chosen rows (sampled without
  # replacement) so that very large inputs do not pay for a full scan.
  max_rows <- 1000L
  n_rows <- nrow(df)
  if (n_rows > max_rows) {
    # drop = FALSE keeps a one-column data frame from collapsing into a bare
    # vector, which would break the by-name column lookup in infer_prop().
    df_sample <- df[sample(n_rows, max_rows), , drop = FALSE]
  } else {
    df_sample <- df
  }

  lapply(seq_along(cols), function(i) {
    infer_prop(cols[i], i, df_sample, columnSpecs)
  })
}
1116
1217infer_prop <- function (col , i = NULL , df , columnSpecs = list ()) {
1318 s <- df [[col ]]
14- semantic_type <- ifelse((col %in% names(columnSpecs )), columnSpecs [[col ]]$ semanticType , infer_semantic(s ))
15- analytic_type <- ifelse((col %in% names(columnSpecs )), columnSpecs [[col ]]$ analyticalType , infer_analytic(s ))
19+ semantic_type <- ifelse((col %in% names(columnSpecs )), columnSpecs [[col ]]$ semanticType , infer_semantic(s , col ))
20+ analytic_type <- ifelse((col %in% names(columnSpecs )), columnSpecs [[col ]]$ analyticalType , infer_analytic(s , col ))
1621 prop <- list (
1722 fid = fname_encode(col ),
1823 name = col ,
@@ -22,28 +27,33 @@ infer_prop <- function(col, i = NULL, df, columnSpecs = list()) {
2227 return (prop )
2328}
2429
# Report whether a column name denotes a geographic coordinate.
#
# The name is lower-cased and stripped of leading/trailing whitespace and dots
# before being compared with a fixed list of latitude/longitude spellings.
# Vectorised: returns one logical per element of `field_name`.
is_geo_field <- function(field_name) {
  # trimws() interprets `whitespace` as a regular expression, so the set of
  # characters to strip has to be a bracket expression; a bare "." would match
  # any character and erase the whole name.
  field_name <- tolower(
    trimws(field_name, which = "both", whitespace = "[ \t\r\n.]")
  )
  field_name %in% c("latitude", "longitude", "lat", "long", "lon")
}

# Infer the semantic type of a column.
#
# Args:
#   s: the column values.
#   field_name: the column name, used to recognise coordinate columns.
#
# Returns:
#   "quantitative" when every class of `s` is a numeric/complex class or the
#   name is a geographic coordinate name; "temporal" when `s` inherits from a
#   date or date-time class; "nominal" otherwise.
infer_semantic <- function(s, field_name) {
  kind <- class(s)
  numeric_classes <- c("numeric", "integer", "double", "complex")
  if (all(kind %in% numeric_classes) || is_geo_field(field_name)) {
    return("quantitative")
  }
  # inherits() accepts a vector of class names and is TRUE if any one matches.
  if (inherits(s, c("POSIXct", "POSIXlt", "POSIXt", "Date"))) {
    return("temporal")
  }
  "nominal"
}
3846
# Infer the analytic role of a column.
#
# Args:
#   s: the column values.
#   field_name: the column name, used to recognise coordinate columns.
#
# Returns:
#   "dimension" when the name is a geographic coordinate name (checked first,
#   so numeric coordinate columns are still dimensions); "measure" when every
#   class of `s` is a numeric/complex class; "dimension" otherwise.
infer_analytic <- function(s, field_name) {
  if (is_geo_field(field_name)) {
    return("dimension")
  }
  numeric_classes <- c("numeric", "integer", "double", "complex")
  if (all(class(s) %in% numeric_classes)) {
    return("measure")
  }
  "dimension"
}
4858
4959validate_columnSpecs <- function (columnSpecs ) {
0 commit comments