Skip to content

Commit 6155ba7

Browse files
committed
fixed #4
1 parent 3e499cf commit 6155ba7

File tree

19 files changed

+211
-28
lines changed

19 files changed

+211
-28
lines changed

R/data.R

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#' It provides insights into the transactions that have occurred, including order dates, delivery dates, customer and store information,
55
#' as well as product details.
66
#'
7-
#' @format A data frame with 17 columns:
7+
#' @format A data frame of sales transactions with the following columns:
88
#' \describe{
99
#' \item{order_key}{\code{double} Unique identifier for each order.}
1010
#' \item{line_number}{\code{double} Line number within the order (for multi-line orders).}
@@ -19,10 +19,13 @@
1919
#' \item{unit_cost}{\code{double} The cost per unit of the product.}
2020
#' \item{currency_code}{\code{character} The currency code used for the transaction (e.g., USD, EUR).}
2121
#' \item{exchange_rate}{\code{double} The exchange rate applied to the currency, if applicable.}
22-
#' \item{revenue}{\code{double} A product's unit_price multiplied by quantity.}
22+
#' \item{gross_revenue}{\code{double} A product's unit_price multiplied by quantity.}
23+
#' \item{net_revenue}{\code{double} A product's net_price multiplied by quantity.}
24+
#' \item{unit_discount}{\code{double} A product's unit_price minus net_price.}
25+
#' \item{discounts}{\code{double} A product's unit_discount multiplied by quantity.}
2326
#' \item{cogs}{\code{double} A product's unit_cost multiplied by quantity.}
24-
#' \item{margin}{\code{double} A product's revenue minus cogs.}
25-
#' \item{unit_margin}{\code{double} A product unit_price minus unit_cost.}
27+
#' \item{margin}{\code{double} A product's net_revenue minus cogs.}
28+
#' \item{unit_margin}{\code{double} A product's margin divided by quantity.}
2629
#' }
2730
#' @source https://github.com/sql-bi/Contoso-Data-Generator-V2-Data/releases/tag/ready-to-use-data
2831
"sales"
@@ -131,7 +134,7 @@
131134
#' \item{currency_code}{\code{character} The currency code used for the order (e.g., USD, EUR).}
132135
#' }
133136
#' @source https://github.com/sql-bi/Contoso-Data-Generator-V2-Data/releases/tag/ready-to-use-data
134-
"order"
137+
"orders"
135138

136139

137140

R/database.R

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ create_contoso_duckdb <- function(dir="temp"){
5252
duckdb::duckdb_register(con,"date" ,contoso::date,overwrite = TRUE)
5353
duckdb::duckdb_register(con,"fx" ,contoso::fx,overwrite = TRUE)
5454
duckdb::duckdb_register(con,"store" ,contoso::store,overwrite = TRUE)
55-
duckdb::duckdb_register(con,"order" ,contoso::order,overwrite = TRUE)
55+
duckdb::duckdb_register(con,"orders" ,contoso::orders,overwrite = TRUE)
5656
duckdb::duckdb_register(con,"orderrows",contoso::orderrows,overwrite = TRUE)
5757

5858

@@ -61,6 +61,8 @@ create_contoso_duckdb <- function(dir="temp"){
6161
product_db <- dplyr::tbl(con,dplyr::sql("select * from product"))
6262
customer_db <- dplyr::tbl(con,dplyr::sql("select * from customer"))
6363
store_db <- dplyr::tbl(con,dplyr::sql("select * from store"))
64+
orders_db <- dplyr::tbl(con,dplyr::sql("select * from orders"))
65+
orderrows_db <- dplyr::tbl(con,dplyr::sql("select * from orderrows"))
6466
fx_db <- dplyr::tbl(con,dplyr::sql("select * from fx"))
6567
date_db <- dplyr::tbl(con,dplyr::sql("select * from date"))
6668

@@ -72,6 +74,8 @@ create_contoso_duckdb <- function(dir="temp"){
7274
,store=store_db
7375
,fx=fx_db
7476
,date=date_db
77+
,orders=orders_db
78+
,orderrows=orderrows_db
7579
)
7680

7781
return(out)

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ processing techniques
4747
This dataset is perfect for practicing time series analysis, financial
4848
modeling, or any business intelligence-related tasks.
4949

50+
With `View()`, you can see each column’s label, attached using the
51+
[labelled](https://larmarange.github.io/labelled/index.html)
52+
package.[^1]
53+
5054
The data is sourced from the
5155
[sqlbi](https://github.com/sql-bi/Contoso-Data-Generator-V2-Data/releases/tag/ready-to-use-data)
5256
GitHub site
@@ -77,7 +81,8 @@ pak::pak("alejandrohagan/contoso")
7781

7882
## Example
7983

80-
Example of how to create a duckdb database with Conotoso tables loaded
84+
Example of how to create a duckdb database with Contoso tables loaded is
85+
below:
8186

8287
``` r
8388
library(contoso)
@@ -88,3 +93,6 @@ contoso_db <- create_contoso_duckdb(dir = "temp")
8893
# Access the sales dataset from the database
8994
sales_data <- contoso_db$sales
9095
```
96+
97+
[^1]: Inspired by [Crystal
98+
Lewis](https://cghlewis.com/blog/dict_clean/)’s excellent blog post

README.qmd

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ The package comes with the following datasets:
2727
- **date**:
2828
- Contains date-related information, including date, week, month, quarter, and year for use in time-based analysis.
2929

30-
- **order**:
30+
- **orders**:
3131
- Contains information about individual orders, including order key, customer key, order date, and store information.
3232

3333
- **orderrows**:
@@ -40,6 +40,8 @@ The package comes with the following datasets:
4040

4141
This dataset is perfect for practicing time series analysis, financial modeling, or any business intelligence-related tasks.
4242

43+
With `View()`, you can see each column's label, attached using the [labelled](https://larmarange.github.io/labelled/index.html) package.^[Inspired by [Crystal Lewis](https://cghlewis.com/blog/dict_clean/)'s excellent blog post]
44+
4345
The data is sourced from the [sqlbi](https://github.com/sql-bi/Contoso-Data-Generator-V2-Data/releases/tag/ready-to-use-data) GitHub site
4446

4547
## Dataset overview
@@ -84,7 +86,7 @@ pak::pak("alejandrohagan/contoso")
8486

8587
## Example
8688

87-
Example of how to create a duckdb database with Conotoso tables loaded
89+
Example of how to create a duckdb database with Contoso tables loaded is below:
8890

8991

9092
```{r}

data-raw/customer.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,37 @@ customer <- sales |>
1515

1616
rm(sales)
1717

18+
customer_labels <- list(
19+
customer_key = "Unique customer identifier",
20+
geo_area_key = "Geographical area identifier",
21+
start_dt = "Start date of customer record",
22+
end_dt = "End date of customer record",
23+
continent = "Continent of the customer",
24+
gender = "Gender of the customer",
25+
title = "Title of the customer (e.g., Mr., Mrs., Dr.)",
26+
given_name = "First name of the customer",
27+
middle_initial = "Middle initial of the customer",
28+
surname = "Last name of the customer",
29+
street_address = "Street address of the customer",
30+
city = "City where the customer resides",
31+
state = "State or region where the customer resides",
32+
state_full = "Full name of the state or region",
33+
zip_code = "Postal code of the customer’s address",
34+
country = "Country where the customer resides",
35+
country_full = "Full name of the country",
36+
birthday = "Birthday of the customer",
37+
age = "Age of the customer",
38+
occupation = "Occupation of the customer",
39+
company = "Company where the customer works",
40+
vehicle = "Vehicle owned by the customer",
41+
latitude = "Latitude of the customer's location",
42+
longitude = "Longitude of the customer's location"
43+
)
44+
45+
# Attach the variable labels to the `customer` data frame
46+
var_labels(customer) <- customer_labels
47+
48+
49+
50+
1851
usethis::use_data(customer, overwrite = TRUE)

data-raw/date.R

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,28 @@ dir <- "data-raw"
66

77
date <- readr::read_csv(file.path(dir,"date.csv"),name_repair = janitor::make_clean_names,show_col_types = FALSE)
88

9+
date_labels <- list(
10+
date = "Full date",
11+
date_key = "Unique date identifier",
12+
year = "Year of the date",
13+
year_quarter = "Year and quarter of the date",
14+
year_quarter_number = "Quarter number within the year",
15+
quarter = "Quarter of the year (Q1, Q2, Q3, Q4)",
16+
year_month = "Year and month of the date",
17+
year_month_short = "Short year and month (e.g., 2023-05)",
18+
year_month_number = "Numeric representation of the year and month",
19+
month = "Month of the year (1-12)",
20+
month_short = "Short month name (e.g., Jan, Feb)",
21+
month_number = "Numeric month (1-12)",
22+
dayof_week = "Day of the week (e.g., Monday, Tuesday)",
23+
dayof_week_short = "Short form of the day of the week (e.g., Mon, Tue)",
24+
dayof_week_number = "Numeric day of the week (1 = Sunday, 7 = Saturday)",
25+
working_day = "Indicates if it's a working day (TRUE/FALSE)",
26+
working_day_number = "Numeric representation of the working day only (1 = Monday, 2=Tuesday, 0 = weekend)"
27+
)
28+
29+
# Attach the variable labels to the `date` data frame
30+
labelled::var_labels(date) <- date_labels
31+
932

1033
usethis::use_data(date, overwrite = TRUE)

data-raw/fx.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,16 @@ dir <- "data-raw"
55

66
fx <- readr::read_csv(file.path(dir,"currencyexchange.csv"),name_repair = janitor::make_clean_names,show_col_types = FALSE)
77

8+
9+
fx_labels <- list(
10+
date = "Date of the exchange rate",
11+
from_currency = "Currency being exchanged from",
12+
to_currency = "Currency being exchanged to",
13+
exchange = "Exchange rate between the two currencies"
14+
)
15+
16+
# Attach the variable labels to the `fx` data frame
17+
var_labels(fx) <- fx_labels
18+
19+
820
usethis::use_data(fx, overwrite = TRUE)

data-raw/order.R

Lines changed: 0 additions & 8 deletions
This file was deleted.

data-raw/orderrows.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,17 @@ dir <- "data-raw"
66
orderrows <- readr::read_csv(file.path(dir,"orderrows.csv"),name_repair = janitor::make_clean_names,show_col_types = FALSE)
77

88

9+
orderrows_labels <- list(
10+
order_key = "Unique order identifier",
11+
line_number = "Line number of the order",
12+
product_key = "Unique product identifier",
13+
quantity = "Quantity of the product ordered",
14+
unit_price = "Price per unit of the product",
15+
net_price = "Total price for the line item after any discounts",
16+
unit_cost = "Cost per unit of the product"
17+
)
18+
labelled::var_labels(orderrows) <- orderrows_labels
19+
20+
21+
922
usethis::use_data(orderrows, overwrite = TRUE)

data-raw/orders.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
## code to prepare `orders` dataset goes here
2+
fpaR::create_data_raw()
3+
4+
dir <- "data-raw"
5+
6+
orders <- readr::read_csv(file.path(dir,"orders.csv"),name_repair = janitor::make_clean_names,show_col_types = FALSE)
7+
8+
9+
order_labels <- list(
10+
order_key = "Unique order identifier",
11+
customer_key = "Unique customer identifier",
12+
store_key = "Unique store identifier",
13+
order_date = "Date when the order was placed",
14+
delivery_date = "Actual delivery date",
15+
currency_code = "Currency code (e.g., USD, EUR)"
16+
)
17+
18+
# Attach the variable labels to the `orders` data frame
19+
20+
labelled::var_labels(orders) <- order_labels
21+
22+
usethis::use_data(orders, overwrite = TRUE)

0 commit comments

Comments
 (0)