-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStatementAnalyzerDanskeBank.R
More file actions
123 lines (108 loc) · 4.42 KB
/
StatementAnalyzerDanskeBank.R
File metadata and controls
123 lines (108 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Script to fetch, rinse&clean, analyse and show data regarding Danish bank
# statements from Danske Bank. The original script has been modernised to use
# tidyverse tooling, provide reusable helper functions and make it easier to
# tweak the configuration.
library(dplyr)
library(forcats)
library(ggplot2)
library(lubridate)
library(purrr)
library(readxl)
library(stringr)
# --------------------------------------------------------------------------- #
# Configuration
# --------------------------------------------------------------------------- #
# Update this constant to point to the desired statement export. The script
# expects the export produced by Danske Bank (CSV/Excel) with columns matching
# the structure used below.
statement_path <- "DanskeKonto-3618712532-20181107.xlsx"
# Mapping of spending categories to the keywords that should trigger them. The
# keywords are compiled to case-insensitive regular expressions, so feel free to
# add new entries without worrying about letter casing.
category_keywords <- list(
Transport = c("DSB", "Metrostation", "gomore", "Flixbus"),
Cash = c("ATM", "BANK", "client"),
Food = c("Foetex", "Bilka", "Netto", "7-?ELEVEN", "Irma", "Fakta", "ALDI"),
Flights = c("EASYJET", "RYANAIR", "AIRWAYS", "flights"),
Gas = c("oil", "tank", "F24", "Shell", "Circle K", "Bonus", "Gasoline", "UnoX"),
Rent = c("rent"),
Freetime = c("Boldklub", "McDonald", "Street Food", "Hotel", "Old Irish", "Sushi",
"Bar", "Temabar", "Burger", "King", "Fitness", "Shawarmahuset"),
Exceptional = c("payment", "fixing"),
Medical = c("Apotek", "medical", "Doctor", "Apotheke")
)
# --------------------------------------------------------------------------- #
# Helper functions
# --------------------------------------------------------------------------- #
#' Read and tidy the Danske Bank statement export.
#'
#' @param path Path to the Excel file produced by Danske Bank.
#' @return A tibble with parsed dates and months.
read_statement <- function(path) {
read_excel(
path,
col_types = c("text", "text", "numeric", "numeric", "text", "text")
) %>%
mutate(
Date = dmy(Date),
Month = floor_date(Date, unit = "month")
)
}
#' Categorise each transaction based on the provided keyword mapping.
#'
#' @param data A tibble containing at least a `Text` column.
#' @param keywords A named list mapping category names to keyword vectors.
#' @return The original tibble with an additional `Category` column.
categorise_transactions <- function(data, keywords) {
patterns <- map(keywords, ~ regex(str_c(.x, collapse = "|"), ignore_case = TRUE))
data %>%
mutate(
Category = map_chr(Text, function(text) {
safe_text <- coalesce(text, "")
matched <- detect(names(patterns), ~ str_detect(safe_text, patterns[[.x]]))
if (is.null(matched)) "Other" else matched
})
)
}
# --------------------------------------------------------------------------- #
# Data preparation
# --------------------------------------------------------------------------- #
statement <- statement_path %>%
read_statement() %>%
filter(Amount < 0) %>%
categorise_transactions(category_keywords) %>%
mutate(Amount = abs(Amount))
monthly_spend <- statement %>%
group_by(Month, Category) %>%
summarise(Total = sum(Amount), .groups = "drop")
average_spend <- monthly_spend %>%
group_by(Category) %>%
summarise(Average = mean(Total), .groups = "drop")
# --------------------------------------------------------------------------- #
# Visualisations
# --------------------------------------------------------------------------- #
ggplot(monthly_spend, aes(Month, Total, colour = Category)) +
facet_wrap(~Category, ncol = 2, scales = "free_y") +
geom_smooth(method = "loess", se = FALSE) +
geom_point() +
scale_y_continuous(labels = scales::label_number_si(prefix = "DKK ")) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
labs(
x = NULL,
y = "Monthly total (DKK)",
title = "Monthly expenditure by category"
)
ggplot(average_spend, aes(x = fct_reorder(Category, Average), y = Average)) +
geom_col(fill = "#2C7FB8") +
coord_flip() +
scale_y_continuous(labels = scales::label_number_si(prefix = "DKK ")) +
theme_minimal(base_size = 12) +
labs(
x = NULL,
y = "Average monthly expense (DKK)",
title = "Average spend by category"
)