Skip to content

Commit 4ef6b05

Browse files
authored
Merge pull request #19 from grinya007/dev
Vim plugins => docker image, Dockerfile build stage removed, fixes
2 parents e0a5a44 + c5b736d commit 4ef6b05

File tree

3 files changed

+71
-42
lines changed

3 files changed

+71
-42
lines changed

Dockerfile

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,53 @@
1-
FROM postgres:14 AS base
2-
3-
RUN apt-get update && \
4-
apt-get install -y --no-install-recommends python3 python3-pip postgresql-plpython3-14 less vim && \
5-
apt-get remove --purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false && \
6-
rm -rf /usr/share/man /tmp/* /var/cache/apt/archives/* /var/lib/apt/lists/* /var/lib/dpkg/info/*
1+
FROM postgres:14
2+
3+
RUN apt-get update \
4+
&& apt-get install -y --no-install-recommends \
5+
python3 \
6+
python3-pandas \
7+
python3-pip \
8+
python3-psycopg2 \
9+
postgresql-plpython3-14 \
10+
curl \
11+
git \
12+
less \
13+
vim \
14+
vim-airline \
15+
&& apt-get remove --purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
16+
&& rm -rf /usr/share/man /tmp/* /var/cache/apt/archives/* /var/lib/apt/lists/* /var/lib/dpkg/info/*
717

818
RUN ln -s /usr/bin/python3 /usr/bin/python
919

10-
11-
FROM base AS build
12-
13-
RUN apt-get update && \
14-
apt-get install -y --no-install-recommends build-essential python3-dev postgresql-server-dev-14 && \
15-
apt-get remove --purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false && \
16-
rm -rf /usr/share/man /tmp/* /var/cache/apt/archives/* /var/lib/apt/lists/* /var/lib/dpkg/info/*
17-
18-
19-
RUN mkdir /pip
20-
ENV PIP_TARGET=/pip
21-
ENV PYTHONPATH=/pip
22-
RUN pip install pandas psycopg2 pgcli
23-
24-
25-
FROM base
20+
RUN pip install pgcli
2621

2722
ENV EDITOR=vim
2823
ENV PAGER="less -S"
2924
ENV PYTHONPYCACHEPREFIX=/tmp
3025
ENV POSTGRES_HOST_AUTH_METHOD=trust
3126
ENV TABLES_META=/tmp/tables_meta.pickle
3227
ENV CSV_DIR=/csv
33-
ENV PIP_TARGET=/pip
34-
ENV PYTHONPATH=/pip
35-
ENV PATH="$PATH:/pip/bin"
36-
37-
RUN mkdir /pip
38-
RUN mkdir /csv
39-
RUN mkdir /src
40-
COPY src/* /src/
41-
WORKDIR /src
42-
43-
COPY --from=build /pip/ /pip/
4428

4529
USER postgres
4630
COPY db/init.sql /docker-entrypoint-initdb.d/
4731
RUN docker-entrypoint.sh
4832

4933
USER root
50-
RUN mkdir -p /root/.config/pgcli
51-
COPY db/pgcli_config /root/.config/pgcli/config
34+
ENV HOME=/root
35+
RUN mkdir -p $HOME/.config/pgcli
36+
COPY db/pgcli_config $HOME/.config/pgcli/config
37+
38+
# Vim plugins
# NOTE(review): TLS verification is disabled for these clones, presumably
# because no CA bundle is installed in the image -- confirm, and prefer
# installing ca-certificates over skipping verification.
# GIT_SSL_NO_VERIFY is exported only inside this build step so the setting
# does not persist into the final image's runtime environment.
RUN mkdir -p $HOME/.vim/pack/plugins/start \
    && export GIT_SSL_NO_VERIFY=true \
    && git clone --depth=1 https://github.com/nanotech/jellybeans.vim.git $HOME/.vim/pack/plugins/start/jellybeans \
    && git clone --depth=1 https://github.com/ervandew/supertab.git $HOME/.vim/pack/plugins/start/supertab \
    && git clone --depth=1 https://github.com/preservim/nerdcommenter.git $HOME/.vim/pack/plugins/start/nerdcommenter
44+
45+
COPY .vimrc $HOME/
46+
47+
RUN mkdir /csv
48+
RUN mkdir /src
49+
COPY src/* /src/
50+
WORKDIR /src
5251

5352
RUN python load.py --just-compile
5453
RUN python dump.py --just-compile

src/dump.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,14 @@ def dump(self, task: dict) -> None:
5151
with task['file'].open('w') as f:
5252
f.write(','.join(task['fields']) + "\n")
5353
conn.cursor().copy_expert(f"copy {task['copy_expr']} to stdout csv", f)
54-
print(f"Done dumping [{task['filename']}]")
55-
54+
if task['remove_newline']:
55+
with task['file'].open('rb+') as f:
56+
f.seek(-1, os.SEEK_END)
57+
if f.read(1) == b'\n':
58+
f.seek(-1, os.SEEK_END)
59+
f.truncate()
60+
61+
print(f"Done dumping [{task['filename']}]")
5662
conn.cursor().close()
5763
conn.close()
5864

src/load.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import sys
88

99
from time import time, sleep
10-
from pandas.core.frame import DataFrame
1110
from pandas.core.series import Series
1211
from pathlib import Path
1312
from psycopg2 import OperationalError
@@ -23,23 +22,35 @@
2322
CSV_DIR = os.getenv('CSV_DIR')
2423
TABLES_META = os.getenv('TABLES_META')
2524

25+
2626
def simplify_name(fname: str) -> str:
    """Normalize a name into a lowercase identifier of letters, digits and '_'.

    A trailing ``.csv`` suffix is removed, every character outside
    ``[a-zA-Z0-9]`` is replaced with an underscore, and the result is
    lowercased.

    Args:
        fname: The name to normalize (e.g. a CSV file name or column header).

    Returns:
        The normalized name.
    """
    # Raw strings: '\.' in a plain literal is an invalid escape sequence and
    # triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
    fname = re.sub(r'\.csv$', '', fname)
    fname = re.sub(r'[^a-zA-Z0-9]', '_', fname)
    return fname.lower()
3030

31+
3132
def copy_expression(tname: str, fields: Series) -> str:
    """Build a parenthesised ``select`` over *tname*, ordered by ``id``.

    Each entry of *fields* maps a column name to its dtype. Column names are
    passed through ``simplify_name`` and double-quoted. Two dtypes get a
    textual rendering instead of the raw column:

    * ``bool`` columns are emitted as the strings ``TRUE`` / ``FALSE``;
    * ``float64`` columns are emitted as text, with ``.0`` appended when the
      text rendering is a plain integer (``3`` becomes ``3.0``).

    Args:
        tname: Name of the table to select from (inserted verbatim).
        fields: Mapping of column name to dtype, iterated via ``.items()``.

    Returns:
        A string of the form ``(select <columns> from <tname> order by id)``.
    """
    columns = []
    for field, ftype in fields.items():
        tfield = simplify_name(field)
        if ftype == 'bool':
            columns.append(
                f'case when "{tfield}" = true then \'TRUE\''
                f' else \'FALSE\' end as "{tfield}"'
            )
        elif ftype == 'float64':
            # Only a bare integer rendering needs the ".0" suffix. Matching
            # on "contains no dot" would also rewrite values such as NaN,
            # Infinity or 1e+20 into NaN.0 / Infinity.0 / 1e+20.0.
            columns.append(
                f'case when "{tfield}"::text ~ \'^-?[0-9]+$\''
                f' then "{tfield}"::text||\'.0\''
                f' else "{tfield}"::text end as "{tfield}"'
            )
        else:
            columns.append(f'"{tfield}"')

    return f"(select {','.join(columns)} from {tname} order by id)"
4252

53+
4354
def create_table(conn: connection, fname: str, fields: Series) -> dict:
4455
tname = simplify_name(fname)
4556
meta = {
@@ -48,6 +59,7 @@ def create_table(conn: connection, fname: str, fields: Series) -> dict:
4859
'filename': fname,
4960
'fields': [],
5061
'tablefields': [],
62+
'remove_newline': True,
5163
}
5264

5365
tfields = list()
@@ -61,9 +73,13 @@ def create_table(conn: connection, fname: str, fields: Series) -> dict:
6173

6274
tfields.append(f'"{tfield}" {DATA_TYPES[ftype.name]}')
6375

64-
conn.cursor().execute(f"create table {tname} (id serial, {','.join(tfields)})")
76+
conn.cursor().execute(
77+
f"create table {tname}"
78+
f" (id serial, {','.join(tfields)})"
79+
)
6580
return meta
6681

82+
6783
def load(conn: connection, csv_file: Path) -> dict:
6884
df = pd.read_csv(csv_file, iterator=True, chunksize=10000)
6985

@@ -76,7 +92,14 @@ def load(conn: connection, csv_file: Path) -> dict:
7692

7793
fields = ','.join([f'"{f}"' for f in meta['tablefields']])
7894
with csv_file.open('r') as f:
79-
conn.cursor().copy_expert(f"copy {meta['tablename']}({fields}) from stdin with header csv", f)
95+
conn.cursor().copy_expert(
96+
f"copy {meta['tablename']}({fields})"
97+
f" from stdin with header csv", f
98+
)
99+
with csv_file.open('rb') as f:
100+
f.seek(-1, os.SEEK_END)
101+
if f.read(1) == b'\n':
102+
meta['remove_newline'] = False
80103

81104
return meta
82105

@@ -86,7 +109,8 @@ def load(conn: connection, csv_file: Path) -> dict:
86109
parser.add_argument(
87110
"--just-compile",
88111
action="store_true",
89-
help="Don't do actual load but just compile the script to speedup the first load",
112+
help="Don't do actual load but just compile"
113+
" the script to speedup the first load",
90114
)
91115
args = parser.parse_args()
92116
if args.just_compile:

0 commit comments

Comments
 (0)