Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check_n_push_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
pip install $(python3 setup.py --install-requirements)
pip install $(python3 setup.py --build-requirements)
pip install --requirement docs/notebooks/requirements.txt
pip install pre-commit==3.4.0
pip install pre-commit==4.1.0
make pre-commit

docker-build-test-autotest:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ default_language_version:
python: python3.10
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.4
rev: v0.9.7
hooks:
- id: ruff
args: [ --fix ]
Expand All @@ -15,4 +15,4 @@ repos:
language: node
pass_filenames: false
types: [ python ]
additional_dependencies: [ 'pyright@1.1.305' ]
additional_dependencies: [ 'pyright@1.1.394' ]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
UTIL_VERSION := 0.5.12
UTIL_VERSION := 0.5.13
UTIL_NAME := codeplag
PWD := $(shell pwd)

USER_UID ?= $(shell id --user)
USER_GID ?= $(shell id --group)

BASE_DOCKER_VERSION := 1.0
BASE_DOCKER_VERSION := 1.1
BASE_DOCKER_TAG := $(shell echo $(UTIL_NAME)-base-ubuntu22.04:$(BASE_DOCKER_VERSION) | tr A-Z a-z)
TEST_DOCKER_TAG := $(shell echo $(UTIL_NAME)-test-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z)
DOCKER_TAG ?= $(shell echo $(UTIL_NAME)-ubuntu22.04:$(UTIL_VERSION) | tr A-Z a-z)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@

- Testing for analyzers with pytest lib (required preinstalled pytest framework).
```
$ pip3 install pytest==7.4.0 pytest-mock==3.11.1
$ pip3 install pytest==8.3.4 pytest-mock==3.14.0
$ make test
```

Expand Down
2 changes: 1 addition & 1 deletion docker/test_ubuntu2204.dockerfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update
RUN apt-get install -y debhelper
RUN pip3 install pytest==7.4.0 pytest-mock==3.11.1 @PYTHON_BUILD_LIBS@
RUN pip3 install pytest==8.3.4 pytest-mock==3.14.0 @PYTHON_BUILD_LIBS@
RUN mkdir -p @LOGS_PATH@

# TODO: Move to middle docker file or make another solution
Expand Down
8 changes: 4 additions & 4 deletions docs/notebooks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
matplotlib~=3.7.3
numpy~=1.23.5
pandas~=2.0.3
matplotlib~=3.10.0
numpy~=1.26.4
pandas~=2.2.3
python-decouple~=3.8
scipy~=1.10.1
scipy~=1.15.2
54 changes: 39 additions & 15 deletions docs/notebooks/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import sys
from datetime import datetime
from time import perf_counter
from typing import Literal
Expand Down Expand Up @@ -34,28 +35,32 @@ def remove_unnecessary_blank_lines(source_code: str) -> str:
return re.sub(pattern, "\n", source_code)


def get_data_from_dir(path: str = "./data", max_count_lines: int | None = None) -> pd.DataFrame:
def get_data_from_dir(
path: str = "./data", max_count_lines: int | None = None
) -> pd.DataFrame | None:
df = pd.DataFrame()
for filename in os.listdir(path):
if not re.search(r".csv$", filename):
continue

tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0)
tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0) # type: ignore
df = df.append(tmp_df, ignore_index=True)

if max_count_lines:
return df[df.count_lines_without_blank_lines < max_count_lines]
result = df[df.count_lines_without_blank_lines < max_count_lines]
assert isinstance(result, pd.DataFrame) or result is None
return result

return df


def save_works_from_repo_url(url: str, check_policy: bool = True) -> None:
def save_works_from_repo_url(url: str, check_policy: bool = True, min_lines: int = 5) -> None:
current_repo_name = url.split("/")[-1]
env_config = Config(RepositoryEnv("../../.env"))
gh = GitHubParser(
file_extensions=(re.compile(r".py$"),),
check_all=check_policy,
access_token=env_config.get("ACCESS_TOKEN"),
access_token=env_config.get("ACCESS_TOKEN", default=""), # type: ignore
)
files = list(gh.get_files_generator_from_repo_url(url))
files = [(remove_unnecessary_blank_lines(file.code), file.link) for file in files]
Expand All @@ -76,22 +81,34 @@ def save_works_from_repo_url(url: str, check_policy: bool = True) -> None:
],
}
)
df = df[df["count_lines_without_blank_lines"] > 5]
filtered_df = df["count_lines_without_blank_lines"]
assert filtered_df is not None
df = df[filtered_df > min_lines]
if df is None:
print(f"Nothing to save with minimal count of lines '{min_lines}'.", file=sys.stderr)
return
df.to_csv(os.path.join("./data/", current_repo_name + ".csv"), sep=";")


def get_time_to_meta(df: pd.DataFrame, iterations: int = 10) -> pd.DataFrame:
count_lines = []
to_meta_time = []
for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows():
filtered_df = df[["content", "link", "count_lines_without_blank_lines"]]
if filtered_df is None:
raise Exception("DataFrame is empty, nothing to parse.")
for index, content in filtered_df.iterrows():
code = content[0]
filepath = content[1]
assert isinstance(code, str)
assert isinstance(filepath, str)
print(index, " " * 20, end="\r")
for _ in range(iterations):
tree = get_ast_from_content(content[0], content[1])
tree = get_ast_from_content(code, filepath)
if tree is None:
break
try:
start = perf_counter()
get_features_from_ast(tree, content[1])
get_features_from_ast(tree, filepath)
end = perf_counter() - start
to_meta_time.append(end)
count_lines.append(content[2])
Expand Down Expand Up @@ -130,7 +147,7 @@ def plot_and_save_result(
p = np.poly1d(z)
plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Линейный тренд.")
elif trend == "n^2":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
square_func,
unique_count_lines,
mean_times,
Expand All @@ -144,7 +161,7 @@ def plot_and_save_result(
label="Квадратичный тренд.",
)
elif trend == "n^3":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
cube_func,
unique_count_lines,
mean_times,
Expand All @@ -156,7 +173,7 @@ def plot_and_save_result(
p = np.poly1d(popt_cons)
plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="Кубический тренд.")
elif trend == "n^4":
popt_cons, _ = curve_fit(
popt_cons, _ = curve_fit( # type: ignore
quart_func,
unique_count_lines,
mean_times,
Expand Down Expand Up @@ -200,14 +217,21 @@ def get_time_algorithms(
raise Exception("Unexpected error when parsing first work.")

features1 = get_features_from_ast(tree1, work.link)
for index, content in df[["content", "link", "count_lines_without_blank_lines"]].iterrows():
filtered_df = df[["content", "link", "count_lines_without_blank_lines"]]
if filtered_df is None:
raise Exception("DataFrame is empty, nothing to parse.")
for index, content in filtered_df.iterrows():
code = content[0]
filepath = content[1]
assert isinstance(code, str)
assert isinstance(filepath, str)
for _ in range(iterations):
print(index, " " * 20, end="\r")
tree2 = get_ast_from_content(content[0], content[1])
tree2 = get_ast_from_content(code, filepath)
if tree2 is None:
continue
try:
features2 = get_features_from_ast(tree2, content[1])
features2 = get_features_from_ast(tree2, filepath)
except Exception:
continue

Expand Down
18 changes: 9 additions & 9 deletions locales/codeplag.pot
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"POT-Creation-Date: 2025-01-03 14:06+0300\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2025-02-25 22:01+0300\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Artyom Semidolin\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand Down Expand Up @@ -190,33 +190,33 @@ msgid ""
"languages."
msgstr ""

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr ""

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr ""

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr ""

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr ""

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr ""

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr ""

Expand Down
16 changes: 8 additions & 8 deletions locales/translations/en/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-16 19:15+0300\n"
"Last-Translator: Artyom Semidolin\n"
Expand Down Expand Up @@ -214,35 +214,35 @@ msgstr ""
"Program help to find similar parts of source codes for the different "
"languages."

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr "Print current version number and exit."

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr "Commands help."

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr "No command is provided; please choose one from the available (--help)."

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr "There is nothing to modify; please provide at least one argument."

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr "The'repo-regexp' option requires the provided 'github-user' option."

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr "All or none of the root paths must be specified."

Expand Down
16 changes: 8 additions & 8 deletions locales/translations/ru/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.5.12\n"
"Project-Id-Version: codeplag 0.5.13\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2024-05-11 12:05+0300\n"
"Last-Translator: Artyom Semidolin\n"
Expand Down Expand Up @@ -224,39 +224,39 @@ msgstr ""
"Программа помогает находить схожие части исходных кодов для разных языков"
" программирования."

#: src/codeplag/codeplagcli.py:366
#: src/codeplag/codeplagcli.py:365
msgid "Print current version number and exit."
msgstr "Выводит текущую версию программы."

#: src/codeplag/codeplagcli.py:372
#: src/codeplag/codeplagcli.py:371
msgid "Commands help."
msgstr "Справка по командам."

#: src/codeplag/codeplagcli.py:387
#: src/codeplag/codeplagcli.py:386
msgid "No command is provided; please choose one from the available (--help)."
msgstr ""
"Ни одна из команд не выбрана, пожалуйста, выбери одну из доступных команд"
" (--help)."

#: src/codeplag/codeplagcli.py:398
#: src/codeplag/codeplagcli.py:397
msgid "There is nothing to modify; please provide at least one argument."
msgstr ""
"Нечего модифицировать, пожалуйста, выберите один из параметров для "
"модификации."

#: src/codeplag/codeplagcli.py:402
#: src/codeplag/codeplagcli.py:401
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'."

#: src/codeplag/codeplagcli.py:410
#: src/codeplag/codeplagcli.py:409
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-project-folder' options."
msgstr ""
"Аргумент 'path-regexp' требует заданного параметра 'directories', "
"'github-user' или 'github-project-folder'."

#: src/codeplag/codeplagcli.py:421 src/codeplag/handlers/report.py:95
#: src/codeplag/codeplagcli.py:420 src/codeplag/handlers/report.py:95
msgid "All paths must be provided."
msgstr "Необходимо указать все корневые пути или не указывать ни одного."

Expand Down
Loading
Loading