Skip to content

Commit 147941f

Browse files
authored
feat: show commit date in check reports and add async GitHub parser
- Show the commit date when comparing GitHub works; - Use sessions for interactions with GitHub; - Apply some logic optimizations and add an async GitHub parser module.
1 parent 4d4a0e5 commit 147941f

File tree

15 files changed

+1064
-410
lines changed

15 files changed

+1064
-410
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
UTIL_VERSION := 0.3.4
1+
UTIL_VERSION := 0.3.5
22
UTIL_NAME := codeplag
33
PWD := $(shell pwd)
44

5-
BASE_DOCKER_VERSION := 1.1
5+
BASE_DOCKER_VERSION := 1.2
66
BASE_DOCKER_TAG := $(shell echo $(UTIL_NAME)-base-ubuntu20.04:$(BASE_DOCKER_VERSION) | tr A-Z a-z)
77
TEST_DOCKER_TAG := $(shell echo $(UTIL_NAME)-test-ubuntu20.04:$(UTIL_VERSION) | tr A-Z a-z)
88
DOCKER_TAG ?= $(shell echo $(UTIL_NAME)-ubuntu20.04:$(UTIL_VERSION) | tr A-Z a-z)

docker.mk

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@ docker-test: docker-test-image
2121
"$(TEST_DOCKER_TAG)"
2222

2323
docker-autotest: docker-test-image
24-
docker run --rm \
25-
--volume $(PWD)/test:/usr/src/$(UTIL_NAME)/test \
26-
--env-file .env \
27-
"$(TEST_DOCKER_TAG)" bash -c \
28-
"make && make autotest"
24+
@if [ $(shell find . -maxdepth 1 -type f -name .env | wc --lines) != 1 ]; then \
25+
echo "Requires '.env' file with provided GitHub token for running autotests."; \
26+
exit 200; \
27+
else \
28+
docker run --rm \
29+
--volume $(PWD)/test:/usr/src/$(UTIL_NAME)/test \
30+
--env-file .env \
31+
"$(TEST_DOCKER_TAG)" bash -c \
32+
"make && make autotest"; \
33+
fi
2934

3035
docker-build-package: docker-test-image
3136
docker run --rm \
@@ -50,7 +55,9 @@ docker-image: docker-base-image docker-test-image
5055
)
5156

5257
docker-run: docker-image
58+
@touch .env
5359
docker run --rm --tty --interactive \
60+
--env-file .env \
5461
"$(DOCKER_TAG)"
5562

5663
docker-rmi:

setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
'python-decouple~=3.6',
1616
'requests~=2.31.0',
1717
'typing-extensions~=4.3.0',
18+
'aiohttp~=3.8.5',
19+
'cachetools==5.3.1',
20+
'gidgethub~=5.3.0',
1821
]
1922
UTIL_NAME = os.getenv('UTIL_NAME')
2023
UTIL_VERSION = os.getenv('UTIL_VERSION')

src/codeplag/cplag/utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from codeplag.display import eprint
1212
from codeplag.getfeatures import AbstractGetter, get_files_path_from_directory
1313
from codeplag.types import ASTFeatures
14+
from webparsers.types import WorkInfo
1415

1516

1617
def get_cursor_from_file(filepath: Path,
@@ -83,20 +84,21 @@ def __init__(
8384
path_regexp=path_regexp
8485
)
8586

86-
def get_from_content(self, file_content: str, url_to_file: str) -> Optional[ASTFeatures]:
87+
def get_from_content(self, work_info: WorkInfo) -> Optional[ASTFeatures]:
8788
with open(FILE_DOWNLOAD_PATH, 'w', encoding='utf-8') as out_file:
88-
out_file.write(file_content)
89+
out_file.write(work_info.code)
8990
cursor = get_cursor_from_file(FILE_DOWNLOAD_PATH, COMPILE_ARGS)
90-
if not cursor:
91+
if cursor is None:
9192
self.logger.error(
92-
"Unsuccessfully attempt to get AST from the file %s.", url_to_file
93+
"Unsuccessfully attempt to get AST from the file %s.", work_info.link
9394
)
9495
return
9596

9697
# hook for correct filtering info while parsing source code
9798
features = get_features(cursor, FILE_DOWNLOAD_PATH)
9899
os.remove(FILE_DOWNLOAD_PATH)
99-
features.filepath = url_to_file
100+
features.filepath = work_info.link
101+
features.modify_date = work_info.commit.date
100102

101103
return features
102104

src/codeplag/display.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,21 @@ def print_compare_result(features1: ASTFeatures,
9999

100100
print(" " * 40)
101101
print('+' * 40)
102-
print(
103-
'May be similar:',
104-
features1.filepath,
105-
features2.filepath,
106-
end='\n\n', sep='\n'
107-
)
102+
if features1.modify_date is not None and features2.modify_date is not None:
103+
message = (
104+
'-----\n'
105+
f'{features1.filepath}\n{features1.modify_date}\n'
106+
'-----\n'
107+
f'{features2.filepath}\n{features2.modify_date}\n'
108+
'-----\n'
109+
)
110+
else:
111+
message = (
112+
f'{features1.filepath}\n'
113+
f'{features2.filepath}\n'
114+
)
115+
116+
print('May be similar:', message, end='\n\n', sep='\n')
108117
main_metrics_df = pd.DataFrame(
109118
[compare_info.fast], index=['Similarity'],
110119
columns=pd.Index(

src/codeplag/getfeatures.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import List, Literal, Optional, Union, overload
88

99
from decouple import Config, RepositoryEnv
10+
from requests import Session
1011

1112
from codeplag.consts import (
1213
ALL_EXTENSIONS,
@@ -18,6 +19,7 @@
1819
from codeplag.logger import get_logger
1920
from codeplag.types import ASTFeatures, Extension, Extensions
2021
from webparsers.github_parser import GitHubParser
22+
from webparsers.types import WorkInfo
2123

2224

2325
def get_files_path_from_directory(
@@ -101,7 +103,7 @@ def _set_access_token(self, env_path: Optional[Path]) -> None:
101103
self._access_token: str = os.environ.get('ACCESS_TOKEN', '')
102104
else:
103105
env_config = Config(RepositoryEnv(env_path))
104-
self._access_token: str = env_config.get('ACCESS_TOKEN', default='')
106+
self._access_token: str = env_config.get('ACCESS_TOKEN', default='') # type: ignore
105107

106108
if not self._access_token:
107109
self.logger.warning('GitHub access token is not defined.')
@@ -111,11 +113,12 @@ def _set_github_parser(self, branch_policy: bool) -> None:
111113
file_extensions=SUPPORTED_EXTENSIONS[self.extension],
112114
check_all=branch_policy,
113115
access_token=self._access_token,
114-
logger=get_logger('webparsers', LOG_PATH)
116+
logger=get_logger('webparsers', LOG_PATH),
117+
session=Session()
115118
)
116119

117120
@abstractmethod
118-
def get_from_content(self, file_content: str, url_to_file: str) -> Optional[ASTFeatures]:
121+
def get_from_content(self, work_info: WorkInfo) -> Optional[ASTFeatures]:
119122
...
120123

121124
@abstractmethod
@@ -165,8 +168,8 @@ def get_from_github_files(self, github_files: List[str]) -> List[ASTFeatures]:
165168

166169
self.logger.debug(f"{GET_FRAZE} GitHub urls")
167170
for github_file in github_files:
168-
file_content = self.github_parser.get_file_from_url(github_file)[0]
169-
features = self.get_from_content(file_content, github_file)
171+
work_info = self.github_parser.get_file_from_url(github_file)
172+
features = self.get_from_content(work_info)
170173
if features:
171174
works.append(features)
172175

@@ -200,8 +203,8 @@ def get_from_github_project_folders(
200203
gh_prj_files = self.github_parser.get_files_generator_from_dir_url(
201204
github_project, path_regexp=self.path_regexp
202205
)
203-
for file_content, url_file in gh_prj_files:
204-
features = self.get_from_content(file_content, url_file)
206+
for work_info in gh_prj_files:
207+
features = self.get_from_content(work_info)
205208
if features is None:
206209
continue
207210

@@ -251,8 +254,8 @@ def get_from_users_repos(
251254
files = self.github_parser.get_files_generator_from_repo_url(
252255
repo.html_url, path_regexp=self.path_regexp
253256
)
254-
for file_content, url_file in files:
255-
features = self.get_from_content(file_content, url_file)
257+
for work_info in files:
258+
features = self.get_from_content(work_info)
256259
if features is None:
257260
continue
258261

src/codeplag/pyplag/utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from codeplag.logger import get_logger
1010
from codeplag.pyplag.astwalkers import ASTWalker
1111
from codeplag.types import ASTFeatures
12+
from webparsers.types import WorkInfo
1213

1314
# TODO: Remove from globals
1415
logger = get_logger(__name__, LOG_PATH)
@@ -130,15 +131,16 @@ def __init__(
130131
path_regexp=path_regexp
131132
)
132133

133-
def get_from_content(self, file_content: str, url_to_file: str) -> Optional[ASTFeatures]:
134-
tree = get_ast_from_content(file_content, url_to_file)
134+
def get_from_content(self, work_info: WorkInfo) -> Optional[ASTFeatures]:
135+
tree = get_ast_from_content(work_info.code, work_info.link)
135136
if tree is not None:
136-
return get_features_from_ast(tree, url_to_file)
137+
features = get_features_from_ast(tree, work_info.link)
138+
features.modify_date = work_info.commit.date
139+
return features
137140

138141
self.logger.error(
139-
"Unsuccessfully attempt to get AST from the file %s.", url_to_file
142+
"Unsuccessfully attempt to get AST from the file %s.", work_info.link
140143
)
141-
return
142144

143145
def get_from_files(self, files: List[Path]) -> List[ASTFeatures]:
144146
if not files:

src/codeplag/types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class ASTFeatures:
4848
"""Class contains the source code metadata."""
4949

5050
filepath: Union[Path, str]
51+
modify_date: Optional[str] = None
5152

5253
count_of_nodes: int = 0
5354
head_nodes: List[str] = field(default_factory=list)
@@ -86,10 +87,13 @@ class CompareInfo(NamedTuple):
8687
structure: Optional[StructuresInfo] = None
8788

8889

90+
# TODO: Rework its structure
8991
class WorksReport(TypedDict):
9092
date: str
9193
first_path: str
9294
second_path: str
95+
first_modify_date: NotRequired[str]
96+
second_modify_date: NotRequired[str]
9397
first_heads: List[str]
9498
second_heads: List[str]
9599
fast: Dict[str, int] # dict from FastMetrics

src/codeplag/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ def save_result(self,
207207
fast=fast_metrics._asdict(),
208208
structure=struct_info_dict
209209
)
210+
if first_work.modify_date:
211+
report["first_modify_date"] = first_work.modify_date
212+
if second_work.modify_date:
213+
report["second_modify_date"] = second_work.modify_date
210214

211215
try:
212216
report_file = self.reports / f'{uuid.uuid4().hex}.json'

0 commit comments

Comments
 (0)