11"""MIT License.
22
3- Written 2025 by Stepan Pahomov, Daniil Lokosov
3+ Written 2025 by Stepan Pahomov, Daniil Lokosov, Artyom Semidolin.
44"""
55
66import atexit
7- from datetime import datetime
8- from typing import Final , NamedTuple
7+ from pathlib import Path
8+ from typing import Final
99
1010from pymongo import MongoClient
1111from pymongo .collection import Collection
2929 deserialize_compare_result_from_dict ,
3030 serialize_compare_result_to_dict ,
3131)
32- from codeplag .types import ASTFeatures , FullCompareInfo
32+ from codeplag .types import ASTFeatures , FullCompareInfo , Settings
3333
3434
3535class MongoDBConnection :
36- DB_NAME : Final [ str ] = f"{ UTIL_NAME } _cache"
36+ DB_NAME : Final = f"{ UTIL_NAME } _cache"
3737
3838 def __init__ (
3939 self : Self ,
@@ -56,22 +56,34 @@ def __init__(
5656 self .password : str = password
5757 self .url : str = f"mongodb://{ user } :{ password } @{ host } :{ port } /"
5858
59- # Connecting to MongoDB
6059 try :
6160 self .client = MongoClient (self .url , serverSelectionTimeoutMS = 3000 )
62- self .client .admin .command ("ping" ) # Checking the connection
61+ self .client .admin .command ("ping" )
6362 except ConnectionFailure as err :
6463 logger .error ("Failed to connect to MongoDB: %s" , err )
6564 raise Exception (
6665 "Can't connect to MongoDB with selected 'mongo'. Check your settings. "
6766 "Please note if the application is running in Docker, the host may change."
6867 ) from err
69- logger .debug ("Successfully connected to MongoDB! " )
68+ logger .debug ("Successfully connected to the MongoDB. " )
7069 self .db = self .client [self .DB_NAME ]
7170
7271 # Registering the disconnect method for execution upon program termination
7372 atexit .register (self .disconnect )
7473
@classmethod
def from_settings(
    cls: type["MongoDBConnection"], settings_conf: Settings
) -> "MongoDBConnection":
    """Build a connection from the utility settings.

    Host, port and user fall back to the module-level defaults when the
    corresponding keys are absent. The password has no default and must
    be present in the settings.

    Args:
        settings_conf (Settings): Settings mapping. The keys read are
            'mongo_host', 'mongo_port', 'mongo_user' and 'mongo_pass'.

    Returns:
        MongoDBConnection: Connection created through the regular constructor.

    Raises:
        ValueError: If 'mongo_pass' is absent (or None) in the settings.
    """
    # Validate the only mandatory value first so that no connection attempt
    # is made with incomplete credentials.
    password = settings_conf.get("mongo_pass")
    if password is None:
        raise ValueError("'mongo' reports_extension provided, but 'mongo-pass' is missing")

    return cls(
        host=settings_conf.get("mongo_host", DEFAULT_MONGO_HOST),
        port=settings_conf.get("mongo_port", DEFAULT_MONGO_PORT),
        user=settings_conf.get("mongo_user", DEFAULT_MONGO_USER),
        password=password,
    )
86+
7587 def disconnect (self : Self ) -> None :
7688 """Close the connection to MongoDB.
7789
@@ -99,16 +111,7 @@ def clear_db(self: Self) -> None:
99111
100112
101113class ReportRepository :
102- class CompareInfoDocument (NamedTuple ):
103- """Compare Info Document structure."""
104-
105- first_sha256 : str
106- second_sha256 : str
107- first_modify_date : datetime
108- second_modify_date : datetime
109- compare_info : FullCompareInfo
110-
111- COLLECTION_NAME : str = "compare_info"
114+ COLLECTION_NAME : Final = "compare_info"
112115
113116 def __init__ (self : Self , mongo_connection : MongoDBConnection ) -> None :
114117 """Initialization of the repository for the compare_info collection."""
@@ -119,83 +122,57 @@ def __init__(self: Self, mongo_connection: MongoDBConnection) -> None:
119122 self .collection : Collection = collection
120123
def get_compare_info(
    self: Self, first_filepath: str | Path, second_filepath: str | Path
) -> FullCompareInfo | None:
    """Fetch the stored comparison result for a pair of files.

    The document key is built from both paths converted to strings and
    sorted, so the order of the arguments does not matter.

    This method does not compare SHA-256 hashes; it only reports whether
    a document with the given key exists.

    Args:
        first_filepath (str | Path): Path to one of the compared files.
        second_filepath (str | Path): Path to the other compared file.

    Returns:
        FullCompareInfo | None: Deserialized comparison result, or None
            when the collection has no document for this pair.
    """
    first_path, second_path = sorted((str(first_filepath), str(second_filepath)))
    # Keep the "first"/"second" key order: it is part of the stored _id.
    found = self.collection.find_one({"_id": {"first": first_path, "second": second_path}})
    if found:
        logger.trace("Compare_info found for file path: (%s, %s)", first_path, second_path)  # type: ignore
        return deserialize_compare_result_from_dict(found)

    logger.trace("No compare_info found for file path: (%s, %s)", first_path, second_path)  # type: ignore
    return None
158150
def write_compare_info(self: Self, compare_info: FullCompareInfo) -> None:
    """Insert or update a document in the compare_info collection.

    The primary key (_id) is a dictionary holding the two file paths as
    strings in sorted order. The remaining fields are the serialized
    comparison result.

    Args:
        compare_info (FullCompareInfo): Information about the comparison
            results; its first_path and second_path form the key.
    """
    # get_compare_info looks documents up by the *sorted* pair of paths, so
    # the key is built the same way here. Without sorting, a result whose
    # paths arrive in the opposite order would be stored under a key that
    # the reader never queries.
    first_path, second_path = sorted(
        (str(compare_info.first_path), str(compare_info.second_path))
    )
    document_id = {"first": first_path, "second": second_path}
    document = {"_id": document_id, **serialize_compare_result_to_dict(compare_info)}

    # Insert or update the document
    self.collection.update_one({"_id": document_id}, {"$set": document}, upsert=True)
    logger.trace(  # type: ignore
        "Document for (%s, %s) successfully inserted/updated.",
        first_path,
        second_path,
    )
195172
196173
197174class FeaturesRepository :
198- COLLECTION_NAME : str = "features"
175+ COLLECTION_NAME : Final = "features"
199176
200177 def __init__ (self : Self , mongo_connection : MongoDBConnection ) -> None :
201178 """Initialization of the repository for the features collection."""
@@ -260,21 +237,14 @@ class MongoReporter(AbstractReporter):
def __init__(self: Self, repository: ReportRepository) -> None:
    """Keep the repository that save_result and get_result operate on."""
    self.repository: ReportRepository = repository
262239
def save_result(self: Self, compare_info: FullCompareInfo) -> None:
    """Write a comparison result to MongoDB through the repository.

    Args:
        compare_info (FullCompareInfo): Contains information about comparisons
            between the first and second works.
    """
    repository = self.repository
    repository.write_compare_info(compare_info)
278248
279249 def get_result (
280250 self : Self ,
@@ -287,14 +257,14 @@ def get_result(
287257 work1 (ASTFeatures): Contains the first work metadata.
288258 work2 (ASTFeatures): Contains the second work metadata.
289259 """
290- cache_val = self .repository .get_compare_info (work1 , work2 )
260+ cache_val = self .repository .get_compare_info (work1 . filepath , work2 . filepath )
291261
292262 if (
293263 cache_val
294264 and cache_val .first_sha256 == work1 .sha256
295265 and cache_val .second_sha256 == work2 .sha256
296266 ):
297- return cache_val . compare_info
267+ return cache_val
298268 else :
299269 return None
300270
0 commit comments