Commit 7a7b061: new scoring
1 parent 86f0c87

8 files changed: +247 / -75 lines

psl_proof/__main__.py

Lines changed: 2 additions & 2 deletions
@@ -15,11 +15,11 @@
 def load_config() -> Dict[str, Any]:
     """Load proof configuration from environment variables."""
     config = {
-        'dlp_id': 4,
+        'dlp_id': 21,
         'input_dir': INPUT_DIR,
         'salt': '5EkntCWI',
         # 'validator_base_api_url': 'https://api.vana.genesis.dfusion.ai'
-        'validator_base_api_url': ' https://27ef7490d36e.ngrok-free.app'
+        'validator_base_api_url': 'https://b1858a42a344.ngrok-free.app'
     }
     logging.info(f"Using config: {json.dumps(config, indent=2)}")
     return config
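Note: load_config()'s docstring says configuration comes from environment variables, but dlp_id and validator_base_api_url are hardcoded above. A minimal sketch of an env-based override; the variable names DLP_ID and VALIDATOR_BASE_API_URL are illustrative assumptions, not part of this commit:

import os

# Hypothetical env-var overrides; names and fallbacks are illustrative only.
dlp_id = int(os.environ.get('DLP_ID', '21'))
validator_base_api_url = os.environ.get(
    'VALIDATOR_BASE_API_URL',
    'https://b1858a42a344.ngrok-free.app'  # fallback mirrors the new hardcoded value
)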

psl_proof/models/cargo_data.py

Lines changed: 3 additions & 1 deletion
@@ -135,8 +135,10 @@ def to_submission_json(self) :
         return json

     def submission_by(self):
+        # Bot always stores reference as 'telegram:{userId}' regardless of source
+        # So normalize telegramMiner -> telegram for token verification
         source = self.source
-        if (source == DataSource.telegramMiner):
+        if source == DataSource.telegramMiner:
             source = DataSource.telegram
         return f"{source.name}:{self.user}"
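The normalization above can be exercised in isolation. A simplified stand-in (the enum is trimmed to the two relevant members and the free function mirrors the method above; everything else is illustrative):

from enum import Enum

class DataSource(Enum):  # trimmed stand-in for the real enum
    telegram = 1
    telegramMiner = 2

def submission_by(source: DataSource, user: str) -> str:
    # telegramMiner data is stored by the bot as 'telegram:{userId}', so normalize
    if source == DataSource.telegramMiner:
        source = DataSource.telegram
    return f"{source.name}:{user}"

print(submission_by(DataSource.telegram, "12345"))       # telegram:12345
print(submission_by(DataSource.telegramMiner, "12345"))  # telegram:12345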

psl_proof/models/submission_dtos.py

Lines changed: 28 additions & 2 deletions
@@ -1,4 +1,4 @@
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 from dataclasses import dataclass, field
 from datetime import datetime

@@ -31,4 +31,30 @@ class SubmissionHistory:
 @dataclass
 class SubmitDataResponse:
     is_valid: bool
-    error_text: str
+    error_text: str
+
+@dataclass
+class ChatEvaluationSummary:
+    """Summary of evaluation for a single chat"""
+    source_chat_id: str
+    message_count: int
+    chat_quality: float
+    chat_uniqueness: float
+
+@dataclass
+class EvaluationDetails:
+    """Details of the evaluation"""
+    total_messages: int
+    unique_messages: int
+    llm_reasoning: Optional[str] = None
+    chat_summaries: List[ChatEvaluationSummary] = field(default_factory=list)
+
+@dataclass
+class EvaluateSubmissionResponse:
+    """Response from the /api/submissions/evaluate endpoint"""
+    is_valid: bool
+    error_text: str
+    quality: float = 0.0
+    uniqueness: float = 0.0
+    score: float = 0.0  # Final score: Quality × Uniqueness (multiplicative)
+    details: Optional[EvaluationDetails] = None
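One plausible way the new types could be populated from the evaluate endpoint's JSON. The import path matches this file; the payload keys and the parse_evaluate_response helper are assumptions for illustration, not part of the commit:

from psl_proof.models.submission_dtos import (
    ChatEvaluationSummary, EvaluationDetails, EvaluateSubmissionResponse
)

def parse_evaluate_response(payload: dict) -> EvaluateSubmissionResponse:
    # Hypothetical mapping from response JSON to the dataclasses above.
    details = None
    if payload.get('details'):
        d = payload['details']
        details = EvaluationDetails(
            total_messages=d.get('total_messages', 0),
            unique_messages=d.get('unique_messages', 0),
            llm_reasoning=d.get('llm_reasoning'),
            chat_summaries=[
                ChatEvaluationSummary(
                    source_chat_id=c['source_chat_id'],
                    message_count=c['message_count'],
                    chat_quality=c['chat_quality'],
                    chat_uniqueness=c['chat_uniqueness'],
                )
                for c in d.get('chat_summaries', [])
            ],
        )
    return EvaluateSubmissionResponse(
        is_valid=payload.get('is_valid', False),
        error_text=payload.get('error_text', ''),
        quality=payload.get('quality', 0.0),
        uniqueness=payload.get('uniqueness', 0.0),
        score=payload.get('score', 0.0),
        details=details,
    )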

psl_proof/models/verification_dtos.py

Lines changed: 1 addition & 0 deletions
@@ -7,3 +7,4 @@ class VerifyTokenResult:
     is_valid: bool
     error_text: str
     proof_token: str
+    cooldown_period_hours: float = 0 # Cooldown period from backend config
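Because cooldown_period_hours defaults to 0, a validator response that omits the field still deserializes. A short sketch; the payload keys and helper name are assumptions:

from psl_proof.models.verification_dtos import VerifyTokenResult

def parse_verify_response(payload: dict) -> VerifyTokenResult:
    # A missing 'cooldown_period_hours' falls back to the dataclass default of 0.
    return VerifyTokenResult(
        is_valid=payload.get('is_valid', False),
        error_text=payload.get('error_text', ''),
        proof_token=payload.get('proof_token', ''),
        cooldown_period_hours=payload.get('cooldown_period_hours', 0),
    )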

psl_proof/proof.py

Lines changed: 44 additions & 38 deletions
@@ -8,8 +8,7 @@
 from psl_proof.models.proof_response import ProofResponse
 from psl_proof.utils.hashing_utils import salted_data, serialize_bloom_filter_base64, deserialize_bloom_filter_base64
 from psl_proof.models.cargo_data import SourceChatData, CargoData, SourceData, DataSource, MetaData, DataSource
-from psl_proof.utils.validate_data import validate_data, get_total_score
-from psl_proof.utils.submission import submit_data
+from psl_proof.utils.submission import submit_data, evaluate_submission
 from psl_proof.utils.verification import verify_token, VerifyTokenResult
 from psl_proof.models.submission_dtos import ChatHistory, SubmissionChat, SubmissionHistory
 from psl_proof.utils.submission import get_submission_historical_data
@@ -29,12 +28,13 @@ def generate(self) -> ProofResponse:
         current_timestamp = datetime.now(timezone.utc)

         source_data = None
+        raw_input_data = None # Keep raw data for evaluate endpoint
         for input_filename in os.listdir(self.config['input_dir']):
             input_file = os.path.join(self.config['input_dir'], input_filename)
             with open(input_file, 'r') as f:
-                input_data = json.load(f)
+                raw_input_data = json.load(f)
                 source_data = get_source_data(
-                    input_data,
+                    raw_input_data,
                     current_timestamp
                 )
                 break
@@ -50,11 +50,13 @@ def generate(self) -> ProofResponse:
             source_data
         )
         is_data_authentic = verify_result
+        cooldown_period_hours = 4
         if is_data_authentic:
             #print(f"verify_result: {verify_result}")
             is_data_authentic = verify_result.is_valid
             proof_failed_reason = verify_result.error_text
             source_data.proof_token = verify_result.proof_token
+            cooldown_period_hours = verify_result.cooldown_period_hours

         cargo_data = CargoData(
             source_data = source_data,
@@ -73,11 +75,11 @@ def generate(self) -> ProofResponse:
         cargo_data.chat_histories = submission_history_data.chat_histories
         cargo_data.last_submission = submission_history_data.last_submission

-        cool_down_period = 4 # hours
+        # Use cooldown period from backend config
         submission_time_elapsed = cargo_data.submission_time_elapsed()
-        if is_data_authentic and cargo_data.last_submission and submission_time_elapsed < cool_down_period:
+        if is_data_authentic and cargo_data.last_submission and submission_time_elapsed < cooldown_period_hours:
             is_data_authentic = False
-            proof_failed_reason = f"Last submission was made within the past {cool_down_period} hours"
+            proof_failed_reason = f"Last submission was made within the past {cooldown_period_hours} hours"

         metadata = MetaData(
             source_id = source_user_hash_64,
@@ -102,54 +104,58 @@ def generate(self) -> ProofResponse:
             logging.info(f"ProofResponseAttributes: {json.dumps(self.proof_response.attributes, indent=2)}")
             return self.proof_response

-        #validate/proof data ...
-        validate_data(
+        # Call backend evaluate endpoint for quality and uniqueness scores
+        # This replaces the local validate_data call
+        evaluate_result = evaluate_submission(
             self.config,
-            cargo_data,
-            self.proof_response
+            source_data,
+            raw_input_data
         )
+
+        if not evaluate_result.is_valid:
+            print(f"Evaluation failed: {evaluate_result.error_text}")
+            self.proof_response.set_proof_is_invalid()
+            self.proof_response.attributes = {
+                'proof_valid': False,
+                'proof_failed_reason': evaluate_result.error_text,
+                'did_score_content': False,
+                'source': source_data.source.name,
+                'revision': data_revision,
+                'submitted_on': current_timestamp.isoformat()
+            }
+            self.proof_response.metadata = metadata
+            logging.info(f"ProofResponseAttributes: {json.dumps(self.proof_response.attributes, indent=2)}")
+            return self.proof_response

-        maximum_score = 1
-        reward_factor = 100 # Maximium VFSN, Max. reward per chat --> 1 VFSN.
-        self.proof_response.quality = cargo_data.total_quality / reward_factor
-        if (self.proof_response.quality > maximum_score):
-            self.proof_response.quality = maximum_score
-
-        self.proof_response.uniqueness = cargo_data.total_uniqueness / reward_factor
-        if (self.proof_response.uniqueness > maximum_score):
-            self.proof_response.uniqueness = maximum_score
-        #score data
-        total_score = get_total_score(
-            self.proof_response.quality,
-            self.proof_response.uniqueness
-        )
-        print(f"Scores >> Quality: {self.proof_response.quality} | Uniqueness: {self.proof_response.uniqueness} | Total: {total_score}")
+        # Use scores from backend evaluation (multiplicative formula: Quality × Uniqueness)
+        maximum_score = 1.0
+        self.proof_response.quality = min(evaluate_result.quality, maximum_score)
+        self.proof_response.uniqueness = min(evaluate_result.uniqueness, maximum_score)
+
+        # Use backend's pre-calculated score directly (Quality × Uniqueness)
+        total_score = min(evaluate_result.score, maximum_score)
+        print(f"Scores >> Quality: {self.proof_response.quality:.4f} | Uniqueness: {self.proof_response.uniqueness:.4f} | Total: {total_score:.4f} (Q×U)")

-        minimum_score = 0.05 / reward_factor
-        self.proof_response.valid = True # might other factor affect it
-        self.proof_response.score = total_score
-        if total_score < minimum_score:
-            self.proof_response.score = minimum_score
-        if total_score > maximum_score:
-            self.proof_response.score = maximum_score
+        minimum_score = 0.0005 # 0.05 / 100
+        self.proof_response.valid = True
+        self.proof_response.score = max(minimum_score, min(total_score, maximum_score))

-        print(f"Proof score: {self.proof_response.score }")
+        print(f"Proof score: {self.proof_response.score}")
         self.proof_response.attributes = {
             'score': self.proof_response.score,
             'did_score_content': True,
             'source': source_data.source.name,
             'revision': data_revision,
-            'submitted_on': current_timestamp.isoformat() #,
-            #'chat_data': cargo_data.get_chat_list_data()
+            'submitted_on': current_timestamp.isoformat()
         }
         self.proof_response.metadata = metadata

-        #Submit Source data to server
+        # Submit source data to server (submission metadata only, raw data already stored by evaluate)
         submit_data_result = submit_data(
             self.config,
             source_data
         )
-        if submit_data_result and not submit_data_result.is_valid :
+        if submit_data_result and not submit_data_result.is_valid:
             logging.info(f"submit data failed: {submit_data_result.error_text}")
             self.proof_response.set_proof_is_invalid()
             self.proof_response.attributes.pop('score', None)
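Worked through with concrete numbers, the new clamping behaves as follows. The backend is expected to return score = quality × uniqueness (per the comments above); the small reimplementation below is only for illustration, not the committed code path:

def final_score(quality: float, uniqueness: float,
                minimum_score: float = 0.0005, maximum_score: float = 1.0) -> float:
    # Mirrors the logic above: cap each component, multiply, then clamp the product.
    q = min(quality, maximum_score)
    u = min(uniqueness, maximum_score)
    return max(minimum_score, min(q * u, maximum_score))

print(final_score(0.8, 0.5))  # 0.4
print(final_score(0.0, 0.0))  # 0.0005 (floor applied)
print(final_score(1.2, 1.5))  # 1.0 (components capped before multiplying)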
Lines changed: 26 additions & 26 deletions
@@ -1,26 +1,26 @@
-from keybert import KeyBERT
-from transformers import pipeline
-
-def get_keywords_keybert(chats):
-    kw_model = KeyBERT(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
-    keywords = kw_model.extract_keywords(chats)
-    return keywords
-
-def get_sentiment_data(chats):
-    #Patrick_ToCheck this model do not work...
-    #sentiment_analyzer = pipeline("sentiment-analysis", model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
-    sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual")
-
-    messages = chats.split(">") #TODO use real way to split out different messages
-    #TODO: make sure no single message is too long for classification, can break it up if length too long
-    sentiments = sentiment_analyzer(messages)
-    category_scores = {"positive": 0, "neutral": 0, "negative": 0}
-    category_counts = {"positive": 0, "neutral": 0, "negative": 0}
-    for result in sentiments:
-        label = result['label'].lower()
-        category_scores[label] += result['score']
-        category_counts[label] += 1
-    # Normalize scores by dividing by the total number of messages
-    total_messages = len(messages)
-    normalized_scores = {key: (category_scores[key] / total_messages) for key in category_scores}
-    return normalized_scores
+from keybert import KeyBERT
+from transformers import pipeline
+
+def get_keywords_keybert(chats):
+    kw_model = KeyBERT(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+    keywords = kw_model.extract_keywords(chats)
+    return keywords
+
+def get_sentiment_data(chats):
+    #Patrick_ToCheck this model do not work...
+    #sentiment_analyzer = pipeline("sentiment-analysis", model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+    sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual")
+
+    messages = chats.split(">") #TODO use real way to split out different messages
+    #TODO: make sure no single message is too long for classification, can break it up if length too long
+    sentiments = sentiment_analyzer(messages)
+    category_scores = {"positive": 0, "neutral": 0, "negative": 0}
+    category_counts = {"positive": 0, "neutral": 0, "negative": 0}
+    for result in sentiments:
+        label = result['label'].lower()
+        category_scores[label] += result['score']
+        category_counts[label] += 1
+    # Normalize scores by dividing by the total number of messages
+    total_messages = len(messages)
+    normalized_scores = {key: (category_scores[key] / total_messages) for key in category_scores}
+    return normalized_scores
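The file above is rewritten without content changes (26 lines removed and re-added verbatim, likely a whitespace or line-ending normalization). For reference, a hedged usage sketch that mirrors its two helpers directly; it assumes keybert and transformers are installed and the models can be downloaded, and the sample chat string is illustrative:

from keybert import KeyBERT
from transformers import pipeline

chats = "I love this dataset>the weather is terrible today>thanks for the help"

# Keyword extraction, same model as get_keywords_keybert above
kw_model = KeyBERT(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
print(kw_model.extract_keywords(chats))

# Sentiment per message, same model and '>' split as get_sentiment_data above
analyzer = pipeline("sentiment-analysis",
                    model="cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual")
print(analyzer(chats.split(">")))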
