Skip to content

Commit 5148777

Browse files
committed
feat: enhance session file parsing and metadata extraction
- Introduced a new function to extract text from content in session records.
- Updated the `parse_session_file` function to handle various record types, improving metadata extraction for session ID, timestamp, and project realpath.
- Enhanced error handling for malformed session lines and improved the handling of message content, including filtering out meta blocks.
- Refactored the logic for retrieving session IDs from the latest session file, accommodating changes in the record structure.
1 parent d723b2a commit 5148777

File tree

1 file changed

+175
-65
lines changed

1 file changed

+175
-65
lines changed

src-tauri/src/services/session.rs

Lines changed: 175 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use serde::{Deserialize, Serialize};
2+
use serde_json::Value;
23
use std::fs;
34
use std::path::Path;
45
use walkdir::WalkDir;
@@ -29,14 +30,20 @@ pub struct Conversation {
2930
pub project_realpath: Option<String>,
3031
}
3132

32-
#[derive(Debug, Deserialize)]
33-
struct SessionRecord {
34-
id: Option<String>,
35-
timestamp: Option<String>,
36-
#[serde(rename = "type", default)]
37-
message_type: Option<String>,
38-
role: Option<String>,
39-
content: Option<serde_json::Value>,
33+
fn extract_text_from_content(value: &Value) -> String {
34+
if let Some(array) = value.as_array() {
35+
array
36+
.iter()
37+
.filter_map(|entry| entry.as_object())
38+
.filter_map(|object| object.get("text"))
39+
.filter_map(|value| value.as_str())
40+
.collect::<Vec<_>>()
41+
.join("")
42+
} else if let Some(text) = value.as_str() {
43+
text.to_string()
44+
} else {
45+
String::new()
46+
}
4047
}
4148

4249
pub fn parse_session_file(content: &str, file_path: &Path) -> Option<Conversation> {
@@ -51,44 +58,131 @@ pub fn parse_session_file(content: &str, file_path: &Path) -> Option<Conversatio
5158
let mut project_realpath: Option<String> = None;
5259

5360
for line in &lines {
54-
if let Ok(record) = serde_json::from_str::<SessionRecord>(line) {
55-
// Get session metadata
56-
if record.id.is_some() && record.timestamp.is_some() {
57-
session_id = record.id;
58-
session_timestamp = record.timestamp;
61+
let record: Value = match serde_json::from_str(line) {
62+
Ok(value) => value,
63+
Err(error) => {
64+
eprintln!("Skipping malformed session line in {:?}: {}", file_path, error);
65+
continue;
66+
}
67+
};
68+
69+
let record_type = record
70+
.get("type")
71+
.and_then(|value| value.as_str())
72+
.unwrap_or_default();
73+
74+
if session_id.is_none() {
75+
if let Some(id) = record.get("id").and_then(|value| value.as_str()) {
76+
session_id = Some(id.to_string());
5977
}
78+
}
79+
80+
if session_timestamp.is_none() {
81+
if let Some(ts) = record.get("timestamp").and_then(|value| value.as_str()) {
82+
session_timestamp = Some(ts.to_string());
83+
}
84+
}
85+
86+
let payload = record.get("payload");
87+
88+
match record_type {
89+
"session_meta" => {
90+
if let Some(meta) = payload.and_then(|value| value.as_object()) {
91+
if let Some(id) = meta.get("id").and_then(|value| value.as_str()) {
92+
session_id = Some(id.to_string());
93+
}
6094

61-
// Parse messages (check for "type": "message")
62-
if record.message_type.as_deref() == Some("message")
63-
&& record.role.is_some()
64-
&& record.content.is_some()
65-
{
66-
let role = record.role.unwrap();
67-
let content_value = record.content.unwrap();
68-
69-
let content_text = if let Some(array) = content_value.as_array() {
70-
array
71-
.iter()
72-
.filter_map(|item| {
73-
if let Some(obj) = item.as_object() {
74-
if let Some(text) = obj.get("text") {
75-
text.as_str()
76-
} else {
77-
None
95+
if let Some(ts) = meta.get("timestamp").and_then(|value| value.as_str()) {
96+
session_timestamp = Some(ts.to_string());
97+
}
98+
99+
if project_realpath.is_none() {
100+
if let Some(cwd) = meta.get("cwd").and_then(|value| value.as_str()) {
101+
let trimmed = cwd.trim();
102+
if !trimmed.is_empty() {
103+
project_realpath = Some(trimmed.to_string());
104+
}
105+
}
106+
}
107+
}
108+
}
109+
"response_item" => {
110+
if let Some(item) = payload.and_then(|value| value.as_object()) {
111+
let payload_type = item
112+
.get("type")
113+
.and_then(|value| value.as_str())
114+
.unwrap_or_default();
115+
116+
if payload_type != "message" {
117+
continue;
118+
}
119+
120+
let role = item
121+
.get("role")
122+
.and_then(|value| value.as_str())
123+
.unwrap_or("user")
124+
.to_string();
125+
126+
let content_value = item.get("content").cloned().unwrap_or(Value::Null);
127+
128+
let content_text = extract_text_from_content(&content_value);
129+
130+
if project_realpath.is_none() {
131+
if content_text.contains("<environment_context>") && content_text.contains("<cwd>") {
132+
if let (Some(start), Some(end)) = (content_text.find("<cwd>"), content_text.find("</cwd>")) {
133+
if end > start + 5 {
134+
let start_idx = start + 5;
135+
let cwd = content_text[start_idx..end].trim();
136+
if !cwd.is_empty() {
137+
project_realpath = Some(cwd.to_string());
138+
}
78139
}
79-
} else {
80-
None
81140
}
82-
})
83-
.collect::<Vec<_>>()
84-
.join("")
85-
} else if let Some(text) = content_value.as_str() {
86-
text.to_string()
87-
} else {
88-
String::new()
89-
};
90-
91-
// Capture project cwd from environment_context (may be recorded as user message)
141+
}
142+
}
143+
144+
if !content_text.trim().is_empty() {
145+
let is_meta_block = content_text.contains("<user_instructions>")
146+
|| content_text.contains("<environment_context>");
147+
148+
let timestamp = record
149+
.get("timestamp")
150+
.and_then(|value| value.as_str())
151+
.and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok())
152+
.map(|dt| dt.timestamp_millis())
153+
.or_else(|| {
154+
session_timestamp.as_ref().and_then(|ts| {
155+
chrono::DateTime::parse_from_rfc3339(ts)
156+
.map(|dt| dt.timestamp_millis())
157+
.ok()
158+
})
159+
})
160+
.unwrap_or_else(|| chrono::Utc::now().timestamp_millis());
161+
162+
if !is_meta_block {
163+
let message_id_prefix = session_id.clone().unwrap_or_else(|| "unknown".to_string());
164+
messages.push(ChatMessage {
165+
id: format!("{}-{}-{}", message_id_prefix, role, timestamp),
166+
role,
167+
content: content_text.trim().to_string(),
168+
timestamp,
169+
});
170+
}
171+
}
172+
}
173+
}
174+
"message" => {
175+
let role = record
176+
.get("role")
177+
.and_then(|value| value.as_str())
178+
.unwrap_or("user")
179+
.to_string();
180+
181+
let content_text = record
182+
.get("content")
183+
.map(|value| extract_text_from_content(value))
184+
.unwrap_or_default();
185+
92186
if project_realpath.is_none() {
93187
if content_text.contains("<environment_context>") && content_text.contains("<cwd>") {
94188
if let (Some(start), Some(end)) = (content_text.find("<cwd>"), content_text.find("</cwd>")) {
@@ -104,33 +198,35 @@ pub fn parse_session_file(content: &str, file_path: &Path) -> Option<Conversatio
104198
}
105199

106200
if !content_text.trim().is_empty() {
107-
// Filter out meta/system blocks from transcript rendering, but still use them to extract metadata
108201
let is_meta_block = content_text.contains("<user_instructions>")
109202
|| content_text.contains("<environment_context>");
110203

111-
let timestamp = if let Some(ts) = &session_timestamp {
112-
chrono::DateTime::parse_from_rfc3339(ts)
113-
.map(|dt| dt.timestamp_millis())
114-
.unwrap_or_else(|_| chrono::Utc::now().timestamp_millis())
115-
} else {
116-
chrono::Utc::now().timestamp_millis()
117-
};
204+
let timestamp = record
205+
.get("timestamp")
206+
.and_then(|value| value.as_str())
207+
.and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok())
208+
.map(|dt| dt.timestamp_millis())
209+
.or_else(|| {
210+
session_timestamp.as_ref().and_then(|ts| {
211+
chrono::DateTime::parse_from_rfc3339(ts)
212+
.map(|dt| dt.timestamp_millis())
213+
.ok()
214+
})
215+
})
216+
.unwrap_or_else(|| chrono::Utc::now().timestamp_millis());
118217

119218
if !is_meta_block {
219+
let message_id_prefix = session_id.clone().unwrap_or_else(|| "unknown".to_string());
120220
messages.push(ChatMessage {
121-
id: format!(
122-
"{}-{}-{}",
123-
session_id.as_ref().unwrap_or(&"unknown".to_string()),
124-
role,
125-
timestamp
126-
),
221+
id: format!("{}-{}-{}", message_id_prefix, role, timestamp),
127222
role,
128223
content: content_text.trim().to_string(),
129224
timestamp,
130225
});
131226
}
132227
}
133228
}
229+
_ => {}
134230
}
135231
}
136232

@@ -276,15 +372,29 @@ pub async fn get_latest_session_id() -> Result<Option<String>, String> {
276372
if let Some((file_path, _)) = latest_file {
277373
// Read the first line to get session ID
278374
if let Ok(content) = fs::read_to_string(&file_path) {
279-
let first_line = content.lines().next().unwrap_or("");
280-
if let Ok(record) = serde_json::from_str::<SessionRecord>(first_line) {
281-
if let Some(id) = record.id {
282-
let full_session_id = if id.starts_with("codex-event-") {
283-
id
284-
} else {
285-
format!("codex-event-{}", id)
286-
};
287-
return Ok(Some(full_session_id));
375+
if let Some(first_line) = content.lines().next() {
376+
if let Ok(record) = serde_json::from_str::<Value>(first_line) {
377+
let mut id_value = record
378+
.get("id")
379+
.and_then(|value| value.as_str())
380+
.map(|s| s.to_string());
381+
382+
if id_value.is_none() {
383+
if let Some(payload) = record.get("payload").and_then(|value| value.as_object()) {
384+
if let Some(meta_id) = payload.get("id").and_then(|value| value.as_str()) {
385+
id_value = Some(meta_id.to_string());
386+
}
387+
}
388+
}
389+
390+
if let Some(id) = id_value {
391+
let full_session_id = if id.starts_with("codex-event-") {
392+
id
393+
} else {
394+
format!("codex-event-{}", id)
395+
};
396+
return Ok(Some(full_session_id));
397+
}
288398
}
289399
}
290400
}

0 commit comments

Comments
 (0)