Skip to content

Commit daa57c5

Browse files
committed
working on julia
1 parent 9eec90d commit daa57c5

File tree

3 files changed

+107
-71
lines changed

3 files changed

+107
-71
lines changed

apps/openenv/julia_utils.py

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,59 @@ def get_julia_system_prompt() -> str:
2121
2222
Write a **single Julia function** that correctly solves the problem described below.
2323
24+
CRITICAL - Julia is NOT Python! Use correct Julia syntax:
25+
- Use `lowercase()` NOT `tolower()`
26+
- Use `uppercase()` NOT `upper()`
27+
- Use `reverse()` NOT `rev()` or `reversed()`
28+
- Use `parse(Int, x)` or `Int(x)` for type conversion, NOT `int(x)`
29+
- Use `string()` for string conversion, NOT `str()`
30+
- Use `filter()` NOT `subset()`
31+
- Use `length()` NOT `len()`
32+
- Use `push!()` to append to arrays, NOT `append()`
33+
- String indexing: `str[i]` returns a Char, use `str[i:i]` for single-char String
34+
- Arrays are 1-indexed, NOT 0-indexed
35+
- Use `println()` NOT `print()` for line output
36+
- Use `Dict()` NOT `dict()`
37+
- Boolean operators: `&&` for AND, `||` for OR, `!` for NOT
38+
- Check string contains: `occursin(needle, haystack)` or `contains(haystack, needle)` NOT Python's `x in s` operator
39+
40+
Example - Convert string to uppercase and reverse:
41+
```julia
42+
function process_text(text::String)
43+
upper_text = uppercase(text) # NOT upper()
44+
reversed_text = reverse(upper_text) # NOT rev()
45+
return reversed_text
46+
end
47+
```
48+
49+
Example - Work with integers and arrays:
50+
```julia
51+
function sum_digits(n::Int)
52+
total = 0
53+
digits_arr = Int[] # Empty array
54+
while n > 0
55+
digit = n % 10
56+
push!(digits_arr, digit) # NOT append()
57+
total += digit
58+
n = div(n, 10)
59+
end
60+
return total
61+
end
62+
```
63+
2464
Rules:
25-
- The code must be syntactically correct and runnable as is.
26-
- Do not use arrow functions, ternary operators, or modern syntax that may cause issues.
27-
- Use only the Julia standard library.
28-
- Do **not** wrap the code in a module or add a `main` function.
29-
- Do **not** include any test code in your response.
30-
- Do **not** hardcode specific test cases or outputs — the function must work for general inputs.
31-
- The **function name must exactly match** the one used in the provided tests.
65+
- The code must be syntactically correct and runnable as is
66+
- Use only the Julia standard library
67+
- Do **not** wrap the code in a module or add a `main` function
68+
- Do **not** include any test code in your response
69+
- Do **not** hardcode specific test cases or outputs — the function must work for general inputs
70+
- The **function name must exactly match** the one used in the provided tests
3271
- Respond with **only the Julia function** and nothing else (no explanations, no comments, no extra text)
33-
- The function name must exactly match the one used in the provided tests.
34-
- Return only the Julia function.
35-
- character literal should not contain multiple characters.
36-
- take care of object types and mind that spaces matter in julia so cannot add random spaces
72+
- Character literal should not contain multiple characters
73+
- Take care of object types and mind that spaces matter in Julia
3774
3875
Passing tests and clean, compilable code are rewarded. Hardcoding or failing tests is penalized.
76+
3977
FORMAT YOUR RESPONSE AS:
4078
4179
```julia
@@ -141,6 +179,15 @@ def evaluate_julia_response(result, response: str, sample: Dict[str, Any]) -> fl
141179
print(code)
142180
print("-" * 80)
143181

182+
# Validate for common Python-like syntax errors
183+
is_valid, validation_warnings = validate_julia_syntax(code)
184+
if not is_valid:
185+
print("SYNTAX VALIDATION WARNINGS:")
186+
for warning in validation_warnings:
187+
print(f" {warning}")
188+
print("-" * 80)
189+
record_metric("reward/julia/syntax_warnings", len(validation_warnings), Reduce.SUM)
190+
144191
# Extract reward from result
145192
reward = result.reward if result.reward is not None else 0.0
146193

@@ -197,6 +244,51 @@ def extract_julia_code(response: str) -> str:
197244
return text.strip()
198245

199246

247+
def validate_julia_syntax(code: str) -> tuple[bool, list[str]]:
    """
    Validate Julia code for common Python-like syntax errors.

    Heuristic lint pass over model-generated Julia: flags Python built-ins
    that do not exist in Julia, 0-based indexing, and a `function` keyword
    with no matching `end`.

    Args:
        code: Julia code string to validate

    Returns:
        Tuple of (is_valid, list of warning messages)
    """
    warnings = []

    # Common Python functions that don't exist in Julia.
    # Matching is deliberately case-sensitive: with re.IGNORECASE these
    # lowercase patterns would flag valid Julia such as `Int(x)` and
    # `Dict()` — the very constructs the system prompt recommends.
    python_functions = {
        r'\btolower\(': 'tolower() -> use lowercase()',
        r'\bupper\(': 'upper() -> use uppercase()',
        r'\brev\(': 'rev() -> use reverse()',
        r'\bint\(': 'int() -> use parse(Int, x) or Int(x)',
        r'\bstr\(': 'str() -> use string()',
        r'\blen\(': 'len() -> use length()',
        r'\bsubset\(': 'subset() -> use filter()',
        r'\bappend\(': 'append() -> use push!()',
        r'\bdict\(': 'dict() -> use Dict()',
        r'\breversed\(': 'reversed() -> use reverse()',
        r'\.append\(': '.append() -> use push!()',
        r'\.lower\(': '.lower() -> use lowercase()',
        r'\.upper\(': '.upper() -> use uppercase()',
    }

    for pattern, suggestion in python_functions.items():
        # Case-sensitive search (see note above): Julia's own Int()/Dict()
        # must not trip the lowercase Python patterns.
        if re.search(pattern, code):
            warnings.append(f"⚠ Found Python-like syntax: {suggestion}")

    # Check for 0-indexing patterns (common Python mistake)
    if re.search(r'\[\s*0\s*\]', code):
        warnings.append("⚠ Found [0] indexing - Julia arrays are 1-indexed")

    # Check for incomplete function definitions.
    # NOTE(review): a literal "function" in a comment/string would also
    # trigger this check — acceptable for a heuristic linter.
    if 'function' in code and not re.search(r'\bend\b', code):
        warnings.append("⚠ Function missing 'end' keyword")

    is_valid = len(warnings) == 0
    return is_valid, warnings
290+
291+
200292
def transform_julia_sample(sample: Dict[str, Any], tokenizer) -> Dict[str, Any] | None:
201293
"""
202294
Transform raw dataset sample into training format.

apps/openenv/llama3_8b_julia.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ batch_size: 2
88
max_req_tokens: 1024
99
max_res_tokens: 1024
1010
model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
11-
off_by_n: 4
11+
off_by_n: 1
1212

1313
# Main loop configuration
1414
rollout_threads: 1

apps/openenv/main.py

Lines changed: 3 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -848,66 +848,9 @@ async def continuous_training():
848848
curr_policy_version=training_step
849849
)
850850
if batch is None:
851-
if sample_wait_start is None:
852-
sample_wait_start = time.time()
853-
logger.warning(
854-
f"[HANG DETECTION] Buffer returned None at step {training_step}. "
855-
f"Starting timeout timer ({sample_timeout_s}s)."
856-
)
857-
858-
consecutive_none_count += 1
859-
elapsed = time.time() - sample_wait_start
860-
861-
# Log diagnostic info every 30 seconds
862-
if consecutive_none_count % 300 == 0: # 300 * 0.1s = 30s
863-
buffer_size = await replay_buffer._numel.call_one()
864-
generator_versions = await policy.get_version.route()
865-
generator_version = generator_versions[0] if generator_versions else None
866-
logger.warning(
867-
f"[HANG DETECTION] Still waiting for samples (elapsed: {elapsed:.1f}s):\n"
868-
f" - Training step: {training_step}\n"
869-
f" - Buffer size: {buffer_size}\n"
870-
f" - Generator version: {generator_version}\n"
871-
f" - Required policy version: {training_step}\n"
872-
f" - max_policy_age: {cfg.replay_buffer.max_policy_age}\n"
873-
f" - Consecutive None returns: {consecutive_none_count}"
874-
)
875-
876-
# Timeout exceeded
877-
if elapsed > sample_timeout_s:
878-
buffer_size = await replay_buffer._numel.call_one()
879-
generator_versions = await policy.get_version.route()
880-
generator_version = generator_versions[0] if generator_versions else None
881-
error_msg = (
882-
f"\n{'='*80}\n"
883-
f"FATAL ERROR: Training loop hung waiting for buffer samples!\n"
884-
f"{'='*80}\n"
885-
f"Waited {elapsed:.1f}s for buffer.sample() to return data.\n\n"
886-
f"DIAGNOSIS:\n"
887-
f" - Training step: {training_step}\n"
888-
f" - Buffer size: {buffer_size}\n"
889-
f" - Generator version: {generator_version}\n"
890-
f" - Required policy version: {training_step}\n"
891-
f" - max_policy_age: {cfg.replay_buffer.max_policy_age}\n\n"
892-
f"LIKELY CAUSE:\n"
893-
f" If max_policy_age=0 and generator_version < training_step,\n"
894-
f" this is a race condition between weight updates and buffer sampling.\n"
895-
f" All episodes in the buffer have policy_version={generator_version},\n"
896-
f" but eviction requires policy_version >= {training_step - cfg.replay_buffer.max_policy_age}.\n\n"
897-
f"SOLUTIONS:\n"
898-
f" 1. Set 'off_by_n' (max_policy_age) to 1 or higher in your config\n"
899-
f" 2. Increase 'sample_timeout_s' if weight updates are very slow\n"
900-
f" 3. Use faster GPUs or reduce model size to speed up weight updates\n"
901-
f"{'='*80}\n"
902-
)
903-
logger.error(error_msg)
904-
raise RuntimeError(error_msg)
905-
906-
await asyncio.sleep(0.1)
851+
logger.debug("Running out of batch, now waiting")
852+
await asyncio.sleep(1)
907853
else:
908-
# Reset timeout tracking on successful sample
909-
sample_wait_start = None
910-
consecutive_none_count = 0
911854

912855
t.step("waiting_for_buffer")
913856

@@ -951,6 +894,7 @@ async def continuous_training():
951894
print("Training interrupted by user")
952895
except Exception as e:
953896
import traceback
897+
954898
print(f"Training failed with error: {e}")
955899
print("\nFull traceback:")
956900
traceback.print_exc()

0 commit comments

Comments
 (0)