Skip to content

Commit daa57c5

Browse files
committed
working on julia
1 parent 9eec90d commit daa57c5

File tree

3 files changed

+107
-71
lines changed

3 files changed

+107
-71
lines changed

apps/openenv/julia_utils.py

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,59 @@ def get_julia_system_prompt() -> str:
2121
2222
Write a **single Julia function** that correctly solves the problem described below.
2323
24+
CRITICAL - Julia is NOT Python! Use correct Julia syntax:
25+
- Use `lowercase()` NOT `tolower()`
26+
- Use `uppercase()` NOT `upper()`
27+
- Use `reverse()` NOT `rev()` or `reversed()`
28+
- Use `parse(Int, x)` or `Int(x)` for type conversion, NOT `int(x)`
29+
- Use `string()` for string conversion, NOT `str()`
30+
- Use `filter()` NOT `subset()`
31+
- Use `length()` NOT `len()`
32+
- Use `push!()` to append to arrays, NOT `append()`
33+
- String indexing: `str[i]` returns a Char, use `str[i:i]` for single-char String
34+
- Arrays are 1-indexed, NOT 0-indexed
35+
- Use `println()` NOT `print()` for line output
36+
- Use `Dict()` NOT `dict()`
37+
- Boolean operators: `&&` for AND, `||` for OR, `!` for NOT
38+
- Check string contains: `occursin(needle, haystack)` or `contains(haystack, needle)` NOT Python's `x in s` operator
39+
40+
Example - Convert string to uppercase and reverse:
41+
```julia
42+
function process_text(text::String)
43+
upper_text = uppercase(text) # NOT upper()
44+
reversed_text = reverse(upper_text) # NOT rev()
45+
return reversed_text
46+
end
47+
```
48+
49+
Example - Work with integers and arrays:
50+
```julia
51+
function sum_digits(n::Int)
52+
total = 0
53+
digits_arr = Int[] # Empty array
54+
while n > 0
55+
digit = n % 10
56+
push!(digits_arr, digit) # NOT append()
57+
total += digit
58+
n = div(n, 10)
59+
end
60+
return total
61+
end
62+
```
63+
2464
Rules:
25-
- The code must be syntactically correct and runnable as is.
26-
- Do not use arrow functions, ternary operators, or modern syntax that may cause issues.
27-
- Use only the Julia standard library.
28-
- Do **not** wrap the code in a module or add a `main` function.
29-
- Do **not** include any test code in your response.
30-
- Do **not** hardcode specific test cases or outputs — the function must work for general inputs.
31-
- The **function name must exactly match** the one used in the provided tests.
65+
- The code must be syntactically correct and runnable as is
66+
- Use only the Julia standard library
67+
- Do **not** wrap the code in a module or add a `main` function
68+
- Do **not** include any test code in your response
69+
- Do **not** hardcode specific test cases or outputs — the function must work for general inputs
70+
- The **function name must exactly match** the one used in the provided tests
3271
- Respond with **only the Julia function** and nothing else (no explanations, no comments, no extra text)
33-
- The function name must exactly match the one used in the provided tests.
34-
- Return only the Julia function.
35-
- character literal should not contain multiple characters.
36-
- take care of object types and mind that spaces matter in julia so cannot add random spaces
72+
- Character literal should not contain multiple characters
73+
- Take care of object types and mind that spaces matter in Julia
3774
3875
Passing tests and clean, compilable code are rewarded. Hardcoding or failing tests is penalized.
76+
3977
FORMAT YOUR RESPONSE AS:
4078
4179
```julia
@@ -141,6 +179,15 @@ def evaluate_julia_response(result, response: str, sample: Dict[str, Any]) -> fl
141179
print(code)
142180
print("-" * 80)
143181

182+
# Validate for common Python-like syntax errors
183+
is_valid, validation_warnings = validate_julia_syntax(code)
184+
if not is_valid:
185+
print("SYNTAX VALIDATION WARNINGS:")
186+
for warning in validation_warnings:
187+
print(f" {warning}")
188+
print("-" * 80)
189+
record_metric("reward/julia/syntax_warnings", len(validation_warnings), Reduce.SUM)
190+
144191
# Extract reward from result
145192
reward = result.reward if result.reward is not None else 0.0
146193

@@ -197,6 +244,51 @@ def extract_julia_code(response: str) -> str:
197244
return text.strip()
198245

199246

247+
def validate_julia_syntax(code: str) -> tuple[bool, list[str]]:
    """
    Validate Julia code for common Python-like syntax errors.

    Heuristic lint pass over model-generated Julia: flags Python built-ins
    that do not exist in Julia, 0-based indexing, and a `function` keyword
    with no matching `end`.

    Args:
        code: Julia code string to validate

    Returns:
        Tuple of (is_valid, list of warning messages)
    """
    warnings = []

    # Common Python functions that don't exist in Julia.
    # Matching is deliberately case-sensitive: with re.IGNORECASE these
    # lowercase patterns would flag valid Julia such as `Int(x)` and
    # `Dict()` — the very constructs the system prompt recommends.
    python_functions = {
        r'\btolower\(': 'tolower() -> use lowercase()',
        r'\bupper\(': 'upper() -> use uppercase()',
        r'\brev\(': 'rev() -> use reverse()',
        r'\bint\(': 'int() -> use parse(Int, x) or Int(x)',
        r'\bstr\(': 'str() -> use string()',
        r'\blen\(': 'len() -> use length()',
        r'\bsubset\(': 'subset() -> use filter()',
        r'\bappend\(': 'append() -> use push!()',
        r'\bdict\(': 'dict() -> use Dict()',
        r'\breversed\(': 'reversed() -> use reverse()',
        r'\.append\(': '.append() -> use push!()',
        r'\.lower\(': '.lower() -> use lowercase()',
        r'\.upper\(': '.upper() -> use uppercase()',
    }

    for pattern, suggestion in python_functions.items():
        # Case-sensitive search (see note above): Julia's own Int()/Dict()
        # must not trip the lowercase Python patterns.
        if re.search(pattern, code):
            warnings.append(f"⚠ Found Python-like syntax: {suggestion}")

    # Check for 0-indexing patterns (common Python mistake)
    if re.search(r'\[\s*0\s*\]', code):
        warnings.append("⚠ Found [0] indexing - Julia arrays are 1-indexed")

    # Check for incomplete function definitions.
    # NOTE(review): a literal "function" in a comment/string would also
    # trigger this check — acceptable for a heuristic linter.
    if 'function' in code and not re.search(r'\bend\b', code):
        warnings.append("⚠ Function missing 'end' keyword")

    is_valid = len(warnings) == 0
    return is_valid, warnings
290+
291+
200292
def transform_julia_sample(sample: Dict[str, Any], tokenizer) -> Dict[str, Any] | None:
201293
"""
202294
Transform raw dataset sample into training format.

apps/openenv/llama3_8b_julia.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ batch_size: 2
88
max_req_tokens: 1024
99
max_res_tokens: 1024
1010
model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
11-
off_by_n: 4
11+
off_by_n: 1
1212

1313
# Main loop configuration
1414
rollout_threads: 1

apps/openenv/main.py

Lines changed: 3 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -848,66 +848,9 @@ async def continuous_training():
848848
curr_policy_version=training_step
849849
)
850850
if batch is None:
851-
if sample_wait_start is None:
852-
sample_wait_start = time.time()
853-
logger.warning(
854-
f"[HANG DETECTION] Buffer returned None at step {training_step}. "
855-
f"Starting timeout timer ({sample_timeout_s}s)."
856-
)
857-
858-
consecutive_none_count += 1
859-
elapsed = time.time() - sample_wait_start
860-
861-
# Log diagnostic info every 30 seconds
862-
if consecutive_none_count % 300 == 0: # 300 * 0.1s = 30s
863-
buffer_size = await replay_buffer._numel.call_one()
864-
generator_versions = await policy.get_version.route()
865-
generator_version = generator_versions[0] if generator_versions else None
866-
logger.warning(
867-
f"[HANG DETECTION] Still waiting for samples (elapsed: {elapsed:.1f}s):\n"
868-
f" - Training step: {training_step}\n"
869-
f" - Buffer size: {buffer_size}\n"
870-
f" - Generator version: {generator_version}\n"
871-
f" - Required policy version: {training_step}\n"
872-
f" - max_policy_age: {cfg.replay_buffer.max_policy_age}\n"
873-
f" - Consecutive None returns: {consecutive_none_count}"
874-
)
875-
876-
# Timeout exceeded
877-
if elapsed > sample_timeout_s:
878-
buffer_size = await replay_buffer._numel.call_one()
879-
generator_versions = await policy.get_version.route()
880-
generator_version = generator_versions[0] if generator_versions else None
881-
error_msg = (
882-
f"\n{'='*80}\n"
883-
f"FATAL ERROR: Training loop hung waiting for buffer samples!\n"
884-
f"{'='*80}\n"
885-
f"Waited {elapsed:.1f}s for buffer.sample() to return data.\n\n"
886-
f"DIAGNOSIS:\n"
887-
f" - Training step: {training_step}\n"
888-
f" - Buffer size: {buffer_size}\n"
889-
f" - Generator version: {generator_version}\n"
890-
f" - Required policy version: {training_step}\n"
891-
f" - max_policy_age: {cfg.replay_buffer.max_policy_age}\n\n"
892-
f"LIKELY CAUSE:\n"
893-
f" If max_policy_age=0 and generator_version < training_step,\n"
894-
f" this is a race condition between weight updates and buffer sampling.\n"
895-
f" All episodes in the buffer have policy_version={generator_version},\n"
896-
f" but eviction requires policy_version >= {training_step - cfg.replay_buffer.max_policy_age}.\n\n"
897-
f"SOLUTIONS:\n"
898-
f" 1. Set 'off_by_n' (max_policy_age) to 1 or higher in your config\n"
899-
f" 2. Increase 'sample_timeout_s' if weight updates are very slow\n"
900-
f" 3. Use faster GPUs or reduce model size to speed up weight updates\n"
901-
f"{'='*80}\n"
902-
)
903-
logger.error(error_msg)
904-
raise RuntimeError(error_msg)
905-
906-
await asyncio.sleep(0.1)
851+
logger.debug("Running out of batch, now waiting")
852+
await asyncio.sleep(1)
907853
else:
908-
# Reset timeout tracking on successful sample
909-
sample_wait_start = None
910-
consecutive_none_count = 0
911854

912855
t.step("waiting_for_buffer")
913856

@@ -951,6 +894,7 @@ async def continuous_training():
951894
print("Training interrupted by user")
952895
except Exception as e:
953896
import traceback
897+
954898
print(f"Training failed with error: {e}")
955899
print("\nFull traceback:")
956900
traceback.print_exc()

0 commit comments

Comments
 (0)