@@ -536,6 +536,8 @@ export default function InteractiveRLLab(){
536536 const tRef = useRef ( 0 ) ;
537537 const totalReturnRef = useRef ( 0 ) ;
538538 const currentEpReturnRef = useRef ( 0 ) ;
539+ const gameWonRef = useRef ( false ) ; // Track game won state in ref to avoid race conditions
540+ const levelTransitionRef = useRef < ReturnType < typeof setTimeout > | null > ( null ) ; // Track timeout to clean up on unmount
539541 const psRef = useRef < PSLayer > ( new PSLayer ( gridW , gridH ) ) ;
540542 const gridRef = useRef ( grid ) ; useEffect ( ( ) => { gridRef . current = grid } , [ grid ] ) ;
541543 const agentRef = useRef ( agent ) ; useEffect ( ( ) => { agentRef . current = agent } , [ agent ] ) ;
@@ -621,17 +623,33 @@ export default function InteractiveRLLab(){
621623 setEpisodeReturns ( [ ] ) ;
622624 setCurrentEpReturn ( 0 ) ;
623625 setGameWon ( false ) ;
626+ gameWonRef . current = false ; // Also reset the ref
624627 tRef . current = 0 ;
625628 totalReturnRef . current = 0 ;
626629 currentEpReturnRef . current = 0 ;
627630 psRef . current = new PSLayer ( level . gridW , level . gridH ) ;
631+ startPosRef . current = newStartPos ; // Ensure ref is in sync
628632 } ;
629633
630634 // Load initial level
631635 useEffect ( ( ) => {
632636 loadLevel ( currentLevel ) ;
633637 } , [ ] ) ;
634638
639+ // Sync gameWonRef with gameWon state
640+ useEffect ( ( ) => {
641+ gameWonRef . current = gameWon ;
642+ } , [ gameWon ] ) ;
643+
644+ // Cleanup timeout on unmount
645+ useEffect ( ( ) => {
646+ return ( ) => {
647+ if ( levelTransitionRef . current ) {
648+ clearTimeout ( levelTransitionRef . current ) ;
649+ }
650+ } ;
651+ } , [ ] ) ;
652+
635653 useEffect ( ( ) => { psRef . current = new PSLayer ( gridW , gridH ) ; } , [ gridW , gridH ] ) ;
636654
637655 useEffect ( ( ) => {
@@ -724,7 +742,10 @@ export default function InteractiveRLLab(){
724742 }
725743
/**
 * Try to move from cell (x, y) using action index `a`.
 *
 * The requested action is first passed through `windJitter`, and the index it
 * returns selects the entry of `ACTIONS` that is handed to `stepXY`.
 * (Presumably `windJitter` may swap the action for a different one — confirm
 * against its definition.)
 *
 * @param x - current column of the agent
 * @param y - current row of the agent
 * @param a - index of the requested action
 * @returns the cell produced by `stepXY` when `legal` accepts it; otherwise
 *          the unchanged `{x, y}` (the agent stays where it is).
 */
function attemptMove(x: number, y: number, a: number) {
  const actionIdx = windJitter(a);

  // Refuse anything that cannot index ACTIONS. The integer test matters:
  // NaN passes both range comparisons (they evaluate to false), and a
  // fractional index would look up `undefined` and pass it to stepXY.
  const usable =
    Number.isInteger(actionIdx) && actionIdx >= 0 && actionIdx < ACTIONS.length;
  if (!usable) return { x, y };

  const next = stepXY(x, y, ACTIONS[actionIdx]);
  if (!legal(next.x, next.y)) return { x, y };
  return next;
}
@@ -741,23 +762,37 @@ export default function InteractiveRLLab(){
741762 if ( newReturns . length >= 5 ) {
742763 const last5 = newReturns . slice ( - 5 ) ;
743764 const allGood = last5 . every ( ep => ep . G > winThreshold ) ;
744- if ( allGood && ! gameWon ) {
765+ if ( allGood && ! gameWonRef . current ) {
766+ // Mark game as won immediately to stop further ticks
767+ gameWonRef . current = true ;
768+ setGameWon ( true ) ;
769+ setRunning ( false ) ;
770+
745771 if ( currentLevel < LEVELS . length ) {
746- // Advance to next level
747- const nextLevel = currentLevel + 1 ;
748- setTimeout ( ( ) => {
772+ // Clear any existing timeout
773+ if ( levelTransitionRef . current ) {
774+ clearTimeout ( levelTransitionRef . current ) ;
775+ }
776+
777+ // Schedule level transition after brief delay
778+ levelTransitionRef . current = setTimeout ( ( ) => {
779+ const nextLevel = currentLevel + 1 ;
749780 setCurrentLevel ( nextLevel ) ;
750781 loadLevel ( nextLevel ) ;
751- setRunning ( true ) ; // Auto-start next level
752- } , 2000 ) ; // Brief pause to show win message
782+ // Reset game won for next level
783+ gameWonRef . current = false ;
784+ setGameWon ( false ) ;
785+ setRunning ( true ) ;
786+ levelTransitionRef . current = null ;
787+ } , 2000 ) ;
753788 }
754- setGameWon ( true ) ;
755- setRunning ( false ) ; // Stop the simulation
756789 }
757790 }
758791
759792 return newReturns ;
760793 } ) ;
794+
795+ // Reset for next episode (but don't reset game won state here)
761796 setEpisode ( e => e + 1 ) ;
762797 setCurrentEpReturn ( 0 ) ;
763798 currentEpReturnRef . current = 0 ;
@@ -767,24 +802,47 @@ export default function InteractiveRLLab(){
767802 }
768803
/**
 * Advance the simulation by one step.
 *
 * Does nothing while `gameWonRef.current` is set. Otherwise it reads the agent
 * position from `agentRef`, picks an action, updates the `psRef` layer
 * (decayGlow, addGlow, rewardUpdate, normalize), records the reward in the
 * trace state, moves the agent, and calls `restartEpisode` when the new cell
 * is terminal.
 *
 * The win flag is read from a ref rather than from `gameWon` state so that a
 * `tick` captured in an older closure still sees the latest value.
 *
 * Any exception raised during the step is logged and stops the run. It does
 * NOT touch `gameWonRef`: that flag is only re-synced when `gameWon` state
 * changes, so setting it here would keep every later tick returning early
 * until the level is reloaded, and would record an error as a win.
 */
function tick() {
  if (gameWonRef.current) return;

  try {
    const { x, y } = agentRef.current;

    // Recover from a corrupted agent position instead of indexing off-grid.
    // NOTE(review): gridW/gridH are read from the enclosing render's closure,
    // unlike the other values here which come from refs — confirm they cannot
    // be stale when tick is invoked from a timer.
    if (x < 0 || y < 0 || x >= gridW || y >= gridH) {
      console.warn("Agent out of bounds, resetting to start position");
      setAgent(startPosRef.current);
      agentRef.current = startPosRef.current;
      return;
    }

    const a = pickAction(x, y);
    psRef.current.decayGlow(psGlowEtaRef.current);
    psRef.current.addGlow(x, y, a, 1);
    const s1 = attemptMove(x, y, a);

    // Defensive check on the move result; the step is abandoned if it fails.
    if (s1.x < 0 || s1.y < 0 || s1.x >= gridW || s1.y >= gridH) {
      console.warn("Move resulted in out of bounds position");
      return;
    }

    const r = envReward(s1.x, s1.y);
    psRef.current.rewardUpdate(r, psGammaRef.current, psLambdaRef.current);
    psRef.current.normalize();
    tRef.current += 1;
    totalReturnRef.current += r;

    // Keep only the most recent 50 per-step rewards.
    setRewardTrace(tr => {
      const nxt = [...tr, { t: tRef.current, R: r }];
      return nxt.length > 50 ? nxt.slice(-50) : nxt;
    });
    // Keep only the most recent 500 cumulative-return samples.
    setCumTrace(ct => {
      const nxt = [...ct, { t: tRef.current, C: totalReturnRef.current }];
      return nxt.length > 500 ? nxt.slice(-500) : nxt;
    });
    setCurrentEpReturn(v => v + r);

    // Update state and ref together so the next tick reads the new position
    // without waiting for a re-render.
    setAgent(s1);
    agentRef.current = s1;

    if (isTerminal(s1.x, s1.y)) restartEpisode(r);
  } catch (error) {
    console.error("Error during tick:", error);
    setRunning(false);
  }
}
789847
790848 const cellSize = 50 ;
0 commit comments