From 8e6c2c2c20312cbef8c434a19381e1b9c42f969b Mon Sep 17 00:00:00 2001 From: Neel Kant Date: Tue, 26 Aug 2025 00:47:13 -0700 Subject: [PATCH] change prints, max achieved throughput --- fle/env/gym_env/environment.py | 7 +++---- fle/env/gym_env/run_eval.py | 16 ---------------- fle/env/gym_env/trajectory_logger.py | 5 +++++ fle/env/gym_env/trajectory_runner.py | 12 ++++++------ fle/env/tools/agent/connect_entities/client.py | 3 --- fle/env/tools/agent/craft_item/client.py | 3 --- fle/env/tools/agent/harvest_resource/client.py | 3 --- fle/env/tools/agent/move_to/client.py | 3 --- fle/env/tools/agent/sleep/client.py | 3 --- .../algorithms/independent/gym_run_config.json | 4 ++-- fle/eval/tasks/throughput_task.py | 4 +++- 11 files changed, 19 insertions(+), 44 deletions(-) diff --git a/fle/env/gym_env/environment.py b/fle/env/gym_env/environment.py index 7bec2867..13953545 100644 --- a/fle/env/gym_env/environment.py +++ b/fle/env/gym_env/environment.py @@ -10,7 +10,7 @@ from fle.env import FactorioInstance from fle.commons.models.game_state import GameState from fle.env.gym_env.action import Action from fle.commons.models.achievements import ProductionFlows -from fle.env.utils.profits import get_achievements +from fle.env.utils.achievements import calculate_achievements from fle.agents import Response, TaskResponse from fle.env.gym_env.observation import ( Observation, @@ -433,9 +433,7 @@ class FactorioGymEnv(gym.Env): output_game_state = GameState.from_instance(self.instance) # Get post-execution flows and calculate achievements current_flows = ProductionFlows.from_dict(namespace._get_production_stats()) - achievements = get_achievements( - start_production_flows.__dict__, current_flows.__dict__ - ) + achievements = calculate_achievements(start_production_flows, current_flows) # Store for next step self._last_production_flows[agent_idx] = current_flows.__dict__ @@ -467,6 +465,7 @@ class FactorioGymEnv(gym.Env): "last_message_timestamp": self.last_message_timestamps[agent_idx], "task_verification": task_response, "output_game_state": output_game_state, + "achievements": achievements, } # pause the game until the next step if this is part of a trajectory diff --git a/fle/env/gym_env/run_eval.py b/fle/env/gym_env/run_eval.py index 57af3530..1ff5148d 100644 --- a/fle/env/gym_env/run_eval.py +++ b/fle/env/gym_env/run_eval.py @@ -1,4 +1,3 @@ -import argparse import asyncio import json import multiprocessing @@ -129,18 +128,3 @@ async def main(config_path): # Wait for all processes to complete for p in processes: p.join() - - -if __name__ == "__main__": - multiprocessing.set_start_method("spawn") - parser = argparse.ArgumentParser( - description="Run Factorio Learning Environment evaluation" - ) - parser.add_argument( - "--run-config", - type=str, - required=True, - help="Path to run configuration JSON file", - ) - args = parser.parse_args() - asyncio.run(main(args.run_config)) diff --git a/fle/env/gym_env/trajectory_logger.py b/fle/env/gym_env/trajectory_logger.py index 3c8439b1..ec10b5f5 100644 --- a/fle/env/gym_env/trajectory_logger.py +++ b/fle/env/gym_env/trajectory_logger.py @@ -107,6 +107,11 @@ class TrajectoryLogger: with open(obs_file, "w") as f: f.write(formatted_obs) + raw_text = agent.observation_formatter.format_raw_text(observation.raw_text) + for line in raw_text.split("\n"): + if "Error" in line: + print("raw_text Error:", line) + def add_iteration_time(self, iteration_time: float): """Add an iteration time to the tracking list diff --git a/fle/env/gym_env/trajectory_runner.py b/fle/env/gym_env/trajectory_runner.py index 0c256054..1668c0cf 100644 --- a/fle/env/gym_env/trajectory_runner.py +++ b/fle/env/gym_env/trajectory_runner.py @@ -1,6 +1,6 @@ import time from itertools import product -from typing import List, Optional, Tuple +from typing import Any, List, Dict, Optional, Tuple from fle.agents import CompletionReason, CompletionResult from fle.agents.gym_agent import GymAgent @@ -67,11 +67,8 @@ class GymTrajectoryRunner: iteration_time = time.time() - iteration_start self.logger.add_iteration_time(iteration_time) - # Log progress every 10 steps - if agent_step % 10 == 0: - self.logger.log_progress(agent, agent_step, program.value) - - # Log observation and program + # Log progress, observation and program + self.logger.log_progress(agent, agent_step, program.value) self.logger.log_observation_and_program( agent, agent_idx, agent_step, observation, program ) @@ -83,6 +80,7 @@ class GymTrajectoryRunner: reward: float, response: str, error_occurred: bool, + achievements: Dict[str, Any], game_state: GameState, ) -> Program: """Create a Program object from a Policy and environment results @@ -114,6 +112,7 @@ class GymTrajectoryRunner: version_description=self.config.version_description, value=reward, state=game_state, + achievements=achievements, meta={ "model": self.agents[agent_idx].model, "process_id": self.process_id, @@ -214,6 +213,7 @@ class GymTrajectoryRunner: reward=reward, response=obs_dict["raw_text"], error_occurred=info["error_occurred"], + achievements=info["achievements"], game_state=output_game_state, ) diff --git a/fle/env/tools/agent/connect_entities/client.py b/fle/env/tools/agent/connect_entities/client.py index dab8c89d..de2612a5 100644 --- a/fle/env/tools/agent/connect_entities/client.py +++ b/fle/env/tools/agent/connect_entities/client.py @@ -164,9 +164,6 @@ class ConnectEntities(Tool): real_world_sleep = ( ticks_added / 60 / game_speed if game_speed > 0 else 0 ) - print( - f"connect_entities: Sleeping for {real_world_sleep:.3f} seconds ({ticks_added} ticks at speed {game_speed}x)" - ) sleep(real_world_sleep) if dry_run: diff --git a/fle/env/tools/agent/craft_item/client.py b/fle/env/tools/agent/craft_item/client.py index 999bbfa0..ee5ab399 100644 --- a/fle/env/tools/agent/craft_item/client.py +++ b/fle/env/tools/agent/craft_item/client.py @@ -47,9 +47,6 @@ class CraftItem(Tool): if ticks_added > 0: game_speed = self.game_state.instance.get_speed() real_world_sleep = ticks_added / 60 / game_speed if game_speed > 0 else 0 - print( - f"craft_item: Sleeping for {real_world_sleep:.3f} seconds ({ticks_added} ticks at speed {game_speed}x)" - ) sleep(real_world_sleep) if not self.game_state.instance.fast: diff --git a/fle/env/tools/agent/harvest_resource/client.py b/fle/env/tools/agent/harvest_resource/client.py index ca20f3ef..ed3ecbf5 100644 --- a/fle/env/tools/agent/harvest_resource/client.py +++ b/fle/env/tools/agent/harvest_resource/client.py @@ -50,9 +50,6 @@ class HarvestResource(Tool): if ticks_added > 0: game_speed = self.game_state.instance.get_speed() real_world_sleep = ticks_added / 60 / game_speed if game_speed > 0 else 0 - print( - f"harvest_resource: Sleeping for {real_world_sleep:.3f} seconds ({ticks_added} ticks at speed {game_speed}x)" - ) sleep(real_world_sleep) if response != {} and response == 0 or isinstance(response, str): diff --git a/fle/env/tools/agent/move_to/client.py b/fle/env/tools/agent/move_to/client.py index 432fcaa8..3bf610fb 100644 --- a/fle/env/tools/agent/move_to/client.py +++ b/fle/env/tools/agent/move_to/client.py @@ -71,9 +71,6 @@ class MoveTo(Tool): real_world_sleep = ( ticks_added / 60 / game_speed if game_speed > 0 else 0 ) - print( - f"move_to: Sleeping for {real_world_sleep:.3f} seconds ({ticks_added} ticks at speed {game_speed}x)" - ) sleep(real_world_sleep) if isinstance(response, int) and response == 0: diff --git a/fle/env/tools/agent/sleep/client.py b/fle/env/tools/agent/sleep/client.py index 738a2ef9..ac3efd94 100644 --- a/fle/env/tools/agent/sleep/client.py +++ b/fle/env/tools/agent/sleep/client.py @@ -25,9 +25,6 @@ class Sleep(Tool): if ticks_added > 0: game_speed = self.game_state.instance.get_speed() real_world_sleep = ticks_added / 60 / game_speed if game_speed > 0 else 0 - print( - f"sleep: Sleeping for {real_world_sleep:.3f} seconds ({ticks_added} ticks at speed {game_speed}x)" - ) sleep(real_world_sleep) return True diff --git a/fle/eval/algorithms/independent/gym_run_config.json b/fle/eval/algorithms/independent/gym_run_config.json index 78fa505e..b4faf912 100644 --- a/fle/eval/algorithms/independent/gym_run_config.json +++ b/fle/eval/algorithms/independent/gym_run_config.json @@ -1,6 +1,6 @@ [ { - "env_id": "iron_ore_throughput", - "model": "claude-3-5-sonnet-latest" + "env_id": "steel_plate_throughput", + "model": "claude-4-opus-20250514" } ] \ No newline at end of file diff --git a/fle/eval/tasks/throughput_task.py b/fle/eval/tasks/throughput_task.py index 3f03dd4f..c8dcf560 100644 --- a/fle/eval/tasks/throughput_task.py +++ b/fle/eval/tasks/throughput_task.py @@ -82,7 +82,9 @@ class ThroughputTask(TaskABC): break return TaskResponse( success=max_achieved_throughput >= self.quota, - meta={}, + meta={ + f"{self.throughput_entity}_achieved_throughput": max_achieved_throughput, + }, ) def _to_dict(self) -> Dict[str, Any]: