当前位置: 首页 > news >正文

RLLM工具:Python 沙箱(LCB沙箱)

主函数定义在/rllm/tools/code_tools/python_interpreter.py

    def _init_backend(self):
        """Instantiate the sandbox backend selected by ``self.backend_type``.

        ``"local"`` creates an ``LCBPythonInterpreter``, ``"e2b"`` creates an
        ``E2BPythonInterpreter`` configured with ``self.n_sandboxes`` and
        ``self.api_key``, and ``"together"`` creates a ``TogetherCodeTool``
        configured with ``self.api_key``. The new object is stored on
        ``self.backend``.

        Raises:
            ValueError: If ``self.backend_type`` is not one of the values above.
        """
        selected = self.backend_type

        if selected == "local":
            self.backend: LCBPythonInterpreter | E2BPythonInterpreter | TogetherCodeTool = LCBPythonInterpreter()
            return

        if selected == "e2b":
            self.backend = E2BPythonInterpreter(n_sandboxes=self.n_sandboxes, api_key=self.api_key)
            return

        if selected == "together":
            self.backend = TogetherCodeTool(api_key=self.api_key)
            return

        raise ValueError(f"Unsupported backend type: {self.backend_type}")

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """Run ``code`` in the configured sandbox backend.

        Args:
            code: Python code to execute.
            timeout: Maximum execution time in seconds.
            **kwargs: Extra parameters forwarded unchanged to the backend.

        Returns:
            Whatever ``self.backend.forward`` returns (annotated as
            ``CodeToolOutput``).
        """
        # Pure delegation: the backend chosen in _init_backend does the work.
        outcome = self.backend.forward(code=code, timeout=timeout, **kwargs)
        return outcome

LCB 解释器定义在 rllm/tools/code_tools/lcb_tool.py 中。可以看到,这个本地沙箱主要提供两方面的防护:

  1. 防止有害操作(例如对本地文件的读写等)
  2. 进行超时处理
import ast
import faulthandler
import multiprocessing
import queue
import signal
import traceback

from rllm.rewards.code_utils.livecodebench import (
    Capturing,
    clean_if_name,
    compile_code,
    get_function,
    make_function,
    reliability_guard,
    timeout_handler,
)
from rllm.tools.code_tools.code_tool import CodeTool, CodeToolOutput


def ensure_return_value(code):
    """Turn a trailing top-level expression into a ``return`` statement.

    Only the last top-level statement is considered, and only if it is a bare
    expression. Note that whenever parsing succeeds the code is round-tripped
    through ``ast.unparse``, so comments and original formatting are dropped
    even if nothing was converted.

    Args:
        code (str): Python code to process.

    Returns:
        str: The re-generated code, or the original ``code`` unchanged when it
        is blank, has a syntax error, or cannot be processed.
    """
    if not code.strip():
        return code

    try:
        tree = ast.parse(code)
        body = tree.body

        if body and isinstance(body[-1], ast.Expr):
            body[-1] = ast.Return(value=body[-1].value)
            # The new node has no position info; fill it in so the tree is valid.
            ast.fix_missing_locations(tree)

        return ast.unparse(tree)
    except SyntaxError:
        # Leave invalid code untouched; the executor will report the error.
        return code
    except Exception as e:
        # Unexpected failure: warn, but still fall back to the original code.
        print(f"Warning: Could not process code: {e}")
        return code


def _describe_failure(exc):
    """Build the stderr text for ``exc``; must be called inside an ``except`` block."""
    # NOTE(review): presumably ``timeout_handler`` raises a TimeoutException
    # when SIGALRM fires; the match is by name, exactly as the original did.
    if "timeoutexception" in repr(exc).lower():
        return "Time Limit Exceeded."
    return traceback.format_exc()


def execute_code(code, timeout):
    """Execute ``code`` in the current process under an alarm-based time limit.

    The code is passed through ``clean_if_name`` and ``make_function``,
    compiled with ``compile_code``, and the resulting ``wrapped_function`` is
    called while output is captured with ``Capturing``.

    This installs a ``SIGALRM`` handler and calls ``reliability_guard()``, both
    of which affect the whole process, so it is meant to run in a child
    process (see ``lcb_sandbox``).

    Args:
        code (str): Python code to execute.
        timeout (int): Maximum execution time in seconds.

    Returns:
        tuple: ``(stdout, stderr, result)``. ``stdout`` is ``None`` if execution
        never started, ``stderr`` is ``None`` when no problem was recorded, and
        ``result`` is the value returned by the wrapped function.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    stdout, stderr, result = None, None, None

    # Disable functionalities that can make destructive changes to the test
    # (the original author's note: restricts read/write operations).
    reliability_guard()

    # This alarm bounds the preparation/compilation phase.
    signal.alarm(timeout)
    try:
        code = clean_if_name(code)
        # Wrap the code in a function so it can be called and can return a value.
        code = make_function(code)

        compiled_sol = compile_code(code, timeout)
        if compiled_sol is None:
            stderr = "Failed to compile code"
            return stdout, stderr, result

        method = get_function(compiled_sol, "wrapped_function")
        if method is None:
            stderr = "Failed to get function 'wrapped_function'"
            return stdout, stderr, result

        faulthandler.enable()
        # Re-arm so the user code gets the full time budget.
        signal.alarm(timeout)

        with Capturing() as captured_output:
            try:
                result = method()
            except SystemExit as e:
                # SystemExit is not an Exception subclass; report it rather
                # than letting it end the worker.
                stderr = f"SystemExit: {e}"
            except Exception as e:
                stderr = _describe_failure(e)
            finally:
                signal.alarm(0)
                faulthandler.disable()

        stdout = captured_output[0] if captured_output else ""
        return stdout, stderr, result
    except Exception as e:
        # A failure outside the user-code call (e.g. a timeout while
        # compiling) used to be swallowed with stderr left as None; record it
        # so the caller can tell what went wrong.
        if stderr is None:
            stderr = _describe_failure(e)
        return stdout, stderr, result
    finally:
        # Never leave an alarm pending, whichever path was taken.
        signal.alarm(0)


def _wrapper_exec_fn(sample, timeout, result_queue):
    """Run ``execute_code`` on ``sample`` and put its result tuple on ``result_queue``."""
    res = execute_code(sample, timeout=timeout)
    result_queue.put(res)


def lcb_sandbox(code, timeout):
    """Run ``code`` in a separate process with a hard wall-clock limit.

    This is the main entry point for executing code: the last expression is
    converted to a return value, then ``execute_code`` runs in a child process
    so guards and signal handlers cannot affect the caller.

    Args:
        code (str): Python code to execute.
        timeout (int): Maximum execution time in seconds.

    Returns:
        tuple: ``(stdout, stderr, result)`` from the child, or
        ``("Timeout", "", "")`` if the child delivered no result within
        ``timeout + 6`` seconds.
    """
    code = ensure_return_value(code)

    # The manager owns a helper server process; the context manager shuts it
    # down so repeated calls do not leak processes.
    with multiprocessing.Manager() as manager:
        result_queue = manager.Queue()
        p = multiprocessing.Process(
            target=_wrapper_exec_fn,
            args=(code, timeout, result_queue),
        )
        p.start()
        try:
            # Allow some slack beyond the in-process alarm before giving up.
            p.join(timeout=(timeout + 1) + 5)
            try:
                # Non-blocking on purpose: a plain get() would wait forever if
                # the child hung or died without producing a result.
                return result_queue.get_nowait()
            except queue.Empty:
                # NOTE(review): the message sits in the stdout slot; kept
                # as-is for compatibility with existing callers.
                return "Timeout", "", ""
        finally:
            # Make sure the child is gone: ask politely first, then force it.
            if p.is_alive():
                p.terminate()
                p.join(timeout=1)
                if p.is_alive():
                    p.kill()


class LCBPythonInterpreter(CodeTool):
    """A tool for executing Python code via ``lcb_sandbox``.

    Code runs in a separate process with timeout protection, using the
    LiveCodeBench execution helpers.
    """

    def __init__(self):
        """Register the tool under the name ``"python"``."""
        super().__init__(
            name="python",
            description="Execute python code in the same environment as the LiveCodeBench benchmark.",
            n_sandboxes=-1,
        )

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """Execute Python code using the LiveCodeBench sandbox environment.

        Args:
            code (str): Python code to execute.
            timeout (int): Maximum execution time in seconds, defaults to 12.
            **kwargs: Additional parameters (unused but kept for compatibility).

        Returns:
            CodeToolOutput: Carries ``stdout``, ``stderr`` and ``output`` on
            success, or ``error`` if the sandbox itself raised.
        """
        try:
            stdout, stderr, result = lcb_sandbox(code, timeout=timeout)
            return CodeToolOutput(name=self.name or "python", stdout=stdout, stderr=stderr, output=result)
        except Exception as e:
            # Top-level boundary: report sandbox failures as tool output
            # instead of raising into the caller.
            return CodeToolOutput(
                name=self.name or "python",
                error=f"Sandbox Error: {type(e).__name__} - {str(e)}",
            )


if __name__ == "__main__":
    # Create a Python interpreter instance
    interpreter = LCBPythonInterpreter()

    # Example code to execute
    test_code = """
# Generate a large amount of code
result = 0
for i in range(1000):
    exec(f"var_{i} = {i}")
    result += i
# Final expression after lots of code
result  # Should be converted to return
"""

    # Run code
    print(interpreter(code=test_code))
http://www.jsqmd.com/news/115432/

相关文章:

  • 【DPFSP问题】基于matlab鳄鱼伏击算法CAOA求解分布式置换流水车间调度DPFSP【含Matlab源码 14744期】
  • Android定制系统参数
  • 【NWFSP问题】基于matlab鳄鱼伏击算法CAOA求解零等待流水车间调度问题NWFSP【含Matlab源码 14745期】
  • 详细介绍:Java基础篇——一文搞懂 HashMap 底层原理
  • 【拯救HMI】构建您的HMI组件库:提升设计一致性与开发效率的终极法则
  • [Windows] 360极速浏览器v23.1.1137.64绿简版
  • 痞子衡嵌入式:16MB以上NOR Flash地址模式切换会造成软复位后i.MXRT无法正常启动
  • [Android] 网易云音乐v6.1.70 车机官方定制版
  • 最近在拆解某大厂量产的6.6kW OBC程序时,发现他们PFC和LLC的代码架构有点意思。先看这段PFC的电压环控制核心代码
  • 2025年12月Plc,PLC电工,学plc系统厂家品牌推荐榜,彰显工控国产技术实力 - 品牌鉴赏师
  • cesium126,230816,Ce for Ue 在 Sublevel 中添加各自的天气光照系统的要点:
  • 通用音频系统全链路实战指南
  • 当数字员工搭载AI销冠系统,如何迅速提升销售效率?
  • .net 6及以上版本 普通控制台程序 初始化项目介绍(非WebAPI)
  • 2025年12月plc程序,西门子PLC,三菱PLC厂家推荐:工控行业权威盘点与品质红榜发布 - 品牌鉴赏师
  • 还在手动创建优惠券?RPA一键生成希音活动,效率提升50倍![特殊字符]
  • 融合 Dify 与数眼智能:打造大学生专属 AI 模拟面试助手全攻略
  • 智能衣柜—穿搭助手,内置温湿度传感器,潮湿天气启动除湿功能,防止衣服发霉,APP还能记录衣服穿着频率,推荐久没穿的衣服,避免穿搭重复。
  • hal!HalRequestSoftwareInterrupt是KAPC的情况和hal!HalpApcInterrupt调试记录
  • LLM学习宝典:从理论基础到工程实践的完整路径_大模型入门学习教程(非常详细)看这一篇就够了!
  • pbootcms产品添加与修改
  • vmware安装macos
  • 9 个降AI率工具推荐,继续教育学生必看!
  • 2025大模型学习全攻略:零基础也能快速上手_【小白入门大模型】从零开始学大模型
  • 还在手动分析用户行为?RPA+AI解码希音消费密码,效率暴增100倍![特殊字符]
  • jsxjfnnfdm
  • 研究生福音:8款免费AI工具实测,1小时生成万字问卷论文,真实参考文献必备!
  • 【接口测试】8_Postman _Postman测试报告
  • jdjxjfjcjc
  • 从男孩到男人:爱、榜样与放手的艺术