# Trying Out ML-Agents (RollerBall): A Simple Roll-the-Ball-to-the-Target Demo

Goal: touch the green cube.
Agent: a red ball.

First, set up the environment and the agent:

- Add a floor (this defines the play area).
- Add the Target (a green cube) and the Agent (a red ball).

Next, write the rules and the Agent's actions:

```
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

public class RollerAgent : Agent
{
    public Transform target;
    public float forceMult = 10f;

    private Rigidbody rBody;

    void Start()
    {
        rBody = GetComponent<Rigidbody>();
    }

    public override void OnEpisodeBegin()
    {
        // If the agent fell off the floor, zero its momentum and put it back
        if (this.transform.localPosition.y < 0)
        {
            this.rBody.velocity = Vector3.zero;
            this.rBody.angularVelocity = Vector3.zero;
            this.transform.localPosition = new Vector3(0, 0.5f, 0);
        }

        // Move the target to a random spot on the floor
        target.localPosition = new Vector3(Random.Range(-4f, 4f), 0.5f, Random.Range(-4f, 4f));
    }

    // What the agent needs to observe about itself and the target (8 values total)
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(target.localPosition);         // 3
        sensor.AddObservation(this.transform.localPosition); // 3
        sensor.AddObservation(rBody.velocity.x);             // 1
        sensor.AddObservation(rBody.velocity.z);             // 1
    }

    // Apply the chosen action and hand out rewards
    public override void OnActionReceived(ActionBuffers actions)
    {
        float x = actions.ContinuousActions[0];
        float z = actions.ContinuousActions[1];
        rBody.AddForce(new Vector3(x, 0, z) * forceMult);

        float dist = Vector3.Distance(this.transform.localPosition, target.localPosition);
        if (dist < 1.42f)
        {
            // Reached the target
            SetReward(1.0f);
            EndEpisode();
        }
        else if (this.transform.localPosition.y < 0)
        {
            // Fell off the floor
            EndEpisode();
        }
    }

    // Keyboard control for manual testing
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");
    }
}
```

In the agent's Behavior Parameters component, make sure the Behavior Name matches the config below (RollerBall), the Vector Observation Space Size is 8, and the action spec has 2 continuous actions.

Once the agent is tuned, duplicate the whole setup so several copies can train at once, which speeds up training.

Training configuration (`config/rollerball.yaml`):

```
behaviors:
  RollerBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 10
      buffer_size: 100
      learning_rate: 3.0e-4
      beta: 5.0e-4
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
      beta_schedule: constant
      epsilon_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    max_steps: 50000
    time_horizon: 64
    summary_freq: 10000
```

With all of the above done, activate the environment you set up in Anaconda and launch training:

```
mlagents-learn config/rollerball.yaml --run-id=RollerBall
```

When the Unity logo appears in the console, the parameters were loaded successfully; switch back to Unity and press Play, and training begins.

Training complete.

Result:

{%youtube mWf2kcA2kqE %}

Using the GPU
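ML-Agents trains its networks with PyTorch, so GPU use is controlled on the Python side. A minimal sketch, assuming a recent ML-Agents release (which exposes a `--torch-device` option) and a CUDA-enabled PyTorch install; the run id here is arbitrary:

```
# Assumes a CUDA-capable PyTorch build; the run id is just an example
mlagents-learn config/rollerball.yaml --run-id=RollerBallGPU --torch-device cuda
```

For a model this small (two hidden layers of 128 units), the GPU rarely helps much: most wall-clock time goes to the Unity simulation itself, so GPU training mainly pays off with larger networks or visual observations.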
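Whichever device you train on, you can watch progress with TensorBoard, which reads the summaries that `mlagents-learn` writes under `results/<run-id>`:

```
tensorboard --logdir results
```

Open http://localhost:6006 and the Environment/Cumulative Reward curve should climb toward 1.0 as the agent learns. One usage note: if you rerun with an existing run id, `mlagents-learn` refuses to overwrite it; pass `--resume` to continue the run or `--force` to start over.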