PCB Environment 2
Loading...
Searching...
No Matches
Agent.hpp
1#ifndef GYM_PCB_RL_AGENT_H
2#define GYM_PCB_RL_AGENT_H
3
4#include "Defs.hpp"
5#include "Py.hpp"
6#include "Parameter.hpp"
7#include "Signals.hpp"
8#include "RL/Action.hpp"
9#include "RL/Stats.hpp"
10#include "UI/LockStep.hpp"
11#include <chrono>
12#include <map>
13#include <memory>
14#include <mutex>
15#include <set>
16#include <vector>
17
18class ActionSpace;
19class Policy;
21class PCBoard;
22class Connection;
23
// Indices selecting a method of the agent's Python interface.
// The V, Q, P and P_V values are what Agent::py_V / py_Q / py_P / py_P_V
// pass as the `index` argument of Agent::pycall_floatarray().
constexpr uint PCB_PY_I_METHOD_EVENT = 0u;
constexpr uint PCB_PY_I_METHOD_V     = 1u;
constexpr uint PCB_PY_I_METHOD_Q     = 2u;
constexpr uint PCB_PY_I_METHOD_P     = 3u;
constexpr uint PCB_PY_I_METHOD_P_V   = 4u;
29
30class Agent
31{
32public:
33 static Agent *create(const std::string &type);
34public:
35 Agent(const std::string &name);
36 virtual ~Agent();
37 const std::string& name() const { return mName; }
38 virtual void setStateRepresentationParams(PyObject *) { }
39 void setParameters(PyObject *);
40 virtual void setParameter(const std::string &varName, int index, PyObject *);
41 const std::map<std::string, Parameter *>& getParameters() const { return mParameters; }
42 void setPythonInterface(PyObject *);
43 void setTrainingMode(bool on);
44
45 virtual void setPCB(const std::shared_ptr<PCBoard>&);
46 virtual void setManagedConnections(const std::set<Connection *>&);
47 void setManagedConnectionsToUnrouted();
48 void setActionSpace(const ActionSpace&);
51 void setRewardFn(RewardFunction *);
52 void setStateRepresentation(StateRepresentation *);
53
54 virtual bool _run() { return false; }
55 bool run();
56 void setRunArgs(PyObject *);
57 void setTimeout(uint64_t usec);
58 void setTimeoutExpired();
59 void setActionLimit(int64_t count);
60 void setClearBoardBeforeRun(bool);
61 float getProgress() const { return mProgress; }
62
63 virtual PyObject *step(PyObject *) { return 0; }
64 virtual PyObject *get_state(PyObject *) { return 0; }
65 virtual PyObject *reset() { return 0; }
66
67 void eraseManagedConnections();
68
70 PCBoard *getPCB() const { return mPCB.get(); }
71 RewardFunction& getRewardFn() const { return *mRewardFn.get(); }
72 Policy *getPolicy() const { return mPolicy.get(); }
73 StateRepresentation *getSR() const { return mStateRep.get(); }
74 uint32_t getIteration() const { return mIteration; }
75 uint64_t numActions() const { return mNumActions; }
76 uint64_t usecsSinceStart() const;
77 const std::vector<Connection *>& getConnections() const { return mConnections; }
78 const std::set<Connection *>& getConnectionSet() const { return mConnectionSet; }
79 Connection *setConnectionLRU(PyObject *);
80 Connection *getConnectionLRU() const { return mConnectionLRU; }
81 Action::Result evaluateCurrentState() const;
82
83 void startTimer();
84 bool hasTimerExpired() const;
85 void resetActionCount();
86 void resetActionLimit();
87 void countActions(int);
88 bool actionLimitExceeded() const;
89
93 std::vector<float> py_V(PyObject *states) { return pycall_floatarray(PCB_PY_I_METHOD_V, states); }
94 std::vector<float> py_Q(PyObject *states) { return pycall_floatarray(PCB_PY_I_METHOD_Q, states); }
95 std::vector<float> py_P(PyObject *states) { return pycall_floatarray(PCB_PY_I_METHOD_P, states); }
96 std::vector<float> py_P_V(PyObject *states) { return pycall_floatarray(PCB_PY_I_METHOD_P_V, states); }
98 void py_event(PyObject *event);
99
100 StepLock& getStepLock() { return mStepLock; }
101protected:
102 std::shared_ptr<PCBoard> mPCB;
103 std::vector<Connection *> mConnections;
104 std::set<Connection *> mConnectionSet;
105 Connection *mConnectionLRU{0};
106 const ActionSpace *mActionSpace{0};
107 std::unique_ptr<Policy> mPolicy;
108 std::unique_ptr<StateRepresentation> mStateRep;
109 std::unique_ptr<RewardFunction> mRewardFn;
110 ResultCollection mStats;
111 uint32_t mIteration{0};
112 float mProgress{-1.0f}; // set this from 0 to 1 when running if you want the UI to show it
113 bool mClearBoardBeforeRun{false};
114 SignalContext mSignals;
115 std::map<std::string, Parameter *> mParameters;
116
117 void initParameters();
118
119 PyObject *mPython{0};
120 PyObject *mMethodNamesPy[5]{0,0,0,0,0};
121private:
122 int64_t mActionLimit{std::numeric_limits<int64_t>::max()};
123 int64_t mNumActions{0};
124 int64_t mActionsLeft{std::numeric_limits<int64_t>::max()};
125 uint64_t mTimeoutUSecs{0};
126 std::chrono::time_point<std::chrono::system_clock> mTimerStartPoint;
127 std::chrono::time_point<std::chrono::system_clock> mTimeoutPoint;
128 bool mTraining{false};
129 mutable StepLock mStepLock;
130 const std::string mName;
131
132 std::vector<float> pycall_floatarray(uint index, PyObject *);
133};
134
135inline void Agent::setRewardFn(RewardFunction *RF)
136{
137 if (RF && mPCB)
138 RF->setContext(*mPCB);
139 mRewardFn.reset(RF);
140}
141
142inline void Agent::setActionSpace(const ActionSpace &A)
143{
144 mActionSpace = &A;
145}
146
147inline void Agent::setClearBoardBeforeRun(bool b)
148{
149 mClearBoardBeforeRun = b;
150}
151
152inline void Agent::setTimeout(uint64_t usec)
153{
154 mTimeoutUSecs = usec;
155}
156inline uint64_t Agent::usecsSinceStart() const
157{
158 const auto now = std::chrono::system_clock::now();
159 return
160 std::chrono::duration_cast<std::chrono::microseconds>(now - mTimerStartPoint).count();
161}
162inline void Agent::setTimeoutExpired()
163{
164 mTimeoutPoint = std::chrono::system_clock::now();
165}
166inline void Agent::startTimer()
167{
168 mTimerStartPoint = std::chrono::system_clock::now();
169 if (mTimeoutUSecs)
170 mTimeoutPoint = mTimerStartPoint + std::chrono::microseconds(mTimeoutUSecs);
171 else
172 mTimeoutPoint = std::chrono::system_clock::time_point::max();
173}
174inline bool Agent::hasTimerExpired() const
175{
176 return std::chrono::system_clock::now() >= mTimeoutPoint;
177}
178
179inline void Agent::setActionLimit(int64_t n)
180{
181 mActionLimit = (n > 0) ? n : std::numeric_limits<int64_t>::max();
182}
183inline void Agent::resetActionCount()
184{
185 mNumActions = 0;
186}
187inline void Agent::resetActionLimit()
188{
189 mActionsLeft = mActionLimit;
190}
191inline void Agent::countActions(int n)
192{
193 mNumActions += n;
194 mActionsLeft -= n;
195 mStepLock.wait(10);
196}
197inline bool Agent::actionLimitExceeded() const
198{
199 return mActionsLeft <= 0;
200}
201
202inline void Agent::setTrainingMode(bool b)
203{
204 mTraining = b;
205}
206
207#endif // GYM_PCB_RL_AGENT_H
Definition ActionSpace.hpp:13
void py_event(PyObject *event)
Tell anything else to your Python interface.
PCBoard * getPCB() const
These are only valid as long as you have got a hold of the agent.
Definition Agent.hpp:70
void setPolicy(Policy *)
Ownership of Policy, RewardFunction, and StateRepresentation is transferred to the agent.
std::vector< float > py_V(PyObject *states)
Definition Agent.hpp:93
Definition Connection.hpp:17
Definition PCBoard.hpp:36
Definition Policy.hpp:8
Definition Reward.hpp:25
The subclass create(PyObject *) functions check if the PyObject matches and return a new instance if i...
Definition StateRepresentation.hpp:17
Definition LockStep.hpp:9