静辰寒韵 / SNARM-UAV-Learning

Note: this repository declares no open-source license (LICENSE) file; before use, check the project description and the code's upstream dependencies.
SNARM_main1.py 28.16 KB
Committed by 静辰寒韵 on 2023-03-13 09:42: simulation code.
# -*- coding: utf-8 -*-
"""
Created on Nov. 7
Modified on Nov. 25, 2019
@author: Yong Zeng
For cellular-connected UAV, implement simultaneous navigation and radio mapping (SNARM) via
deep reinforcement learning (DRL)
"""
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf
from collections import deque
import time
import random
from tensorflow.keras.layers import Input, Dense,Lambda
from tensorflow.keras.models import Model
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from numpy import linalg as LA
import tensorflow.keras.backend as K
import radio_environment as rad_env #the actual radio environment
from radio_mapping import RadioMap #the radio map class
X_MAX=2000.0
Y_MAX=2000.0 #The area region in meters
MAX_VALS=np.array([[X_MAX,Y_MAX]])
DESTINATION=np.array([[1400,1600]],dtype="float32")#UAV flying destination in meter
DIST_TOLERANCE=30 #considered to have reached the destination once the UAV is within DIST_TOLERANCE meters of it
DISCOUNT = 1
REPLAY_MEMORY_SIZE = 120_000 # How many last steps to keep for model training
MIN_REPLAY_MEMORY_SIZE = 6_000  # Minimum number of steps in memory to start training (modified)
MINIBATCH_SIZE = 64  # How many steps (samples) to use for training (modified)
UPDATE_TARGET_EVERY = 5  # Update the target network every 5 terminal states (ends of episodes)
MAX_STEP=200 #maximum number of time steps per episode
MODEL_NAME = 'snarm1'
MIN_REWARD = -1000 # For model save
nSTEP=50 #parameter for multi-step learning
# Environment settings
EPISODES = 2000#Number of training episodes
# Exploration settings
epsilon =0.5 # not a constant, going to be decayed
EPSILON_DECAY = 0.998
MIN_EPSILON = 0
episode_all=np.arange(EPISODES)
epsilon_all=epsilon*EPSILON_DECAY**episode_all
epsilon_all=np.maximum(epsilon_all,MIN_EPSILON)
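# Illustration: with epsilon=0.5 and EPSILON_DECAY=0.998, exploration decays to
# about 0.5*0.998**1000 ≈ 0.068 by episode 1000; the full schedule is plotted below.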
plt.figure()
plt.plot(episode_all,epsilon_all,'b',linewidth=2)
plt.grid(True,which='major',axis='both')
plt.show()
# Stats settings
AGGREGATE_STATS_EVERY = 50 # episodes
SHOW_PREVIEW = False
delta_t=0.5 #time step length in seconds
#penalty measured in terms of the time required to reach destination
REACH_DES_REWARD=200
MOVE_PENALTY = 1
NON_COVER_PENALTY = 40
OUT_BOUND_PENALTY = 10000
x=np.linspace(0,X_MAX,200)
y=np.linspace(0,Y_MAX,200)
OBSERVATION_SPACE_VALUES=(2,)#2-dimensional UAV flight, x-y coordinate of UAV
ACTIONS=np.array([[0, 1],
                  [1, 0],
                  [0, -1],
                  [-1, 0]], dtype=int)#the possible actions (UAV flying directions)
ACTION_SPACE_SIZE = ACTIONS.shape[0]
MAX_SPEED=20 #maximum UAV speed in m/s
STEP_DISPLACEMENT=MAX_SPEED*delta_t #The displacement per time step
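# With MAX_SPEED=20 m/s and delta_t=0.5 s, each step displaces the UAV by 10 m,
# so an episode of at most MAX_STEP=200 steps covers at most 2 km of flight.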
# Create models folder
if not os.path.isdir('models'):
    os.makedirs('models')
# Own Tensorboard class
class ModifiedTensorBoard(TensorBoard):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)
        self._log_write_dir = self.log_dir

    def set_model(self, model):
        self.model = model
        self._train_dir = os.path.join(self._log_write_dir, 'train')
        self._train_step = self.model._train_counter
        self._val_dir = os.path.join(self._log_write_dir, 'validation')
        self._val_step = self.model._test_counter
        self._should_write_train_graph = False

    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    def on_batch_end(self, batch, logs=None):
        pass

    def on_train_end(self, _):
        pass

    def update_stats(self, **stats):
        with self.writer.as_default():
            for key, value in stats.items():
                tf.summary.scalar(key, value, step=self.step)
            self.writer.flush()
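# Note (presumed intent of the subclass above): the stock Keras TensorBoard
# callback starts a new log file at every .fit() call; since this script calls
# .fit() once per training update, the subclass keeps a single writer and logs
# against the manually managed self.step (set to the episode index below).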
class UAVEnv:
    def reset(self):
        self.episode_step = 0
        s0=self.random_generate_states(num_states=1)
        return s0

    def random_generate_states(self,num_states):
        loc_x=np.random.uniform(50,X_MAX-50,(num_states,1))
        loc_y=np.random.uniform(50,Y_MAX-50,(num_states,1))
        loc=np.concatenate((loc_x,loc_y),axis=1)
        return loc

    #For each location visited by the UAV, J signal measurements are obtained from
    #each of the M cellular BSs; based on these M*J measurements, the empirical
    #outage probability is calculated.
    def get_empirical_outage(self, location):
        #location is given in meters; convert it to kilometers
        loc_km=np.zeros(shape=(1,3))
        loc_km[0,:2]=location/1000
        loc_km[0,2]=0.1 #UAV height in km
        Pout=rad_env.getPointMiniOutage(loc_km)
        return Pout[0]

    def step(self, current_state, action_idx, cur_traj):#the actual step
        self.episode_step += 1
        next_state=current_state+STEP_DISPLACEMENT*ACTIONS[action_idx]
        outbound=False
        out_bound_check1=next_state<0
        out_bound_check2=next_state[0,0]>X_MAX
        out_bound_check3=next_state[0,1]>Y_MAX
        if out_bound_check1.any() or out_bound_check2.any() or out_bound_check3.any():
            outbound=True
            next_state[next_state<0]=0
            next_state[0,0]=np.minimum(X_MAX,next_state[0,0])
            next_state[0,1]=np.minimum(Y_MAX,next_state[0,1])
        if LA.norm(next_state-DESTINATION)<=DIST_TOLERANCE:
            terminal=True
            print('Reach destination====================================================================================!!!!!!!!')
        else:
            terminal=False
        if terminal or outbound:
            reward=-MOVE_PENALTY
        else:
            Pout=self.get_empirical_outage(next_state)
            reward=-MOVE_PENALTY-NON_COVER_PENALTY*Pout
            Pout=np.array(Pout)
            Pout=Pout.reshape((-1,1))
            new_row=np.concatenate((next_state,Pout),axis=1)
            radio_map.add_new_measured_data(new_row)#store the measured data in the radio map's database
        done = False
        if terminal or self.episode_step >= MAX_STEP or outbound:
            done = True
        return next_state, reward, terminal, outbound, done

    def simulated_step(self,current_state, action_idx, cur_traj):
        #the simulated step: the UAV does not actually fly; instead, the
        #radio map is used to simulate the step
        self.episode_step+=1
        next_state=current_state+STEP_DISPLACEMENT*ACTIONS[action_idx]
        outbound=False
        out_bound_check1=next_state<0
        out_bound_check2=next_state[0,0]>X_MAX
        out_bound_check3=next_state[0,1]>Y_MAX
        if out_bound_check1.any() or out_bound_check2.any() or out_bound_check3.any():
            outbound=True
            next_state[next_state<0]=0
            next_state[0,0]=np.minimum(X_MAX,next_state[0,0])
            next_state[0,1]=np.minimum(Y_MAX,next_state[0,1])
        if LA.norm(next_state-DESTINATION)<=DIST_TOLERANCE:
            terminal=True
            print('Simulated: Reach destination================================================================================!!!!!!!!')
        else:
            terminal=False
        if terminal or outbound:
            reward=-MOVE_PENALTY
        else:#This part is what distinguishes the simulated step from the actual step
            Pout=radio_map.predict_outage_prob(next_state)#The outage probability is predicted from the radio map, instead of being measured
            reward=-MOVE_PENALTY-NON_COVER_PENALTY*Pout[0]
        done = False
        if terminal or self.episode_step >= MAX_STEP or outbound:
            done = True
        return next_state, reward, terminal, outbound, done
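# step() flies the UAV for real: the outage probability is actually measured and
# appended to the radio map's database. simulated_step() instead queries the
# learned radio map for a predicted outage probability, generating experience
# without any physical flight or new measurement.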
env = UAVEnv()
sim_env=UAVEnv()#Simulated UAV environment
radio_map=RadioMap(X_MAX,Y_MAX)
# For more reproducible results
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)
# Agent class
class DQNAgent:
    def __init__(self):
        # Main model
        self.model = self.create_model(dueling=False)# modified
        self.initialize_model()

        # Target network
        self.target_model = self.create_model(dueling=False)# modified
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        # Custom tensorboard object
        self.tensorboard = ModifiedTensorBoard(log_dir="logs/{}-{}".format(MODEL_NAME, int(time.time())))

        # Used to count when to update target network with main network's weights
        self.target_update_counter = 0

    def create_model(self, dueling):
        inp = Input(shape=OBSERVATION_SPACE_VALUES)
        outp=Dense(512,activation='relu')(inp)
        outp=Dense(256,activation='relu')(outp)
        outp=Dense(128,activation='relu')(outp)
        outp=Dense(128,activation='relu')(outp)
        if dueling:
            # Have the network estimate the Advantage function as an intermediate layer
            outp=Dense(ACTION_SPACE_SIZE+1, activation='linear')(outp)
            outp=Lambda(lambda i: K.expand_dims(i[:,0],-1) + i[:,1:] - K.mean(i[:,1:], axis=1, keepdims=True), output_shape=(ACTION_SPACE_SIZE,))(outp)#mean over actions (axis=1)
        else:
            outp=Dense(ACTION_SPACE_SIZE,activation='linear')(outp)
        model=Model(inp,outp)
        model.compile(optimizer='adam',loss='mean_squared_error',metrics=['mean_absolute_error', 'mean_squared_error'])
        model.summary()
        return model
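    # Dueling head (when dueling=True): the final Dense layer emits
    # [V(s), A(s,a_1), ..., A(s,a_K)] and the Lambda layer recombines them as
    #   Q(s,a) = V(s) + A(s,a) - mean_a' A(s,a'),
    # where subtracting the mean advantage keeps V and A identifiable.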
    def normalize_data(self,input_data):
        return input_data/MAX_VALS

    def initialize_model(self):
        #initialize the DQN so that the Q value of each (state,action) pair
        #equals -MOVE_PENALTY*distance/STEP_DISPLACEMENT,
        #where distance is the distance between the next state and the destination;
        #this encourages shortest-path flying initially, when there is no information on the coverage map
        xx,yy=np.meshgrid(x,y,indexing='ij')
        plt.figure(0)
        plt.plot(DESTINATION[0,0],DESTINATION[0,1],'r>',markersize=15)
        plt.show()
        num_states=100_000
        xy_loc=env.random_generate_states(num_states)
        Actions_aug=np.zeros((1,xy_loc.shape[1],ACTION_SPACE_SIZE),dtype=int)
        for i in range(Actions_aug.shape[2]):
            Actions_aug[:,:,i]=ACTIONS[i]
        Actions_aug=np.tile(Actions_aug,(xy_loc.shape[0],1,1))
        xy_loc_aug=np.zeros((xy_loc.shape[0],xy_loc.shape[1],1))
        xy_loc_aug[:,:,0]=xy_loc
        xy_loc_aug=np.repeat(xy_loc_aug,ACTION_SPACE_SIZE,axis=2)
        xy_loc_next_state=xy_loc_aug+STEP_DISPLACEMENT*Actions_aug
        xy_loc_next_state[xy_loc_next_state<0]=0
        xy_loc_next_state[:,0,:]=np.minimum(X_MAX,xy_loc_next_state[:,0,:])
        xy_loc_next_state[:,1,:]=np.minimum(Y_MAX,xy_loc_next_state[:,1,:])
        end_loc_reshaped=np.zeros((1,2,1))
        end_loc_reshaped[0,:,0]=DESTINATION
        distance_to_destination=LA.norm(xy_loc_next_state-end_loc_reshaped,axis=1)#compute the distance to the destination
        Q_init=-distance_to_destination/STEP_DISPLACEMENT*MOVE_PENALTY
        train_data=xy_loc[:int(num_states*0.8),:]
        train_label=Q_init[:int(num_states*0.8),:]
        test_data=xy_loc[int(num_states*0.8):,:]
        test_label=Q_init[int(num_states*0.8):,:]
        history=self.model.fit(self.normalize_data(train_data),train_label,epochs=20,validation_split=0.2,verbose=1)
        history_dict = history.history
        mse = history_dict['mean_squared_error']
        val_mse = history_dict['val_mean_squared_error']
        mae = history_dict['mean_absolute_error']
        val_mae=history_dict['val_mean_absolute_error']
        epochs = range(1, len(mse) + 1)

        plt.figure()
        plt.plot(epochs, mse, 'bo', label='Training MSE')
        plt.plot(epochs, val_mse, 'r', label='Validation MSE')
        plt.title('Training and validation MSE')
        # plt.ylim(0,100)
        plt.xlabel('Epochs')
        plt.ylabel('MSE')
        plt.legend()
        plt.show()

        plt.figure()
        plt.plot(epochs, mae, 'bo', label='Training MAE')
        plt.plot(epochs, val_mae, 'r', label='Validation MAE')
        plt.title('Training and validation MAE')
        plt.xlabel('Epochs')
        plt.ylabel('MAE')
        # plt.ylim(0,15)
        plt.legend()
        plt.show()

        result=self.model.evaluate(self.normalize_data(test_data),test_label)
        print(result)
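    # For example, with MOVE_PENALTY=1 and a 10 m step, a next state 500 m from
    # DESTINATION is initialized to Q = -50: the cost of flying straight there.
    # This biases the untrained agent toward shortest-path flight.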
    #Add data to the replay memory for the n-step return:
    #(St, At, R_nstep, S_{t+n}, terminal, outbound, done),
    #where R_nstep = R_{t+1} + gamma*R_{t+2} + gamma^2*R_{t+3} + ... + gamma^(nSTEP-1)*R_{t+n}
    def update_replay_memory_nStepLearning(self,slide_window,nSTEP,endEpisode):
        #update only after n steps
        if len(slide_window)<nSTEP:
            return

        # slide_window contains tuples in the following order:
        # (current_state,action_idx,reward,next_state,terminal,outbound,done)
        rewards_nsteps=[transition[2] for transition in slide_window]
        discount_nsteps=DISCOUNT**np.arange(nSTEP)
        R_nstep=sum(rewards_nsteps*discount_nsteps)

        St=slide_window[0][0]
        At=slide_window[0][1]
        St_plus_n=slide_window[-1][3]
        terminal=slide_window[-1][4]
        outbound=slide_window[-1][5]
        done=slide_window[-1][6]

        # Store experience in the memory buffer
        self.replay_memory.append((St,At,R_nstep,St_plus_n,terminal,outbound,done))

        if endEpisode:#Truncated n-step returns for the last few steps at the end of the episode
            for i in range(1,nSTEP):
                rewards_i=rewards_nsteps[i:]
                discount_i=DISCOUNT**np.arange(nSTEP-i)
                R_i=sum(rewards_i*discount_i)
                St_i=slide_window[i][0]
                At_i=slide_window[i][1]
                self.replay_memory.append((St_i,At_i,R_i,St_plus_n,terminal,outbound,done))
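    # Worked example (illustrative, with DISCOUNT=1 and, say, nSTEP=3): a window
    # with rewards [-1, -41, -1] stores the 3-step return -1 + (-41) + (-1) = -43
    # together with (S_t, A_t, S_{t+3}); if the episode ends there, the truncated
    # returns -42 and -1 are also stored for the two remaining start steps.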
    def sample_batch_from_replay_memory(self):
        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)
        current_state_batch = np.zeros((MINIBATCH_SIZE, OBSERVATION_SPACE_VALUES[0]))
        next_state_batch = np.zeros((MINIBATCH_SIZE, OBSERVATION_SPACE_VALUES[0]))
        actions_idx, rewards, terminal, outbound, done = [], [], [], [], []
        for idx, val in enumerate(minibatch):
            current_state_batch[idx] = val[0]
            actions_idx.append(val[1])
            rewards.append(val[2])
            next_state_batch[idx] = val[3]
            terminal.append(val[4])
            outbound.append(val[5])
            done.append(val[6])
        return current_state_batch, actions_idx, rewards, next_state_batch, terminal, outbound, done
    def deepDoubleQlearn(self,episode_done):
        # Start training only if a certain number of samples has been saved
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        current_state_batch, actions_idx, rewards, next_state_batch, terminal, outbound, done = self.sample_batch_from_replay_memory()

        current_Q_values=self.model.predict(self.normalize_data(current_state_batch))
        next_Q_values_currentNetwork=self.model.predict(self.normalize_data(next_state_batch))# use the current network to select the action
        next_actions=np.argmax(next_Q_values_currentNetwork,axis=1)
        next_Q_values = self.target_model.predict(self.normalize_data(next_state_batch))# still use the target network to evaluate its value

        Y=current_Q_values
        for i in range(MINIBATCH_SIZE):
            if terminal[i]:
                target = rewards[i]+REACH_DES_REWARD
            elif outbound[i]:
                target = rewards[i]-OUT_BOUND_PENALTY
            else:
                target = rewards[i] + DISCOUNT**nSTEP*next_Q_values[i,next_actions[i]]
            Y[i,actions_idx[i]]=target

        self.model.fit(self.normalize_data(current_state_batch), Y, batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if episode_done else None)

        # Update target network counter every episode
        if episode_done:
            self.target_update_counter += 1

        # If counter reaches set value, update target network with weights of main network
        if self.target_update_counter >= UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0
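    # Double DQN target with n-step returns, as implemented above:
    #   a* = argmax_a Q_online(S_{t+n}, a)                       (online network selects the action)
    #   Y  = R_nstep + DISCOUNT**nSTEP * Q_target(S_{t+n}, a*)   (target network evaluates it)
    # Decoupling selection from evaluation reduces the overestimation bias of the
    # max operator in vanilla Q-learning.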
    def choose_action(self,current_state,cur_traj,epsilon):
        next_possible_states=current_state+STEP_DISPLACEMENT*ACTIONS
        next_possible_states[next_possible_states<0]=0
        next_possible_states[:,0]=np.minimum(next_possible_states[:,0],X_MAX)
        next_possible_states[:,1]=np.minimum(next_possible_states[:,1],Y_MAX)
        next_possible_states=next_possible_states.tolist()
        no_repetition=[]
        cur_traj=cur_traj[-10:] #no return to the previous few locations
        for state in next_possible_states:
            no_repetition.append(state not in cur_traj)
        actions_idx_all=np.arange(ACTION_SPACE_SIZE)
        actions_idx_valid=actions_idx_all[no_repetition]
        if np.random.rand()<=epsilon or len(actions_idx_valid)==0:#Exploration
            action_idx=np.random.randint(0,ACTION_SPACE_SIZE)
            return action_idx
        else:#Exploitation
            Q_value=self.model.predict(self.normalize_data(current_state))
            Q_value=Q_value[0]
            action_idx_maxVal=np.argmax(Q_value)
            if action_idx_maxVal in actions_idx_valid:
                action_idx=action_idx_maxVal
            else:
                action_idx=random.sample(actions_idx_valid.tolist(),1)
                action_idx=action_idx[0]
            return action_idx
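    # choose_action is epsilon-greedy with a revisit mask: actions that lead back
    # to any of the last 10 visited locations are excluded when exploiting, and a
    # uniformly random action is taken when exploring or when no action is valid.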
agent = DQNAgent()

ep_rewards,ep_trajectory,ep_reach_terminal,ep_outbound,ep_actions=[],[],[],[],[]
ep_MSE,ep_MAE,ep_Max_Absolute_Error,ep_bin_cross_entr=[],[],[],[]
ep_sim_rewards,ep_sim_trajectory,ep_sim_reach_terminal,ep_sim_outbound,ep_sim_actions=[],[],[],[],[]
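# SNARM training: each actual UAV step below is interleaved with a number of
# simulated steps taken on the learned radio map (a Dyna-style arrangement);
# that number grows as floor(episode/100) and is capped at 10, so early episodes
# rely on real measurements only, and model-based planning ramps up as the
# radio map becomes more trustworthy.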
# Iterate over episodes
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
    # Update tensorboard step every episode
    agent.tensorboard.step = episode

    # Restarting episode - reset episode reward and step number
    episode_reward = 0
    episode_sim_reward = 0

    # Reset environment and get initial state
    current_state = env.reset()
    cur_trajectory=[]
    cur_actions=[]
    slide_window=deque(maxlen=nSTEP)

    # Reset flag and start iterating until episode ends
    done=False

    #The starting state of the simulated trajectory
    sim_current_state=sim_env.reset()
    sim_cur_trajectory=[]
    sim_slide_window=deque(maxlen=nSTEP)
    sim_done=False
    sim_step_per_act_step=int(np.floor(episode/100))#Number of simulated steps performed per actual step
    sim_step_per_act_step=np.minimum(sim_step_per_act_step,10)

    while not done:
        #The actual UAV flight
        cur_trajectory.append(np.squeeze(current_state).tolist())
        action_idx=agent.choose_action(current_state,cur_trajectory,epsilon)
        #actual step and measurement
        next_state, reward, terminal, outbound, done = env.step(current_state,action_idx,cur_trajectory)
        radio_map.update_radio_map(verbose_on=0)
        episode_reward += reward
        slide_window.append((current_state,action_idx,reward,next_state,terminal,outbound,done))
        agent.update_replay_memory_nStepLearning(slide_window,nSTEP,done)
        agent.deepDoubleQlearn(done)
        current_state = next_state

        #The simulated trajectory
        for temp_counter in range(sim_step_per_act_step):
            sim_cur_trajectory.append(np.squeeze(sim_current_state).tolist())
            sim_action_idx=agent.choose_action(sim_current_state,sim_cur_trajectory,epsilon)
            sim_next_state, sim_reward, sim_terminal, sim_outbound, sim_done = sim_env.simulated_step(sim_current_state,sim_action_idx,sim_cur_trajectory)
            episode_sim_reward += sim_reward
            sim_slide_window.append((sim_current_state,sim_action_idx,sim_reward,sim_next_state,sim_terminal,sim_outbound,sim_done))
            agent.update_replay_memory_nStepLearning(sim_slide_window,nSTEP,sim_done)
            agent.deepDoubleQlearn(sim_done)
            sim_current_state = sim_next_state
            if sim_done:#start a new episode if the simulated trajectory completes one episode
                ep_sim_rewards.append(episode_sim_reward)
                ep_sim_trajectory.append(sim_cur_trajectory)
                ep_sim_reach_terminal.append(sim_terminal)
                ep_sim_outbound.append(sim_outbound)
                episode_sim_reward = 0
                sim_current_state=sim_env.reset()
                sim_cur_trajectory=[]
                sim_slide_window=deque(maxlen=nSTEP)
                sim_done=False

    MSE,MAE,Max_Absolute_Error,bin_cross_entr=radio_map.check_radio_map_acc()
    ep_MSE.append(MSE)
    ep_MAE.append(MAE)
    ep_Max_Absolute_Error.append(Max_Absolute_Error)
    ep_bin_cross_entr.append(bin_cross_entr)

    # Append episode reward to a list and log stats (every given number of episodes)
    ep_rewards.append(episode_reward)
    ep_trajectory.append(cur_trajectory)
    ep_reach_terminal.append(terminal)
    ep_outbound.append(outbound)

    if episode%10 == 0:
        print("Episode: {}, total steps: {}, final return: {}".format(episode,len(cur_trajectory),episode_reward))

    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

        # Save model, but only when min reward is greater or equal a set value
        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

    # Decay epsilon
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)
fig=plt.figure(60)
plt.plot(np.arange(len(ep_MSE))+1,ep_MSE,'b-',linewidth=2)
plt.grid(which='major', axis='both')
plt.xlabel('Episode')
plt.ylabel('MSE')
fig.savefig('(snarm1)MSE.eps')
fig.savefig('(snarm1)MSE.pdf')
fig.savefig('(snarm1)MSE.jpg')
fig=plt.figure(70)
plt.plot(np.arange(len(ep_MAE))+1,ep_MAE,'b-',linewidth=2)
plt.grid(which='major', axis='both')
plt.xlabel('Episode')
plt.ylabel('MAE')
fig.savefig('(snarm1)MAE.eps')
fig.savefig('(snarm1)MAE.pdf')
fig.savefig('(snarm1)MAE.jpg')
fig=plt.figure(80)
plt.plot(np.arange(len(ep_Max_Absolute_Error))+1,ep_Max_Absolute_Error,'b-',linewidth=2)
plt.grid(which='major', axis='both')
plt.xlabel('Episode')
plt.ylabel('Maximum absolute error')
fig.savefig('(snarm1)Max_Absolute_Error.eps')
fig.savefig('(snarm1)Max_Absolute_Error.pdf')
fig.savefig('(snarm1)Max_Absolute_Error.jpg')
fig=plt.figure(90)
plt.plot(np.arange(len(ep_bin_cross_entr))+1,ep_bin_cross_entr,'b-',linewidth=2)
plt.grid(which='major', axis='both')
plt.xlabel('Episode')
plt.ylabel('Binary cross entropy')
fig.savefig('(snarm1)Bin_cross_entr.eps')
fig.savefig('(snarm1)Bin_cross_entr.pdf')
fig.savefig('(snarm1)Bin_cross_entr.jpg')
def get_moving_average(mylist,N):
    cumsum, moving_aves = [0], []
    for i, x in enumerate(mylist, 1):
        cumsum.append(cumsum[i-1] + x)
        if i>=N:
            moving_ave = (cumsum[i] - cumsum[i-N])/N
            moving_aves.append(moving_ave)
    return moving_aves
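# Example: get_moving_average([0, 1, 2, 3, 4], 3) returns [1.0, 2.0, 3.0]; the
# first N-1 points produce no output, hence the x-axis offset by N in the plot below.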
fig=plt.figure()
plt.xlabel('Episode',fontsize=14)
plt.ylabel('Return per episode',fontsize=14,labelpad=-2)
N=200
return_mov_avg=get_moving_average(ep_rewards,N)
plt.plot(np.arange(len(return_mov_avg))+N,return_mov_avg,'r-',linewidth=5)
#plt.ylim(-6000,0)
fig.savefig('(snarm1)return.eps')
fig.savefig('(snarm1)return.pdf')
fig.savefig('(snarm1)return.jpg')
##============View the learned radio map for a given height
UAV_height=0.1 # UAV height in km
step=101 #includes the start point at 0 and the end point; the spacing between two sample points is D/(step-1)
D=2
X_vec=range(D*(step-1)+1)
Y_vec=range(D*(step-1)+1)
numX,numY=np.size(X_vec),np.size(Y_vec)
OutageMapLearned=np.zeros(shape=(numX,numY))
#Loc_All=np.zeros(shape=(0,3))
for i in range(numX):
    Loc_cur=np.zeros(shape=(numY,2))
    Loc_cur[:,0]=X_vec[i]/step
    Loc_cur[:,1]=np.array(Y_vec)/step
    Loc_cur=Loc_cur*1000 #convert to meters
    OutageMapLearned[i,:]=radio_map.predict_outage_prob(Loc_cur)
    # Loc_cur[:,2]=UAV_height
    # Loc_All=np.concatenate((Loc_All,Loc_cur),axis=0)
fig=plt.figure(20)
#plt.style.use('classic')
plt.contourf(np.array(X_vec)*10,np.array(Y_vec)*10,1-OutageMapLearned)
v = np.linspace(0, 1.0, 11, endpoint=True)
cbar=plt.colorbar(ticks=v)
cbar.set_label('coverage probability',labelpad=20, rotation=270,fontsize=14)
plt.xlabel('x (meter)',fontsize=14)
plt.ylabel('y (meter)',fontsize=14)
plt.show()
fig.savefig('(snarm1)CoverageMapLearned.eps')
fig.savefig('(snarm1)CoverageMapLearned.pdf')
fig.savefig('(snarm1)CoverageMapLearned.jpg')
npzfile = np.load('radioenvir.npz')
OutageMapActual=npzfile['arr_0']
X_vec=npzfile['arr_1']
Y_vec=npzfile['arr_2']
fig=plt.figure(30)
#xx,yy=np.meshgrid(x,y,indexing='ij')
#plt.contourf(xx,yy,coverage_map)
plt.contourf(np.array(X_vec)*10,np.array(Y_vec)*10,1-OutageMapActual)
v = np.linspace(0, 1.0, 11, endpoint=True)
cbar=plt.colorbar(ticks=v)
#v = np.linspace(0, 1.0, 11, endpoint=True)
#cbar=plt.colorbar(ticks=v)
#cbar.ax.set_yticklabels(['0','0.2','0.4','0.6','0.8','1.0'])
cbar.set_label('coverage probability',labelpad=20, rotation=270,fontsize=14)
for episode_idx in range(episode-200, episode):
    S_seq=ep_trajectory[episode_idx]
    S_seq=np.squeeze(np.asarray(S_seq))
    if S_seq.ndim>1:
        plt.plot(S_seq[0,0],S_seq[0,1],'rx',markersize=5)
        plt.plot(S_seq[:,0],S_seq[:,1],'-')
    else:
        plt.plot(S_seq[0],S_seq[1],'rx',markersize=5)
        plt.plot(S_seq[0],S_seq[1],'-')
plt.plot(DESTINATION[0,0],DESTINATION[0,1],'b^',markersize=25)
plt.xlabel('x (meter)',fontsize=14)
plt.ylabel('y (meter)',fontsize=14)
plt.show()
fig.savefig('(snarm1)trajectoriesSNARM.eps')
fig.savefig('(snarm1)trajectoriesSNARM.pdf')
fig.savefig('(snarm1)trajectoriesSNARM.jpg')
print('{}/{} episodes reach the terminal'.format(ep_reach_terminal.count(True),episode))

#Save the simulation results
np.savez('SNARM_main_Results1.npz',return_mov_avg,ep_rewards,ep_trajectory)