1 Star 3 Fork 0

何群山 / DGraphFin_GNN

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main_GCN.py 5.87 KB
一键复制 编辑 原始数据 按行查看 历史
何群山 提交于 2022-10-25 22:54 . init commit
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# 导入相关包
from utils import DGraphFin
from utils.utils import prepare_folder
from utils.evaluator import Evaluator
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch_geometric.transforms as T
import numpy as np
from torch_geometric.data import Data
import os
# Seed PyTorch's random number generator with a fixed value.
torch.manual_seed(666)
# Select the compute device: CUDA device 0 when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Print whether CUDA is available.
print(torch.cuda.is_available())
# In[3]:
# 定义网络模型
# 导入需要的包
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv # import the GCN convolution layer
from typing import Union
from torch import Tensor
from torch_sparse import SparseTensor
# Model Definition
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node log-probabilities.

    :param in_channels: size of each input node feature vector
    :param hidden_channels: output size of the first ``GCNConv`` layer
    :param out_channels: output size of the second ``GCNConv`` layer
    :param dropout: dropout probability applied after the first layer's ReLU
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout):
        super().__init__()
        # The attribute names double as state_dict keys, so saved checkpoints
        # depend on them staying exactly as they are.
        self.gcnConv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.gcnConv2 = GCNConv(hidden_channels, out_channels, cached=True)
        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise the parameters of both convolution layers."""
        for conv in (self.gcnConv1, self.gcnConv2):
            conv.reset_parameters()

    def forward(self, x, edge_index):
        """Return the log-softmax (over the last dimension) of the network output.

        :param x: node feature matrix
        :param edge_index: graph connectivity passed to both ``GCNConv`` layers
        """
        hidden = F.relu(self.gcnConv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        logits = self.gcnConv2(hidden, edge_index)
        return F.log_softmax(logits, dim=-1)
def train(model, data, train_idx, optimizer):
    """Perform one full-graph optimisation step and return its loss.

    :param model: module called as ``model(x, edge_index)``; its output is fed
        to ``F.nll_loss``, so it must return log-probabilities
    :param data: object exposing ``x``, ``edge_index`` and ``y``
    :param train_idx: index or mask selecting the training nodes
    :param optimizer: optimizer bound to the model's parameters
    :return: float, the negative log-likelihood loss on the training nodes
        computed before the parameter update
    """
    model.train()
    optimizer.zero_grad()
    # The forward pass covers every node; only the training nodes contribute to the loss.
    log_probs = model(data.x, data.edge_index)
    step_loss = F.nll_loss(log_probs[train_idx], data.y[train_idx])
    step_loss.backward()
    optimizer.step()
    return step_loss.item()
def test(model, data, split_idx, evaluator):
    """Evaluate the model on the training and validation splits.

    :param model: module called as ``model(x, edge_index)`` returning log-probabilities
    :param data: object exposing ``x``, ``edge_index`` and ``y``
    :param split_idx: mapping with at least the keys ``'train'`` and ``'valid'``,
        each an index or mask selecting the nodes of that split
    :param evaluator: object whose ``eval(y_true, y_pred)`` returns a mapping
        containing the key ``'auc'``
    :return: tuple ``(eval_results, losses, y_pred)``: per-split AUC, per-split
        NLL loss, and the exponentiated model output for every node
    """
    model.eval()
    eval_results = {}
    losses = {}
    with torch.no_grad():
        log_probs = model(data.x, data.edge_index)
        # The model emits log-probabilities; exponentiate to get probabilities.
        y_pred = log_probs.exp()
        for split in ('train', 'valid'):
            idx = split_idx[split]
            targets = data.y[idx]
            losses[split] = F.nll_loss(log_probs[idx], targets).item()
            eval_results[split] = evaluator.eval(targets, y_pred[idx])['auc']
    return eval_results, losses, y_pred
def predict(data, node_id):
    """
    Load the saved GCN checkpoint and predict class probabilities.

    :param data: graph object exposing ``x`` and ``edge_index``
    :param node_id: int, index of the node to predict
    :return: tensor holding the probabilities of class 0 and class 1 for the
        requested node (shape ``[2]`` for a single int index)
    """
    # Run on the device the graph already lives on: the training script moves
    # ``data`` to the GPU, and a freshly built CPU model would otherwise fail
    # with a device mismatch.
    target_device = data.x.device
    # Take the input width from the data itself instead of hard-coding it.
    model = GCN(in_channels=data.x.size(-1), hidden_channels=128, out_channels=2, dropout=0)
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    state = torch.load(save_dir+'/model_gcn.pt', map_location=target_device)  # checkpoint with the lowest validation loss
    model.load_state_dict(state)
    model = model.to(target_device)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)[node_id]
        # The model outputs log-probabilities; exponentiate to get probabilities.
        y_pred = out.exp()
    return y_pred
# In[4]:
path = './datasets/632d74d4e2843a53167ee9a1-momodel/'  # dataset root directory
save_dir = './results/'  # directory where model checkpoints are saved
dataset_name = 'DGraph'

dataset = DGraphFin(root=path, name=dataset_name, transform=T.ToSparseTensor())
nlabels = dataset.num_classes
data = dataset[0]

if dataset_name == 'DGraph':
    # Only class 0 and class 1 are predicted in this experiment.
    nlabels = 2
    # Standardise every feature column (zero mean, unit standard deviation).
    x = (data.x - data.x.mean(0)) / data.x.std(0)
    data.x = x

# Symmetrise the adjacency so the directed graph is treated as undirected.
data.adj_t = data.adj_t.to_symmetric()
# Rebuild a dense (2, num_edges) edge_index from the sparse adjacency.
row, col, _ = data.adj_t.t().coo()
data.edge_index = torch.stack([row, col], dim=0)

# Flatten an (N, 1) label tensor to (N,).
if data.y.dim() == 2:
    data.y = data.y.squeeze(1)

# Node selectors for the train / validation / test splits.
split_idx = {name: getattr(data, f'{name}_mask') for name in ('train', 'valid', 'test')}
train_idx = split_idx['train']

result_dir = prepare_folder(dataset_name, 'GCN')

# Inspect the data and its dimensions.
print(data)
print(data.x.shape)  # features
print(data.y.shape)  # labels
# In[5]:
# 定义网络模型
# Build the network: input width follows the feature matrix, output width the label count.
model = GCN(
    in_channels=data.x.shape[-1],
    hidden_channels=128,
    out_channels=nlabels,
    dropout=0,
)
print('Model GCN initialized')

# Evaluation metric and training length.
eval_metric = 'auc'
evaluator = Evaluator(eval_metric)
epochs = 200
# In[ ]:
# 训练网络模型
import gc
gc.collect()

# Total number of model parameters.
print(sum(p.numel() for p in model.parameters()))

# Re-initialise the weights before the device move, keeping the original ordering.
model.reset_parameters()
model = model.to(device)
# Build the optimizer only after the model sits on its final device, as the
# torch.optim documentation recommends.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-7)
min_valid_loss = 1e8

# Move everything the train/eval steps touch onto the same device as the model.
data.edge_index = data.edge_index.to(device)
data.x = data.x.to(device)
data.y = data.y.to(device)
data.test_mask = data.test_mask.to(device)
data.train_mask = data.train_mask.to(device)
data.valid_mask = data.valid_mask.to(device)

# torch.save does not create missing directories, so make sure the checkpoint
# directory exists before the first save.
os.makedirs(save_dir, exist_ok=True)

# Per-epoch history, written to the training log afterwards.
Epochs = []
Loss = []
Train_AUC = []
Valid_AUC = []

for epoch in range(1, epochs + 1):
    loss = train(model, data, train_idx, optimizer)
    eval_results, losses, out = test(model, data, split_idx, evaluator)
    train_eval, valid_eval = eval_results['train'], eval_results['valid']
    train_loss, valid_loss = losses['train'], losses['valid']

    # Checkpoint whenever the validation loss reaches a new minimum.
    if valid_loss < min_valid_loss:
        min_valid_loss = valid_loss
        torch.save(model.state_dict(), save_dir+'/model_gcn.pt')  # keep the best model so far

    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_eval:.3f}%, '  # AUC is multiplied by 100 to put it on a 0-100 scale
          f'Valid: {100 * valid_eval:.3f}% ')
    # JSON-formatted metric lines, one per metric per epoch.
    print('{{"metric": "Loss", "value": {:.4f}, "epoch": {} }}'.format(loss,epoch))
    print('{{"metric": "Train AUC", "value": {:.4f}, "epoch": {} }}'.format(100*train_eval,epoch))
    print('{{"metric": "Valid AUC", "value": {:.4f}, "epoch": {} }}'.format(100*valid_eval,epoch))

    Epochs.append(epoch)
    Loss.append(loss)
    Train_AUC.append(train_eval)
    Valid_AUC.append(valid_eval)
# In[ ]:
# Collect the per-epoch history into a table. Columns are laid out in the
# intended order (Epochs, Loss, Train_AUC, Valid_AUC); the previous positional
# inserts reused index 2 and put Valid_AUC ahead of Train_AUC.
dataLogger = pd.DataFrame({
    "Epochs": Epochs,
    "Loss": Loss,
    "Train_AUC": Train_AUC,
    "Valid_AUC": Valid_AUC,
})
# Write the log as a spreadsheet with floats rounded to four decimals and no index column.
dataLogger.to_excel('GCN_trainingLog.xlsx', float_format="%.4f", index=False)
1
https://gitee.com/qunshanhe/dgraphfin_gnn.git
git@gitee.com:qunshanhe/dgraphfin_gnn.git
qunshanhe
dgraphfin_gnn
DGraphFin_GNN
master

搜索帮助