我们有一份交易量数据，从曲线形状上看，交易量随时间的变化近似符合正态分布（钟形曲线）。为了合理设置机器的容量，需要对该数据进行测算，拟合出这条曲线的具体参数。

正态分布的概率密度函数为 $f(x)=\frac{1}{\sigma\sqrt{2\pi}}\exp\left(-\frac{(x-\mu)^2}{2\sigma^2}\right)$，用 Python 写出来如下：
def normfun(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a NumPy
            array (the expression is applied element-wise).
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        exp(-(x - mu)^2 / (2 * sigma^2)) / (sigma * sqrt(2 * pi)),
        with the same shape as ``x``.
    """
    # The squares must be written with ``**`` and every product with an
    # explicit ``*``; without them the snippet is not valid Python.
    return np.exp(-((x - mu)**2) / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))

需要拟合的数据格式如下：

其中 Value 为交易量，Timestamp 为时间戳。

我们取某一天早高峰时段的数据：
# Load the Grafana export and replace missing samples with 0.
df = pd.read_csv('trans_grafana_data_export.csv').fillna(0)

# Rows 3118..4278 of the 'Value' column are the window used as one day's
# morning peak; x is the matching 0-based sample index as floats.
cpu = np.array(df['Value'].tolist()[3118:4279])
x = np.arange(len(cpu), dtype=float)

然后建立模型，开始训练：
# The three parameters are, in order: mu, sigma and lambda (a scale factor).
loss_fn = torch.nn.MSELoss()


def minnormfun(x, cpuindex, cpu):
    """Return the MSE between a scaled Gaussian curve and the observed data.

    Args:
        x: Indexable holding the three fitted parameters:
            ``x[0]`` = mu, ``x[1]`` = sigma, ``x[2]`` = lambda (scale).
        cpuindex: Tensor of sample positions at which the curve is evaluated.
        cpu: Tensor of observed values, same shape as ``cpuindex``.

    Returns:
        The ``torch.nn.MSELoss`` between ``lambda * pdf(cpuindex)`` and ``cpu``.
    """
    # Squares need ``**`` and every product an explicit ``*``; the scale
    # factor x[2] multiplies the density before it is compared with the data.
    pdf = torch.exp(-((cpuindex - x[0])**2) / (2 * x[1]**2)) / (x[1] * np.sqrt(2 * np.pi))
    return loss_fn(pdf * x[2], cpu)

#运用梯度下降法算出三个参数
#训练代码

# with torch.cuda.device(0):
#     x = Variable(torch.DoubleTensor([100,100,100]),requires_grad = True)
#     cpu = np.array(df['Value'].tolist()[3586:3860])/300.0
#     cpuindex = np.arange(0., len(cpu))
#     cpu = torch.from_numpy(cpu)
#     cpuindex = torch.from_numpy(cpuindex)
#     print(type(x))
#     optimizer = torch.optim.Adam([x],lr=1e-3)
#
#     for step in range(200000):
#         pred = minnormfun(x,cpuindex,cpu)
#         optimizer.zero_grad()
#         pred.backward()
#         optimizer.step()
#         if step % 2000 == 0:
#             print('step{}:x={},f(x)={}'.format(step, x.tolist(), pred.item()))
训练后得到三个参数，依次为 mu、sigma、lambda（注意训练时交易量除以了 300，因此用这组参数还原曲线时要再乘以 300）：
#x=[142.95024248449738, 81.2642226162778, 189.98641718925109]

验证拟合效果：把拟合出的曲线（红色虚线）和实际交易量（绿色虚线）画在同一张图上进行对比。

完整代码如下:

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import pandas as pd
from torch.autograd import Variable
import torch
#正态分布的概率密度函数。可以理解成 x 是 mu(均值)和 sigma(标准差)的函数
def normfun(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a NumPy
            array (the expression is applied element-wise).
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        exp(-(x - mu)^2 / (2 * sigma^2)) / (sigma * sqrt(2 * pi)).
    """
    exponent = -((x - mu) ** 2) / (2 * sigma ** 2)
    normaliser = sigma * np.sqrt(2 * np.pi)
    return np.exp(exponent) / normaliser

# Load the Grafana export and replace missing samples with 0.
df = pd.read_csv('trans_grafana_data_export.csv').fillna(0)

# Rows 3118..4278 of the 'Value' column are the window used as one day's
# morning peak; x is the matching 0-based sample index as floats.
cpu = np.array(df['Value'].tolist()[3118:4279])
x = np.arange(len(cpu), dtype=float)
#plt.plot(x, cpu, 'r--')
#plt.show()

# The three parameters are, in order: mu, sigma and lambda (a scale factor).
loss_fn = torch.nn.MSELoss()
def minnormfun(x,cpuindex,cpu):
    """Return the MSE between a scaled Gaussian curve and the observed data.

    ``x[0]`` is mu, ``x[1]`` is sigma and ``x[2]`` is the scale factor that
    multiplies the density; ``cpuindex`` holds the sample positions and
    ``cpu`` the observed values to fit.
    """
    mu, sigma, scale = x[0], x[1], x[2]
    exponent = -((cpuindex - mu) ** 2) / (2 * sigma ** 2)
    density = torch.exp(exponent) / (sigma * np.sqrt(2 * np.pi))
    return loss_fn(density * scale, cpu)

#运用梯度下降法算出三个参数
#训练代码
# with torch.cuda.device(0):
#     x = Variable(torch.DoubleTensor([100,100,100]),requires_grad = True)
#     cpu = np.array(df['Value'].tolist()[3586:3860])/300.0
#     cpuindex = np.arange(0., len(cpu))
#     cpu = torch.from_numpy(cpu)
#     cpuindex = torch.from_numpy(cpuindex)
#     print(type(x))
#     optimizer = torch.optim.Adam([x],lr=1e-3)
#
#     for step in range(200000):
#         pred = minnormfun(x,cpuindex,cpu)
#         optimizer.zero_grad()
#         pred.backward()
#         optimizer.step()
#         if step % 2000 == 0:
#             print('step{}:x={},f(x)={}'.format(step, x.tolist(), pred.item()))

# Fitted parameters (mu, sigma, lambda) recorded from the commented-out
# training run: x=[142.95024248449738, 81.2642226162778, 189.98641718925109]
mu, sigma, lam = 142.95024248449738, 81.2642226162778, 189.98641718925109

# Red dashed line: the fitted curve, evaluated over the index range of the
# currently loaded window. The commented-out training code divides Value by
# 300.0, so the curve is multiplied by 300 to return to the original scale.
cpuindex = np.arange(0., len(cpu))
cpu = normfun(cpuindex, mu, sigma) * lam * 300
plt.plot(cpuindex, cpu, 'r--')

# Green dashed line: the raw values of rows 3586..3859, the same slice the
# commented-out training code uses.
cpu = np.array(df['Value'].tolist()[3586:3860])
cpuindex = np.arange(0., len(cpu))
plt.plot(cpuindex, cpu, 'g--')
plt.show()