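# NOTE: the lines below are web-page text captured with the file from a
# repository viewer; they are kept here (translated) as comments only.
#   Skip to content
#   Snippets / Groups / Projects
#   Unverified commit 455300c7, authored by dlagul, committer: GitHub
#   Browse files
#
#   Commit message: "Add files via upload"
#
#   Parent commit: 48f5e336
#   Branches
#   No related merge requests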
import numpy as np
import argparse
import pandas as pd
import os, sys
import math
import string
import torch
import time
from sklearn import preprocessing
_TIMESTAMP_FORMAT = '%Y%m%d%H%M%S'


def _parse_timestamp(value):
    """Parse a ``YYYYmmddHHMMSS`` value into a comparable (Y, m, d, H, M, S) tuple."""
    # Comparing the calendar fields directly keeps the train/test split
    # independent of the local timezone and DST rules (unlike time.mktime).
    return tuple(time.strptime(str(value), _TIMESTAMP_FORMAT)[:6])


def _sliding_windows(series, win_size, stride, offset):
    """Stack windows of ``win_size`` columns cut from the 2-D array ``series``."""
    last_start = series.shape[1] - win_size
    # NOTE(review): range() excludes ``last_start`` itself, so for offset 0 the
    # window ending exactly at the last column is skipped when
    # ``last_start % stride == 0``. Kept as-is so existing datasets stay
    # reproducible -- confirm whether this is intended.
    starts = [base + offset
              for base in range(0, last_start, stride)
              if base + offset <= last_start]
    # np.array([]) (shape (0,)) is returned when no window fits.
    return np.array([series[:, start:start + win_size] for start in starts])


def data_preprocess(raw_data_file, label_file, train_data_path, test_data_path,
                    test_time, win_size=36, l=10, T=20, fr=(-1, 1)):
    """Cut a multivariate series into windowed sequences and save them as ``.seq`` files.

    The data CSV is read with its first line as header; each remaining row is
    one series and each column one time step. Every row is min-max scaled
    independently to ``fr``. The label CSV is read without a header as int64:
    row 0 holds the timestamps (``YYYYmmddHHMMSS``) and row 1 the labels.

    For every offset ``j`` in ``range(l)`` windows of ``win_size`` steps are
    taken with stride ``l``; each run of ``T`` consecutive windows forms one
    sample, saved with ``torch.save`` as a dict with keys ``'ts'``,
    ``'label'`` (numpy arrays) and ``'value'`` (a tensor with an extra axis
    inserted at position 1). A sample goes to ``train_data_path`` when its
    last timestamp is earlier than ``test_time``, otherwise to
    ``test_data_path``; files are named ``1.seq``, ``2.seq``, ... per folder.

    Args:
        raw_data_file: Path of the data CSV.
        label_file: Path of the timestamp/label CSV.
        train_data_path: Output directory for training samples (created if missing).
        test_data_path: Output directory for test samples (created if missing).
        test_time: Split time in ``YYYYmmddHHMMSS`` form.
        win_size: Number of time steps per window.
        l: Window stride, and also the number of start offsets used.
        T: Number of consecutive windows per saved sample.
        fr: ``feature_range`` passed to ``MinMaxScaler``.

    Raises:
        ValueError: If an input file does not exist, if ``win_size``, ``l`` or
            ``T`` is not positive, if a timestamp cannot be parsed, or if the
            label file has fewer columns than the data file.
    """
    if not os.path.exists(raw_data_file):
        raise ValueError('Unknown input data file: {}'.format(raw_data_file))
    if not os.path.exists(label_file):
        raise ValueError('Unknown input label file: {}'.format(label_file))
    if win_size <= 0 or l <= 0 or T <= 0:
        raise ValueError(
            'win_size, l and T must be positive, got win_size={}, l={}, T={}'
            .format(win_size, l, T))

    # Parse the split time up front so a malformed value fails before any work.
    test_start = _parse_timestamp(test_time)

    # get raw data and the corresponding labels
    raw_data = np.array(pd.read_csv(raw_data_file, header=0), dtype=np.float64).T
    # data normalization, default is [-1,1]; the scaler works per column, hence
    # the transpose before and after.
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=fr)
    scaled_data = min_max_scaler.fit_transform(raw_data).T

    raw_ts_label = np.array(pd.read_csv(label_file, header=None), dtype=np.int64)
    if raw_ts_label.shape[1] < scaled_data.shape[1]:
        # Shorter labels would otherwise yield ragged, misaligned windows.
        raise ValueError(
            'Label file has {} time steps but data file has {}'
            .format(raw_ts_label.shape[1], scaled_data.shape[1]))
    # Keep a leading axis of size 1 so timestamps/labels window like the data.
    raw_ts = raw_ts_label[0, :][np.newaxis, :]
    raw_label = raw_ts_label[1, :][np.newaxis, :]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(train_data_path, exist_ok=True)
    os.makedirs(test_data_path, exist_ok=True)

    train_sample_id = 1
    test_sample_id = 1
    for offset in range(l):
        samples = _sliding_windows(scaled_data, win_size, l, offset)
        labels = _sliding_windows(raw_label, win_size, l, offset)
        stamps = _sliding_windows(raw_ts, win_size, l, offset)
        # NOTE(review): the upper bound excludes ``len(samples)``, so the
        # sequence ending on the final window is never written. Kept as-is to
        # preserve the produced dataset -- confirm whether this is intended.
        for end in range(T, len(samples)):
            kpi_ts = stamps[end - T:end]
            data = {'ts': kpi_ts,
                    'label': labels[end - T:end],
                    'value': torch.tensor(samples[end - T:end]).unsqueeze(1)}
            # The last time step of the last window decides the split.
            if _parse_timestamp(kpi_ts[-1, -1, -1]) < test_start:
                path_temp = os.path.join(train_data_path, str(train_sample_id))
                train_sample_id += 1
            else:
                path_temp = os.path.join(test_data_path, str(test_sample_id))
                test_sample_id += 1
            torch.save(data, path_temp + '.seq')
def main(argv=None):
    """Parse command-line options and run ``data_preprocess`` with them.

    The file/directory options and the split time have no usable default
    (an empty value always fails later inside ``data_preprocess``), so they
    are declared required and argparse reports a missing one immediately.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='Cut a KPI time series into windowed train/test samples.')
    parser.add_argument('--raw_data_file', type=str, required=True,
                        help='path of the input data CSV')
    parser.add_argument('--label_file', type=str, required=True,
                        help='path of the timestamp/label CSV')
    parser.add_argument('--train_data_path', type=str, required=True,
                        help='output directory for training samples')
    parser.add_argument('--test_data_path', type=str, required=True,
                        help='output directory for test samples')
    parser.add_argument('--test_start_time', type=str, required=True,
                        help='split time as YYYYmmddHHMMSS; samples whose last '
                             'timestamp is earlier go to the training set')
    parser.add_argument('--T', type=int, default=20,
                        help='number of consecutive windows per sample')
    parser.add_argument('--win_size', type=int, default=36,
                        help='number of time steps per window')
    parser.add_argument('--l', type=int, default=10,
                        help='window stride')
    args = parser.parse_args(argv)

    data_preprocess(args.raw_data_file,
                    args.label_file,
                    args.train_data_path,
                    args.test_data_path,
                    args.test_start_time,
                    win_size=args.win_size,
                    l=args.l,
                    T=args.T)
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
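# NOTE: web-page footer text captured with the file (translated), not part of
# the script:
#   0%.
#   You are about to add 0 people to the discussion. Proceed with caution.
#   Finish editing this message first!
#   Register to comment.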