Skip to content
GitLab
探索
登录
注册
主导航
搜索或转到…
项目
S
SDFVAE
管理
动态
成员
标记
计划
议题
0
议题看板
里程碑
Wiki
代码
合并请求
0
仓库
分支
提交
标签
仓库图
比较修订版本
代码片段
构建
流水线
作业
流水线计划
产物
部署
发布
软件包库
运维
环境
Terraform 模块
监控
事件
服务台
分析
价值流分析
Contributor analytics
CI/CD 分析
仓库分析
模型实验
帮助
帮助
支持
GitLab 文档
比较 GitLab 各版本
社区论坛
为极狐GitLab 提交贡献
提交反馈
快捷键
?
支持
扫码加入微信群
1. 获取企业级DevOps解决方案
2. 免费或优惠考取极狐GitLab官方培训认证
代码片段
群组
项目
AIOps-NanKai
model
SDFVAE
提交
455300c7
未验证
提交
455300c7
编辑于
4年前
作者:
dlagul
提交者:
GitHub
4年前
浏览文件
操作
下载
补丁
差异文件
Add files via upload
上级
48f5e336
分支
分支 包含提交
无相关合并请求
变更
1
隐藏空白变更内容
行内
左右并排
显示
1 个更改的文件
data_preprocess/data_preprocess.py
+101
-0
101 个添加, 0 个删除
data_preprocess/data_preprocess.py
有
101 个添加
和
0 个删除
data_preprocess/data_preprocess.py
0 → 100644
+
101
−
0
浏览文件 @
455300c7
import
numpy
as
np
import
argparse
import
pandas
as
pd
import
os
,
sys
import
math
import
string
import
torch
import
time
from
sklearn
import
preprocessing
def _parse_timestamp(stamp):
    """Parse a ``YYYYmmddHHMMSS`` stamp into a comparable 6-field tuple.

    The result is ``(year, month, day, hour, minute, second)``, so two stamps
    can be ordered without going through the local time zone.

    Raises:
        ValueError: (from ``time.strptime``) if ``str(stamp)`` does not match
            the ``%Y%m%d%H%M%S`` format.
    """
    return tuple(time.strptime(str(stamp), '%Y%m%d%H%M%S')[:6])


def _stack_windows(series, starts, win_size):
    """Stack ``series[:, s:s + win_size]`` for every start ``s`` in ``starts``.

    Returns an array of shape ``(len(starts), series.shape[0], win_size)``.
    """
    return np.stack([series[:, s:s + win_size] for s in starts])


def data_preprocess(raw_data_file, label_file, train_data_path, test_data_path,
                    test_time, win_size=36, l=10, T=20, fr=(-1, 1)):
    """Cut a normalized series into windowed sequences and save them to disk.

    The raw CSV is min-max scaled, sliced into windows of ``win_size``
    columns, and every run of ``T`` windows spaced ``l`` columns apart is
    written as one ``<id>.seq`` file with ``torch.save``.  A sequence goes to
    ``train_data_path`` when the last timestamp of its last window is earlier
    than ``test_time`` and to ``test_data_path`` otherwise.  Ids start at 1
    and are counted separately for the two directories.

    Each saved object is a dict with:
        * ``'ts'``    - ndarray ``(T, 1, win_size)`` of timestamps,
        * ``'label'`` - ndarray ``(T, 1, win_size)`` of labels,
        * ``'value'`` - tensor ``(T, 1, n_rows, win_size)`` of scaled values,
          where ``n_rows`` is the number of data rows in the raw CSV.

    Args:
        raw_data_file: CSV whose first line is a header.  Every remaining row
            is scaled independently to ``fr``; the columns form the axis that
            is windowed.  (Presumably one row per KPI - confirm with the
            data producer.)
        label_file: Header-less CSV of integers; row 0 holds the timestamps
            (``YYYYmmddHHMMSS``) and row 1 the labels, one column per column
            of the raw data.
        train_data_path: Output directory for training sequences (created if
            missing).
        test_data_path: Output directory for test sequences (created if
            missing).
        test_time: Split point in ``YYYYmmddHHMMSS`` form.
        win_size: Number of columns in one window.
        l: Column stride between consecutive windows of a sequence; also the
            number of start offsets (``0 .. l - 1``) that are processed.
        T: Number of windows per saved sequence.
        fr: ``feature_range`` passed to ``MinMaxScaler``.

    Raises:
        ValueError: If an input file does not exist, if ``win_size``, ``l`` or
            ``T`` is not positive, if ``test_time`` or a timestamp does not
            match the expected format, or if the label file has fewer columns
            than the data.
    """
    if not os.path.exists(raw_data_file):
        raise ValueError('Unknown input data file: {}'.format(raw_data_file))
    if not os.path.exists(label_file):
        raise ValueError('Unknown input label file: {}'.format(label_file))
    if win_size <= 0 or l <= 0 or T <= 0:
        raise ValueError(
            'win_size, l and T must be positive, got win_size={}, l={}, T={}'
            .format(win_size, l, T))

    # Parse the split point first so a malformed value fails before any
    # file is read or any directory is created.
    split_point = _parse_timestamp(test_time)

    # Get the raw data.  MinMaxScaler scales each column of its input, so the
    # matrix is transposed to scale every CSV row on its own and transposed
    # back afterwards; default range is [-1, 1].
    raw_data = np.array(pd.read_csv(raw_data_file, header=0),
                        dtype=np.float64).T
    scaler = preprocessing.MinMaxScaler(feature_range=fr)
    scaled_data = scaler.fit_transform(raw_data).T

    # Get the corresponding timestamps and labels, each kept as a (1, n) row
    # so that they window exactly like the data matrix.
    ts_and_label = np.array(pd.read_csv(label_file, header=None),
                            dtype=np.int64)
    raw_ts = ts_and_label[0].reshape(1, -1)
    raw_label = ts_and_label[1].reshape(1, -1)

    n_points = scaled_data.shape[1]
    if raw_ts.shape[1] < n_points:
        # Shorter labels would silently yield truncated (ragged) windows.
        raise ValueError(
            'Label file has {} columns but the data has {}'
            .format(raw_ts.shape[1], n_points))

    os.makedirs(train_data_path, exist_ok=True)
    os.makedirs(test_data_path, exist_ok=True)

    train_sample_id = 1
    test_sample_id = 1
    for offset in range(l):
        # Window starts offset, offset + l, ... up to and INCLUDING
        # n_points - win_size.  The previous bound stopped one step early for
        # offset 0 and dropped the final full window whenever
        # (n_points - win_size) was a multiple of l.
        starts = range(offset, n_points - win_size + 1, l)
        if len(starts) <= T:
            continue  # not enough windows to emit a single sequence

        samples = _stack_windows(scaled_data, starts, win_size)
        labels = _stack_windows(raw_label, starts, win_size)
        tss = _stack_windows(raw_ts, starts, win_size)

        # NOTE(review): `end` never reaches len(starts), so the sequence that
        # finishes on the last window of each group is not emitted.  Kept as
        # in the original - confirm whether that is intended.
        for end in range(T, len(starts)):
            begin = end - T
            kpi_ts = tss[begin:end]
            data = {
                'ts': kpi_ts,
                'label': labels[begin:end],
                'value': torch.tensor(samples[begin:end]).unsqueeze(1),
            }

            # The newest timestamp of the sequence decides the split.
            if _parse_timestamp(kpi_ts[-1, -1, -1]) < split_point:
                path_temp = os.path.join(train_data_path, str(train_sample_id))
                train_sample_id += 1
            else:
                path_temp = os.path.join(test_data_path, str(test_sample_id))
                test_sample_id += 1
            torch.save(data, path_temp + '.seq')
def main(argv=None):
    """Parse command-line options and run ``data_preprocess``.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the behaviour of
            the plain ``main()`` call.

    The two input files, the two output directories and the split time have
    no usable default, so they are required; argparse exits with a usage
    message when one is missing instead of failing later on an empty string.
    """
    parser = argparse.ArgumentParser(
        description='Normalize a raw series, cut it into windowed sequences '
                    'and write train/test .seq files.')
    parser.add_argument('--raw_data_file', type=str, required=True,
                        help='CSV file with the raw values (first line is '
                             'read as a header)')
    parser.add_argument('--label_file', type=str, required=True,
                        help='header-less CSV: row 0 timestamps, '
                             'row 1 labels')
    parser.add_argument('--train_data_path', type=str, required=True,
                        help='output directory for training sequences')
    parser.add_argument('--test_data_path', type=str, required=True,
                        help='output directory for test sequences')
    parser.add_argument('--test_start_time', type=str, required=True,
                        help='split time as YYYYmmddHHMMSS; sequences ending '
                             'earlier go to the training directory')
    parser.add_argument('--T', type=int, default=20,
                        help='number of windows per sequence')
    parser.add_argument('--win_size', type=int, default=36,
                        help='number of points per window')
    parser.add_argument('--l', type=int, default=10,
                        help='stride between consecutive windows')

    args = parser.parse_args(argv)

    data_preprocess(args.raw_data_file,
                    args.label_file,
                    args.train_data_path,
                    args.test_data_path,
                    args.test_start_time,
                    win_size=args.win_size,
                    l=args.l,
                    T=args.T)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
This diff is collapsed.
Click to expand it.
预览
0%
请重试
或
添加新附件
.
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
保存评论
取消
想要评论请
注册
或
登录