From 07a0fc6960cb04d237ebb0c2670538197ff48379 Mon Sep 17 00:00:00 2001
From: openaiops
Date: Tue, 27 Feb 2024 22:50:24 +0800
Subject: [PATCH] Initial commit

---
 README.md | 18 +
 requirements.txt | 35 +
 sample_dataset/id_manager/latency_range.yml | 3316 +++++++++++++++++
 sample_dataset/id_manager/operation_id.yml | 1489 ++++++++
 sample_dataset/id_manager/service_id.yml | 316 ++
 sample_dataset/id_manager/status_id.yml | 21 +
 sample_dataset/test.csv | 51 +
 sample_dataset/train.csv | 22 +
 sample_dataset/val.csv | 22 +
 test.sh | 4 +
 tracegnn/__init__.py | 0
 tracegnn/cli/__init__.py | 0
 tracegnn/cli/data_process.py | 108 +
 tracegnn/constants.py | 13 +
 tracegnn/data/__init__.py | 3 +
 tracegnn/data/bytes_db.py | 242 ++
 tracegnn/data/trace_graph.py | 617 +++
 tracegnn/data/trace_graph_db.py | 108 +
 tracegnn/models/__init__.py | 0
 tracegnn/models/trace_vae/__init__.py | 0
 tracegnn/models/trace_vae/constants.py | 3 +
 tracegnn/models/trace_vae/dataset.py | 134 +
 tracegnn/models/trace_vae/distributions.py | 356 ++
 tracegnn/models/trace_vae/evaluation.py | 570 +++
 tracegnn/models/trace_vae/graph_utils.py | 424 +++
 tracegnn/models/trace_vae/model/__init__.py | 1 +
 tracegnn/models/trace_vae/model/gnn_layers.py | 190 +
 .../models/trace_vae/model/latency_vae.py | 444 +++
 .../models/trace_vae/model/model_utils.py | 35 +
 .../trace_vae/model/operation_embedding.py | 25 +
 tracegnn/models/trace_vae/model/pooling.py | 75 +
 .../models/trace_vae/model/realnvp_flow.py | 49 +
 tracegnn/models/trace_vae/model/struct_vae.py | 414 ++
 tracegnn/models/trace_vae/model/trace_vae.py | 161 +
 tracegnn/models/trace_vae/tensor_utils.py | 225 ++
 tracegnn/models/trace_vae/test.py | 275 ++
 tracegnn/models/trace_vae/test_utils.py | 106 +
 tracegnn/models/trace_vae/train.py | 531 +++
 tracegnn/models/trace_vae/types.py | 74 +
 tracegnn/utils/__init__.py | 9 +
 tracegnn/utils/analyze_nll.py | 134 +
 tracegnn/utils/array_buffer.py | 39 +
 tracegnn/utils/data_utils.py | 23 +
 tracegnn/utils/fscore_utils.py | 37 +
 tracegnn/utils/graph_conversion.py | 35 +
 tracegnn/utils/id_assign.py | 58 +
 tracegnn/utils/latency_codec.py | 135 +
 tracegnn/utils/latency_range_file.py | 80 +
 tracegnn/utils/misc.py | 54 +
 tracegnn/utils/summary_callback.py | 75 +
 train.sh | 4 +
 51 files changed, 11160 insertions(+)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 sample_dataset/id_manager/latency_range.yml
 create mode 100644 sample_dataset/id_manager/operation_id.yml
 create mode 100644 sample_dataset/id_manager/service_id.yml
 create mode 100644 sample_dataset/id_manager/status_id.yml
 create mode 100644 sample_dataset/test.csv
 create mode 100644 sample_dataset/train.csv
 create mode 100644 sample_dataset/val.csv
 create mode 100644 test.sh
 create mode 100644 tracegnn/__init__.py
 create mode 100644 tracegnn/cli/__init__.py
 create mode 100644 tracegnn/cli/data_process.py
 create mode 100644 tracegnn/constants.py
 create mode 100644 tracegnn/data/__init__.py
 create mode 100644 tracegnn/data/bytes_db.py
 create mode 100644 tracegnn/data/trace_graph.py
 create mode 100644 tracegnn/data/trace_graph_db.py
 create mode 100644 tracegnn/models/__init__.py
 create mode 100644 tracegnn/models/trace_vae/__init__.py
 create mode 100644 tracegnn/models/trace_vae/constants.py
 create mode 100644 tracegnn/models/trace_vae/dataset.py
 create mode 100644 tracegnn/models/trace_vae/distributions.py
 create mode 100644 tracegnn/models/trace_vae/evaluation.py
 create mode 100644 tracegnn/models/trace_vae/graph_utils.py
 create mode 100644 tracegnn/models/trace_vae/model/__init__.py
 create mode 100644 tracegnn/models/trace_vae/model/gnn_layers.py
 create mode 100644 tracegnn/models/trace_vae/model/latency_vae.py
 create mode 100644 tracegnn/models/trace_vae/model/model_utils.py
 create mode 100644 tracegnn/models/trace_vae/model/operation_embedding.py
 create mode 100644 tracegnn/models/trace_vae/model/pooling.py
 create mode 100644 tracegnn/models/trace_vae/model/realnvp_flow.py
 create mode 100644 tracegnn/models/trace_vae/model/struct_vae.py
 create mode 100644 tracegnn/models/trace_vae/model/trace_vae.py
 create mode 100644 tracegnn/models/trace_vae/tensor_utils.py
 create mode 100644 tracegnn/models/trace_vae/test.py
 create mode 100644 tracegnn/models/trace_vae/test_utils.py
 create mode 100644 tracegnn/models/trace_vae/train.py
 create mode 100644 tracegnn/models/trace_vae/types.py
 create mode 100644 tracegnn/utils/__init__.py
 create mode 100644 tracegnn/utils/analyze_nll.py
 create mode 100644 tracegnn/utils/array_buffer.py
 create mode 100644 tracegnn/utils/data_utils.py
 create mode 100644 tracegnn/utils/fscore_utils.py
 create mode 100644 tracegnn/utils/graph_conversion.py
 create mode 100644 tracegnn/utils/id_assign.py
 create mode 100644 tracegnn/utils/latency_codec.py
 create mode 100644 tracegnn/utils/latency_range_file.py
 create mode 100644 tracegnn/utils/misc.py
 create mode 100644 tracegnn/utils/summary_callback.py
 create mode 100644 train.sh

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..184db47
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+# TraceVAE
+This is the source code for "Unsupervised Anomaly Detection on Microservice Traces through Graph VAE".
+
+## Usage
+1. `pip3 install -r requirements.txt`.
+2. Convert the dataset with `python3 -m tracegnn.cli.data_process preprocess -i [input_path] -o [dataset_path]`. The sample dataset is under `sample_dataset`. (Note: this sample dataset only demonstrates the data format and usage; it cannot be used to evaluate model performance. Please replace it with your own dataset.)
+Sample:
+```
+python3 -m tracegnn.cli.data_process preprocess -i sample_dataset -o sample_dataset
+```
+3. Train the model with `bash train.sh [dataset_path]`:
+```
+bash train.sh sample_dataset
+```
+4. Evaluate the model with `bash test.sh [model_path] [dataset_path]`. The default model path is `results/train/models/final.pt`:
+```
+bash test.sh results/train/models/final.pt sample_dataset
+```
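For orientation, the `sample_dataset` directory referenced in the README (and added later in this patch) has the following layout, reconstructed from the diffstat above. The per-file annotations are inferences from the YAML excerpts shown below in this patch; `status_id.yml`'s content does not appear in this excerpt, so its description is only an assumption by analogy:

```
sample_dataset/
├── train.csv
├── val.csv
├── test.csv
└── id_manager/
    ├── service_id.yml      # service name -> integer service id
    ├── operation_id.yml    # "<service id>/<operation id>" -> integer operation id
    ├── status_id.yml       # presumably: status value -> integer status id (not shown here)
    └── latency_range.yml   # operation id -> {mean, p99, std} latency statistics
```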
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7e40a0b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,35 @@
+git+https://gitee.com/haowen-xu/ml-essentials
+git+https://gitee.com/haowen-xu/tensorkit
+git+https://gitee.com/mirrors/ZhuSuan.git
+click
+jinja2
+networkx
+numpy
+pandas
+PyYAML
+python-snappy
+scikit-learn
+seaborn
+semver
+natsort
+imageio
+fs
+lazy_object_proxy
+kafka-python
+tqdm
+loguru
+tensorboard==1.15
+numba
+
+-f https://download.pytorch.org/whl/cu116/torch_stable.html
+torch==1.12.0+cu116
+
+-f https://data.dgl.ai/wheels/repo.html
+dgl-cu116
+
+-f https://data.pyg.org/whl/torch-1.12.0+cu116.html
+torch-scatter
+torch-sparse
+torch-cluster
+torch-spline-conv
+torch-geometric
diff --git a/sample_dataset/id_manager/latency_range.yml b/sample_dataset/id_manager/latency_range.yml
new file mode 100644
index 0000000..1aa1da0
--- /dev/null
+++ b/sample_dataset/id_manager/latency_range.yml
@@ -0,0 +1,3316 @@
+1: + mean: 1.0 + p99: 3.0 + std: 1.0 +3: + mean: 1.0 + p99: 3.0 + std: 1.0 +4: + mean: 1.0 + p99: 3.0 + std: 1.0 +5: + mean: 1.0 + p99: 3.0 + std: 1.0 +6: + mean: 1.0 + p99: 3.0 + std: 1.0 +7: + mean: 1.0 + p99: 3.0 + std: 1.0 +8: + mean: 1.0 + p99: 3.0 + std: 1.0 +9: + mean: 1.0 + p99: 3.0 + std: 1.0 +10: + mean: 1.0 + p99: 3.0 + std: 1.0 +11: + mean: 1.0 + p99: 3.0 + std: 1.0 +12: + mean: 1.0 + p99: 3.0 + std: 1.0 +13: + mean: 1.0 + p99: 3.0 + std: 1.0 +15: + mean: 1.0 + p99: 3.0 + std: 1.0 +16: + mean: 1.0 + p99: 3.0 + std: 1.0 +17: + mean: 1.0 + p99: 3.0 + std: 1.0 +19: + mean: 1.0 + p99: 3.0 + std: 1.0 +20: + mean: 1.0 + p99: 3.0 + std: 1.0 +21: + mean: 1.0 + p99: 3.0 + std: 1.0 +23: + mean: 1.0 + p99: 3.0 + std: 1.0 +26: + mean: 1.0 + p99: 3.0 + std: 1.0 +27: + mean: 1.0 + p99: 3.0 + std: 1.0 +28: + mean: 1.0 + p99: 3.0 + std: 1.0 +29: + mean: 1.0 + p99: 3.0 + std: 1.0 +30: + mean: 1.0 + p99: 3.0 + std: 1.0 +47: + mean: 1.0 + p99: 3.0 + std: 1.0 +48: + mean: 1.0 + p99: 3.0 + std: 1.0 +62: + mean: 1.0 + p99: 3.0 + std: 1.0 +63: + mean: 1.0 + p99: 3.0 + std: 1.0 +64: + mean: 1.0 + p99: 3.0 + std: 1.0 +65: + mean: 1.0 + p99: 3.0 + std: 1.0 +67: + mean: 1.0 + p99: 3.0 + std: 1.0 +68: + mean: 1.0 + p99: 3.0 + std: 1.0 +69: + mean: 1.0 + p99: 3.0 + std: 1.0 +70: + mean: 1.0 + p99: 3.0 + std: 1.0 +71: + mean: 1.0 + p99: 3.0 + std: 1.0 +72: + mean: 1.0 + p99: 3.0 + std: 1.0 +73: + mean: 1.0 + p99: 3.0 + std: 1.0 +74: + mean: 1.0 + p99: 3.0 + std: 1.0 +75: + mean: 1.0 + p99: 3.0 + std: 1.0 +76: + mean: 1.0 + p99: 3.0 + std: 1.0 +77: + mean: 1.0 + p99: 3.0 + std: 1.0 +78: + mean: 1.0 + p99: 3.0 + std: 1.0 +79: + mean: 1.0 + p99: 3.0 + std: 1.0 +80: + mean: 1.0 + p99: 3.0 + std: 1.0 +83: + mean: 1.0 + p99: 3.0 + std: 1.0 +84: + mean: 1.0 + p99: 3.0 + std: 1.0 +94: + mean: 1.0 + p99: 3.0 + std: 1.0 +96: + mean: 1.0 + p99: 3.0 + std: 1.0 +98: + mean: 1.0 + p99: 3.0 + std: 1.0 +99: + mean: 1.0 + p99: 3.0 + std: 1.0 +102: + mean: 1.0 + p99: 3.0 + std: 1.0 +103: + mean: 1.0 + p99: 3.0 + std: 1.0 +105: + mean: 1.0 + p99: 3.0 + std: 1.0 +106: + mean: 1.0 + p99: 3.0 + std: 1.0 +107: + mean: 1.0 + p99: 3.0 + std: 1.0 +108: + mean: 1.0 + p99: 3.0 + std: 1.0 +110: + mean: 1.0 + p99: 3.0 + std: 1.0 +112: + mean: 1.0 + p99: 3.0 + std: 1.0 +113: + mean: 1.0 + p99: 3.0 + std: 1.0 +115: + mean: 1.0 + p99: 3.0 + std: 1.0 +116: + mean: 1.0 + p99: 3.0 + std: 1.0 +117: + mean: 1.0 + p99: 3.0 + std: 1.0 +118: + mean: 1.0 + p99:
3.0 + std: 1.0 +119: + mean: 1.0 + p99: 3.0 + std: 1.0 +121: + mean: 1.0 + p99: 3.0 + std: 1.0 +122: + mean: 1.0 + p99: 3.0 + std: 1.0 +123: + mean: 1.0 + p99: 3.0 + std: 1.0 +124: + mean: 1.0 + p99: 3.0 + std: 1.0 +139: + mean: 1.0 + p99: 3.0 + std: 1.0 +140: + mean: 1.0 + p99: 3.0 + std: 1.0 +141: + mean: 1.0 + p99: 3.0 + std: 1.0 +142: + mean: 1.0 + p99: 3.0 + std: 1.0 +143: + mean: 1.0 + p99: 3.0 + std: 1.0 +144: + mean: 1.0 + p99: 3.0 + std: 1.0 +145: + mean: 1.0 + p99: 3.0 + std: 1.0 +146: + mean: 1.0 + p99: 3.0 + std: 1.0 +147: + mean: 1.0 + p99: 3.0 + std: 1.0 +148: + mean: 1.0 + p99: 3.0 + std: 1.0 +149: + mean: 1.0 + p99: 3.0 + std: 1.0 +150: + mean: 1.0 + p99: 3.0 + std: 1.0 +151: + mean: 1.0 + p99: 3.0 + std: 1.0 +152: + mean: 1.0 + p99: 3.0 + std: 1.0 +153: + mean: 1.0 + p99: 3.0 + std: 1.0 +154: + mean: 1.0 + p99: 3.0 + std: 1.0 +155: + mean: 1.0 + p99: 3.0 + std: 1.0 +156: + mean: 1.0 + p99: 3.0 + std: 1.0 +157: + mean: 1.0 + p99: 3.0 + std: 1.0 +158: + mean: 1.0 + p99: 3.0 + std: 1.0 +159: + mean: 1.0 + p99: 3.0 + std: 1.0 +160: + mean: 1.0 + p99: 3.0 + std: 1.0 +161: + mean: 1.0 + p99: 3.0 + std: 1.0 +181: + mean: 1.0 + p99: 3.0 + std: 1.0 +184: + mean: 1.0 + p99: 3.0 + std: 1.0 +186: + mean: 1.0 + p99: 3.0 + std: 1.0 +187: + mean: 1.0 + p99: 3.0 + std: 1.0 +188: + mean: 1.0 + p99: 3.0 + std: 1.0 +189: + mean: 1.0 + p99: 3.0 + std: 1.0 +190: + mean: 1.0 + p99: 3.0 + std: 1.0 +191: + mean: 1.0 + p99: 3.0 + std: 1.0 +192: + mean: 1.0 + p99: 3.0 + std: 1.0 +193: + mean: 1.0 + p99: 3.0 + std: 1.0 +194: + mean: 1.0 + p99: 3.0 + std: 1.0 +195: + mean: 1.0 + p99: 3.0 + std: 1.0 +196: + mean: 1.0 + p99: 3.0 + std: 1.0 +197: + mean: 1.0 + p99: 3.0 + std: 1.0 +198: + mean: 1.0 + p99: 3.0 + std: 1.0 +199: + mean: 1.0 + p99: 3.0 + std: 1.0 +200: + mean: 1.0 + p99: 3.0 + std: 1.0 +201: + mean: 1.0 + p99: 3.0 + std: 1.0 +202: + mean: 1.0 + p99: 3.0 + std: 1.0 +203: + mean: 1.0 + p99: 3.0 + std: 1.0 +204: + mean: 1.0 + p99: 3.0 + std: 1.0 +206: + mean: 1.0 + p99: 3.0 + std: 1.0 +207: + mean: 1.0 + p99: 3.0 + std: 1.0 +208: + mean: 1.0 + p99: 3.0 + std: 1.0 +209: + mean: 1.0 + p99: 3.0 + std: 1.0 +210: + mean: 1.0 + p99: 3.0 + std: 1.0 +211: + mean: 1.0 + p99: 3.0 + std: 1.0 +212: + mean: 1.0 + p99: 3.0 + std: 1.0 +213: + mean: 1.0 + p99: 3.0 + std: 1.0 +214: + mean: 1.0 + p99: 3.0 + std: 1.0 +215: + mean: 1.0 + p99: 3.0 + std: 1.0 +216: + mean: 1.0 + p99: 3.0 + std: 1.0 +217: + mean: 1.0 + p99: 3.0 + std: 1.0 +218: + mean: 1.0 + p99: 3.0 + std: 1.0 +219: + mean: 1.0 + p99: 3.0 + std: 1.0 +220: + mean: 1.0 + p99: 3.0 + std: 1.0 +221: + mean: 1.0 + p99: 3.0 + std: 1.0 +222: + mean: 1.0 + p99: 3.0 + std: 1.0 +224: + mean: 1.0 + p99: 3.0 + std: 1.0 +225: + mean: 1.0 + p99: 3.0 + std: 1.0 +226: + mean: 1.0 + p99: 3.0 + std: 1.0 +227: + mean: 1.0 + p99: 3.0 + std: 1.0 +228: + mean: 1.0 + p99: 3.0 + std: 1.0 +229: + mean: 1.0 + p99: 3.0 + std: 1.0 +230: + mean: 1.0 + p99: 3.0 + std: 1.0 +231: + mean: 1.0 + p99: 3.0 + std: 1.0 +235: + mean: 1.0 + p99: 3.0 + std: 1.0 +236: + mean: 1.0 + p99: 3.0 + std: 1.0 +238: + mean: 1.0 + p99: 3.0 + std: 1.0 +239: + mean: 1.0 + p99: 3.0 + std: 1.0 +240: + mean: 1.0 + p99: 3.0 + std: 1.0 +242: + mean: 1.0 + p99: 3.0 + std: 1.0 +243: + mean: 1.0 + p99: 3.0 + std: 1.0 +244: + mean: 1.0 + p99: 3.0 + std: 1.0 +245: + mean: 1.0 + p99: 3.0 + std: 1.0 +248: + mean: 1.0 + p99: 3.0 + std: 1.0 +249: + mean: 1.0 + p99: 3.0 + std: 1.0 +251: + mean: 1.0 + p99: 3.0 + std: 1.0 +252: + mean: 1.0 + p99: 3.0 + std: 1.0 +253: + mean: 1.0 + p99: 3.0 + std: 1.0 +254: + mean: 1.0 + 
p99: 3.0 + std: 1.0 +255: + mean: 1.0 + p99: 3.0 + std: 1.0 +256: + mean: 1.0 + p99: 3.0 + std: 1.0 +261: + mean: 1.0 + p99: 3.0 + std: 1.0 +263: + mean: 1.0 + p99: 3.0 + std: 1.0 +264: + mean: 1.0 + p99: 3.0 + std: 1.0 +266: + mean: 1.0 + p99: 3.0 + std: 1.0 +267: + mean: 1.0 + p99: 3.0 + std: 1.0 +268: + mean: 1.0 + p99: 3.0 + std: 1.0 +269: + mean: 1.0 + p99: 3.0 + std: 1.0 +270: + mean: 1.0 + p99: 3.0 + std: 1.0 +271: + mean: 1.0 + p99: 3.0 + std: 1.0 +272: + mean: 1.0 + p99: 3.0 + std: 1.0 +273: + mean: 1.0 + p99: 3.0 + std: 1.0 +274: + mean: 1.0 + p99: 3.0 + std: 1.0 +275: + mean: 1.0 + p99: 3.0 + std: 1.0 +276: + mean: 1.0 + p99: 3.0 + std: 1.0 +278: + mean: 1.0 + p99: 3.0 + std: 1.0 +281: + mean: 1.0 + p99: 3.0 + std: 1.0 +282: + mean: 1.0 + p99: 3.0 + std: 1.0 +283: + mean: 1.0 + p99: 3.0 + std: 1.0 +285: + mean: 1.0 + p99: 3.0 + std: 1.0 +291: + mean: 1.0 + p99: 3.0 + std: 1.0 +293: + mean: 1.0 + p99: 3.0 + std: 1.0 +299: + mean: 1.0 + p99: 3.0 + std: 1.0 +300: + mean: 1.0 + p99: 3.0 + std: 1.0 +301: + mean: 1.0 + p99: 3.0 + std: 1.0 +303: + mean: 1.0 + p99: 3.0 + std: 1.0 +305: + mean: 1.0 + p99: 3.0 + std: 1.0 +306: + mean: 1.0 + p99: 3.0 + std: 1.0 +308: + mean: 1.0 + p99: 3.0 + std: 1.0 +310: + mean: 1.0 + p99: 3.0 + std: 1.0 +314: + mean: 1.0 + p99: 3.0 + std: 1.0 +315: + mean: 1.0 + p99: 3.0 + std: 1.0 +316: + mean: 1.0 + p99: 3.0 + std: 1.0 +317: + mean: 1.0 + p99: 3.0 + std: 1.0 +318: + mean: 1.0 + p99: 3.0 + std: 1.0 +319: + mean: 1.0 + p99: 3.0 + std: 1.0 +320: + mean: 1.0 + p99: 3.0 + std: 1.0 +322: + mean: 1.0 + p99: 3.0 + std: 1.0 +323: + mean: 1.0 + p99: 3.0 + std: 1.0 +326: + mean: 1.0 + p99: 3.0 + std: 1.0 +327: + mean: 1.0 + p99: 3.0 + std: 1.0 +328: + mean: 1.0 + p99: 3.0 + std: 1.0 +329: + mean: 1.0 + p99: 3.0 + std: 1.0 +330: + mean: 1.0 + p99: 3.0 + std: 1.0 +331: + mean: 1.0 + p99: 3.0 + std: 1.0 +332: + mean: 1.0 + p99: 3.0 + std: 1.0 +333: + mean: 1.0 + p99: 3.0 + std: 1.0 +335: + mean: 1.0 + p99: 3.0 + std: 1.0 +336: + mean: 1.0 + p99: 3.0 + std: 1.0 +337: + mean: 1.0 + p99: 3.0 + std: 1.0 +339: + mean: 1.0 + p99: 3.0 + std: 1.0 +340: + mean: 1.0 + p99: 3.0 + std: 1.0 +341: + mean: 1.0 + p99: 3.0 + std: 1.0 +342: + mean: 1.0 + p99: 3.0 + std: 1.0 +344: + mean: 1.0 + p99: 3.0 + std: 1.0 +345: + mean: 1.0 + p99: 3.0 + std: 1.0 +346: + mean: 1.0 + p99: 3.0 + std: 1.0 +347: + mean: 1.0 + p99: 3.0 + std: 1.0 +349: + mean: 1.0 + p99: 3.0 + std: 1.0 +350: + mean: 1.0 + p99: 3.0 + std: 1.0 +351: + mean: 1.0 + p99: 3.0 + std: 1.0 +352: + mean: 1.0 + p99: 3.0 + std: 1.0 +353: + mean: 1.0 + p99: 3.0 + std: 1.0 +354: + mean: 1.0 + p99: 3.0 + std: 1.0 +355: + mean: 1.0 + p99: 3.0 + std: 1.0 +356: + mean: 1.0 + p99: 3.0 + std: 1.0 +358: + mean: 1.0 + p99: 3.0 + std: 1.0 +402: + mean: 1.0 + p99: 3.0 + std: 1.0 +403: + mean: 1.0 + p99: 3.0 + std: 1.0 +404: + mean: 1.0 + p99: 3.0 + std: 1.0 +406: + mean: 1.0 + p99: 3.0 + std: 1.0 +407: + mean: 1.0 + p99: 3.0 + std: 1.0 +411: + mean: 1.0 + p99: 3.0 + std: 1.0 +412: + mean: 1.0 + p99: 3.0 + std: 1.0 +413: + mean: 1.0 + p99: 3.0 + std: 1.0 +414: + mean: 1.0 + p99: 3.0 + std: 1.0 +417: + mean: 1.0 + p99: 3.0 + std: 1.0 +418: + mean: 1.0 + p99: 3.0 + std: 1.0 +421: + mean: 1.0 + p99: 3.0 + std: 1.0 +422: + mean: 1.0 + p99: 3.0 + std: 1.0 +424: + mean: 1.0 + p99: 3.0 + std: 1.0 +425: + mean: 1.0 + p99: 3.0 + std: 1.0 +427: + mean: 1.0 + p99: 3.0 + std: 1.0 +430: + mean: 1.0 + p99: 3.0 + std: 1.0 +431: + mean: 1.0 + p99: 3.0 + std: 1.0 +432: + mean: 1.0 + p99: 3.0 + std: 1.0 +433: + mean: 1.0 + p99: 3.0 + std: 1.0 +434: + mean: 
1.0 + p99: 3.0 + std: 1.0 +435: + mean: 1.0 + p99: 3.0 + std: 1.0 +436: + mean: 1.0 + p99: 3.0 + std: 1.0 +437: + mean: 1.0 + p99: 3.0 + std: 1.0 +438: + mean: 1.0 + p99: 3.0 + std: 1.0 +439: + mean: 1.0 + p99: 3.0 + std: 1.0 +441: + mean: 1.0 + p99: 3.0 + std: 1.0 +442: + mean: 1.0 + p99: 3.0 + std: 1.0 +444: + mean: 1.0 + p99: 3.0 + std: 1.0 +445: + mean: 1.0 + p99: 3.0 + std: 1.0 +446: + mean: 1.0 + p99: 3.0 + std: 1.0 +447: + mean: 1.0 + p99: 3.0 + std: 1.0 +448: + mean: 1.0 + p99: 3.0 + std: 1.0 +454: + mean: 1.0 + p99: 3.0 + std: 1.0 +455: + mean: 1.0 + p99: 3.0 + std: 1.0 +456: + mean: 1.0 + p99: 3.0 + std: 1.0 +457: + mean: 1.0 + p99: 3.0 + std: 1.0 +458: + mean: 1.0 + p99: 3.0 + std: 1.0 +459: + mean: 1.0 + p99: 3.0 + std: 1.0 +470: + mean: 1.0 + p99: 3.0 + std: 1.0 +472: + mean: 1.0 + p99: 3.0 + std: 1.0 +473: + mean: 1.0 + p99: 3.0 + std: 1.0 +478: + mean: 1.0 + p99: 3.0 + std: 1.0 +479: + mean: 1.0 + p99: 3.0 + std: 1.0 +481: + mean: 1.0 + p99: 3.0 + std: 1.0 +493: + mean: 1.0 + p99: 3.0 + std: 1.0 +494: + mean: 1.0 + p99: 3.0 + std: 1.0 +495: + mean: 1.0 + p99: 3.0 + std: 1.0 +496: + mean: 1.0 + p99: 3.0 + std: 1.0 +497: + mean: 1.0 + p99: 3.0 + std: 1.0 +498: + mean: 1.0 + p99: 3.0 + std: 1.0 +499: + mean: 1.0 + p99: 3.0 + std: 1.0 +500: + mean: 1.0 + p99: 3.0 + std: 1.0 +501: + mean: 1.0 + p99: 3.0 + std: 1.0 +502: + mean: 1.0 + p99: 3.0 + std: 1.0 +503: + mean: 1.0 + p99: 3.0 + std: 1.0 +504: + mean: 1.0 + p99: 3.0 + std: 1.0 +505: + mean: 1.0 + p99: 3.0 + std: 1.0 +507: + mean: 1.0 + p99: 3.0 + std: 1.0 +509: + mean: 1.0 + p99: 3.0 + std: 1.0 +510: + mean: 1.0 + p99: 3.0 + std: 1.0 +511: + mean: 1.0 + p99: 3.0 + std: 1.0 +512: + mean: 1.0 + p99: 3.0 + std: 1.0 +514: + mean: 1.0 + p99: 3.0 + std: 1.0 +515: + mean: 1.0 + p99: 3.0 + std: 1.0 +526: + mean: 1.0 + p99: 3.0 + std: 1.0 +527: + mean: 1.0 + p99: 3.0 + std: 1.0 +544: + mean: 1.0 + p99: 3.0 + std: 1.0 +545: + mean: 1.0 + p99: 3.0 + std: 1.0 +546: + mean: 1.0 + p99: 3.0 + std: 1.0 +548: + mean: 1.0 + p99: 3.0 + std: 1.0 +549: + mean: 1.0 + p99: 3.0 + std: 1.0 +551: + mean: 1.0 + p99: 3.0 + std: 1.0 +552: + mean: 1.0 + p99: 3.0 + std: 1.0 +553: + mean: 1.0 + p99: 3.0 + std: 1.0 +557: + mean: 1.0 + p99: 3.0 + std: 1.0 +558: + mean: 1.0 + p99: 3.0 + std: 1.0 +559: + mean: 1.0 + p99: 3.0 + std: 1.0 +560: + mean: 1.0 + p99: 3.0 + std: 1.0 +561: + mean: 1.0 + p99: 3.0 + std: 1.0 +562: + mean: 1.0 + p99: 3.0 + std: 1.0 +563: + mean: 1.0 + p99: 3.0 + std: 1.0 +564: + mean: 1.0 + p99: 3.0 + std: 1.0 +565: + mean: 1.0 + p99: 3.0 + std: 1.0 +566: + mean: 1.0 + p99: 3.0 + std: 1.0 +567: + mean: 1.0 + p99: 3.0 + std: 1.0 +568: + mean: 1.0 + p99: 3.0 + std: 1.0 +570: + mean: 1.0 + p99: 3.0 + std: 1.0 +571: + mean: 1.0 + p99: 3.0 + std: 1.0 +572: + mean: 1.0 + p99: 3.0 + std: 1.0 +573: + mean: 1.0 + p99: 3.0 + std: 1.0 +574: + mean: 1.0 + p99: 3.0 + std: 1.0 +575: + mean: 1.0 + p99: 3.0 + std: 1.0 +576: + mean: 1.0 + p99: 3.0 + std: 1.0 +577: + mean: 1.0 + p99: 3.0 + std: 1.0 +578: + mean: 1.0 + p99: 3.0 + std: 1.0 +580: + mean: 1.0 + p99: 3.0 + std: 1.0 +582: + mean: 1.0 + p99: 3.0 + std: 1.0 +583: + mean: 1.0 + p99: 3.0 + std: 1.0 +584: + mean: 1.0 + p99: 3.0 + std: 1.0 +587: + mean: 1.0 + p99: 3.0 + std: 1.0 +588: + mean: 1.0 + p99: 3.0 + std: 1.0 +589: + mean: 1.0 + p99: 3.0 + std: 1.0 +590: + mean: 1.0 + p99: 3.0 + std: 1.0 +592: + mean: 1.0 + p99: 3.0 + std: 1.0 +593: + mean: 1.0 + p99: 3.0 + std: 1.0 +596: + mean: 1.0 + p99: 3.0 + std: 1.0 +598: + mean: 1.0 + p99: 3.0 + std: 1.0 +599: + mean: 1.0 + p99: 3.0 + std: 1.0 +600: + 
mean: 1.0 + p99: 3.0 + std: 1.0 +602: + mean: 1.0 + p99: 3.0 + std: 1.0 +628: + mean: 1.0 + p99: 3.0 + std: 1.0 +631: + mean: 1.0 + p99: 3.0 + std: 1.0 +632: + mean: 1.0 + p99: 3.0 + std: 1.0 +634: + mean: 1.0 + p99: 3.0 + std: 1.0 +635: + mean: 1.0 + p99: 3.0 + std: 1.0 +636: + mean: 1.0 + p99: 3.0 + std: 1.0 +638: + mean: 1.0 + p99: 3.0 + std: 1.0 +639: + mean: 1.0 + p99: 3.0 + std: 1.0 +640: + mean: 1.0 + p99: 3.0 + std: 1.0 +641: + mean: 1.0 + p99: 3.0 + std: 1.0 +642: + mean: 1.0 + p99: 3.0 + std: 1.0 +643: + mean: 1.0 + p99: 3.0 + std: 1.0 +644: + mean: 1.0 + p99: 3.0 + std: 1.0 +646: + mean: 1.0 + p99: 3.0 + std: 1.0 +647: + mean: 1.0 + p99: 3.0 + std: 1.0 +649: + mean: 1.0 + p99: 3.0 + std: 1.0 +650: + mean: 1.0 + p99: 3.0 + std: 1.0 +653: + mean: 1.0 + p99: 3.0 + std: 1.0 +655: + mean: 1.0 + p99: 3.0 + std: 1.0 +656: + mean: 1.0 + p99: 3.0 + std: 1.0 +657: + mean: 1.0 + p99: 3.0 + std: 1.0 +658: + mean: 1.0 + p99: 3.0 + std: 1.0 +659: + mean: 1.0 + p99: 3.0 + std: 1.0 +660: + mean: 1.0 + p99: 3.0 + std: 1.0 +661: + mean: 1.0 + p99: 3.0 + std: 1.0 +662: + mean: 1.0 + p99: 3.0 + std: 1.0 +663: + mean: 1.0 + p99: 3.0 + std: 1.0 +664: + mean: 1.0 + p99: 3.0 + std: 1.0 +665: + mean: 1.0 + p99: 3.0 + std: 1.0 +666: + mean: 1.0 + p99: 3.0 + std: 1.0 +667: + mean: 1.0 + p99: 3.0 + std: 1.0 +668: + mean: 1.0 + p99: 3.0 + std: 1.0 +669: + mean: 1.0 + p99: 3.0 + std: 1.0 +670: + mean: 1.0 + p99: 3.0 + std: 1.0 +671: + mean: 1.0 + p99: 3.0 + std: 1.0 +672: + mean: 1.0 + p99: 3.0 + std: 1.0 +673: + mean: 1.0 + p99: 3.0 + std: 1.0 +674: + mean: 1.0 + p99: 3.0 + std: 1.0 +675: + mean: 1.0 + p99: 3.0 + std: 1.0 +676: + mean: 1.0 + p99: 3.0 + std: 1.0 +677: + mean: 1.0 + p99: 3.0 + std: 1.0 +680: + mean: 1.0 + p99: 3.0 + std: 1.0 +683: + mean: 1.0 + p99: 3.0 + std: 1.0 +684: + mean: 1.0 + p99: 3.0 + std: 1.0 +685: + mean: 1.0 + p99: 3.0 + std: 1.0 +686: + mean: 1.0 + p99: 3.0 + std: 1.0 +687: + mean: 1.0 + p99: 3.0 + std: 1.0 +688: + mean: 1.0 + p99: 3.0 + std: 1.0 +689: + mean: 1.0 + p99: 3.0 + std: 1.0 +690: + mean: 1.0 + p99: 3.0 + std: 1.0 +692: + mean: 1.0 + p99: 3.0 + std: 1.0 +693: + mean: 1.0 + p99: 3.0 + std: 1.0 +696: + mean: 1.0 + p99: 3.0 + std: 1.0 +697: + mean: 1.0 + p99: 3.0 + std: 1.0 +698: + mean: 1.0 + p99: 3.0 + std: 1.0 +699: + mean: 1.0 + p99: 3.0 + std: 1.0 +700: + mean: 1.0 + p99: 3.0 + std: 1.0 +701: + mean: 1.0 + p99: 3.0 + std: 1.0 +702: + mean: 1.0 + p99: 3.0 + std: 1.0 +703: + mean: 1.0 + p99: 3.0 + std: 1.0 +704: + mean: 1.0 + p99: 3.0 + std: 1.0 +705: + mean: 1.0 + p99: 3.0 + std: 1.0 +706: + mean: 1.0 + p99: 3.0 + std: 1.0 +707: + mean: 1.0 + p99: 3.0 + std: 1.0 +708: + mean: 1.0 + p99: 3.0 + std: 1.0 +709: + mean: 1.0 + p99: 3.0 + std: 1.0 +710: + mean: 1.0 + p99: 3.0 + std: 1.0 +711: + mean: 1.0 + p99: 3.0 + std: 1.0 +712: + mean: 1.0 + p99: 3.0 + std: 1.0 +713: + mean: 1.0 + p99: 3.0 + std: 1.0 +714: + mean: 1.0 + p99: 3.0 + std: 1.0 +715: + mean: 1.0 + p99: 3.0 + std: 1.0 +716: + mean: 1.0 + p99: 3.0 + std: 1.0 +717: + mean: 1.0 + p99: 3.0 + std: 1.0 +718: + mean: 1.0 + p99: 3.0 + std: 1.0 +719: + mean: 1.0 + p99: 3.0 + std: 1.0 +720: + mean: 1.0 + p99: 3.0 + std: 1.0 +722: + mean: 1.0 + p99: 3.0 + std: 1.0 +723: + mean: 1.0 + p99: 3.0 + std: 1.0 +724: + mean: 1.0 + p99: 3.0 + std: 1.0 +725: + mean: 1.0 + p99: 3.0 + std: 1.0 +726: + mean: 1.0 + p99: 3.0 + std: 1.0 +729: + mean: 1.0 + p99: 3.0 + std: 1.0 +732: + mean: 1.0 + p99: 3.0 + std: 1.0 +733: + mean: 1.0 + p99: 3.0 + std: 1.0 +734: + mean: 1.0 + p99: 3.0 + std: 1.0 +735: + mean: 1.0 + p99: 3.0 + std: 1.0 
+736: + mean: 1.0 + p99: 3.0 + std: 1.0 +738: + mean: 1.0 + p99: 3.0 + std: 1.0 +741: + mean: 1.0 + p99: 3.0 + std: 1.0 +743: + mean: 1.0 + p99: 3.0 + std: 1.0 +745: + mean: 1.0 + p99: 3.0 + std: 1.0 +746: + mean: 1.0 + p99: 3.0 + std: 1.0 +749: + mean: 1.0 + p99: 3.0 + std: 1.0 +751: + mean: 1.0 + p99: 3.0 + std: 1.0 +752: + mean: 1.0 + p99: 3.0 + std: 1.0 +753: + mean: 1.0 + p99: 3.0 + std: 1.0 +755: + mean: 1.0 + p99: 3.0 + std: 1.0 +756: + mean: 1.0 + p99: 3.0 + std: 1.0 +757: + mean: 1.0 + p99: 3.0 + std: 1.0 +758: + mean: 1.0 + p99: 3.0 + std: 1.0 +759: + mean: 1.0 + p99: 3.0 + std: 1.0 +761: + mean: 1.0 + p99: 3.0 + std: 1.0 +762: + mean: 1.0 + p99: 3.0 + std: 1.0 +768: + mean: 1.0 + p99: 3.0 + std: 1.0 +769: + mean: 1.0 + p99: 3.0 + std: 1.0 +776: + mean: 1.0 + p99: 3.0 + std: 1.0 +777: + mean: 1.0 + p99: 3.0 + std: 1.0 +778: + mean: 1.0 + p99: 3.0 + std: 1.0 +779: + mean: 1.0 + p99: 3.0 + std: 1.0 +781: + mean: 1.0 + p99: 3.0 + std: 1.0 +782: + mean: 1.0 + p99: 3.0 + std: 1.0 +785: + mean: 1.0 + p99: 3.0 + std: 1.0 +788: + mean: 1.0 + p99: 3.0 + std: 1.0 +790: + mean: 1.0 + p99: 3.0 + std: 1.0 +791: + mean: 1.0 + p99: 3.0 + std: 1.0 +792: + mean: 1.0 + p99: 3.0 + std: 1.0 +794: + mean: 1.0 + p99: 3.0 + std: 1.0 +796: + mean: 1.0 + p99: 3.0 + std: 1.0 +801: + mean: 1.0 + p99: 3.0 + std: 1.0 +802: + mean: 1.0 + p99: 3.0 + std: 1.0 +803: + mean: 1.0 + p99: 3.0 + std: 1.0 +804: + mean: 1.0 + p99: 3.0 + std: 1.0 +811: + mean: 1.0 + p99: 3.0 + std: 1.0 +812: + mean: 1.0 + p99: 3.0 + std: 1.0 +813: + mean: 1.0 + p99: 3.0 + std: 1.0 +814: + mean: 1.0 + p99: 3.0 + std: 1.0 +837: + mean: 1.0 + p99: 3.0 + std: 1.0 +843: + mean: 1.0 + p99: 3.0 + std: 1.0 +844: + mean: 1.0 + p99: 3.0 + std: 1.0 +845: + mean: 1.0 + p99: 3.0 + std: 1.0 +846: + mean: 1.0 + p99: 3.0 + std: 1.0 +847: + mean: 1.0 + p99: 3.0 + std: 1.0 +852: + mean: 1.0 + p99: 3.0 + std: 1.0 +870: + mean: 1.0 + p99: 3.0 + std: 1.0 +874: + mean: 1.0 + p99: 3.0 + std: 1.0 +876: + mean: 1.0 + p99: 3.0 + std: 1.0 +877: + mean: 1.0 + p99: 3.0 + std: 1.0 +878: + mean: 1.0 + p99: 3.0 + std: 1.0 +879: + mean: 1.0 + p99: 3.0 + std: 1.0 +880: + mean: 1.0 + p99: 3.0 + std: 1.0 +938: + mean: 1.0 + p99: 3.0 + std: 1.0 +939: + mean: 1.0 + p99: 3.0 + std: 1.0 +940: + mean: 1.0 + p99: 3.0 + std: 1.0 +941: + mean: 1.0 + p99: 3.0 + std: 1.0 +942: + mean: 1.0 + p99: 3.0 + std: 1.0 +943: + mean: 1.0 + p99: 3.0 + std: 1.0 +945: + mean: 1.0 + p99: 3.0 + std: 1.0 +946: + mean: 1.0 + p99: 3.0 + std: 1.0 +947: + mean: 1.0 + p99: 3.0 + std: 1.0 +948: + mean: 1.0 + p99: 3.0 + std: 1.0 +949: + mean: 1.0 + p99: 3.0 + std: 1.0 +950: + mean: 1.0 + p99: 3.0 + std: 1.0 +952: + mean: 1.0 + p99: 3.0 + std: 1.0 +953: + mean: 1.0 + p99: 3.0 + std: 1.0 +954: + mean: 1.0 + p99: 3.0 + std: 1.0 +955: + mean: 1.0 + p99: 3.0 + std: 1.0 +956: + mean: 1.0 + p99: 3.0 + std: 1.0 +957: + mean: 1.0 + p99: 3.0 + std: 1.0 +958: + mean: 1.0 + p99: 3.0 + std: 1.0 +959: + mean: 1.0 + p99: 3.0 + std: 1.0 +961: + mean: 1.0 + p99: 3.0 + std: 1.0 +962: + mean: 1.0 + p99: 3.0 + std: 1.0 +963: + mean: 1.0 + p99: 3.0 + std: 1.0 +964: + mean: 1.0 + p99: 3.0 + std: 1.0 +965: + mean: 1.0 + p99: 3.0 + std: 1.0 +966: + mean: 1.0 + p99: 3.0 + std: 1.0 +967: + mean: 1.0 + p99: 3.0 + std: 1.0 +968: + mean: 1.0 + p99: 3.0 + std: 1.0 +971: + mean: 1.0 + p99: 3.0 + std: 1.0 +972: + mean: 1.0 + p99: 3.0 + std: 1.0 +973: + mean: 1.0 + p99: 3.0 + std: 1.0 +975: + mean: 1.0 + p99: 3.0 + std: 1.0 +976: + mean: 1.0 + p99: 3.0 + std: 1.0 +977: + mean: 1.0 + p99: 3.0 + std: 1.0 +978: + mean: 1.0 + p99: 3.0 + 
std: 1.0 +989: + mean: 1.0 + p99: 3.0 + std: 1.0 +990: + mean: 1.0 + p99: 3.0 + std: 1.0 +992: + mean: 1.0 + p99: 3.0 + std: 1.0 +995: + mean: 1.0 + p99: 3.0 + std: 1.0 +996: + mean: 1.0 + p99: 3.0 + std: 1.0 +997: + mean: 1.0 + p99: 3.0 + std: 1.0 +999: + mean: 1.0 + p99: 3.0 + std: 1.0 +1000: + mean: 1.0 + p99: 3.0 + std: 1.0 +1001: + mean: 1.0 + p99: 3.0 + std: 1.0 +1002: + mean: 1.0 + p99: 3.0 + std: 1.0 +1013: + mean: 1.0 + p99: 3.0 + std: 1.0 +1014: + mean: 1.0 + p99: 3.0 + std: 1.0 +1020: + mean: 1.0 + p99: 3.0 + std: 1.0 +1021: + mean: 1.0 + p99: 3.0 + std: 1.0 +1023: + mean: 1.0 + p99: 3.0 + std: 1.0 +1024: + mean: 1.0 + p99: 3.0 + std: 1.0 +1025: + mean: 1.0 + p99: 3.0 + std: 1.0 +1027: + mean: 1.0 + p99: 3.0 + std: 1.0 +1029: + mean: 1.0 + p99: 3.0 + std: 1.0 +1030: + mean: 1.0 + p99: 3.0 + std: 1.0 +1031: + mean: 1.0 + p99: 3.0 + std: 1.0 +1032: + mean: 1.0 + p99: 3.0 + std: 1.0 +1033: + mean: 1.0 + p99: 3.0 + std: 1.0 +1034: + mean: 1.0 + p99: 3.0 + std: 1.0 +1035: + mean: 1.0 + p99: 3.0 + std: 1.0 +1036: + mean: 1.0 + p99: 3.0 + std: 1.0 +1045: + mean: 1.0 + p99: 3.0 + std: 1.0 +1046: + mean: 1.0 + p99: 3.0 + std: 1.0 +1047: + mean: 1.0 + p99: 3.0 + std: 1.0 +1048: + mean: 1.0 + p99: 3.0 + std: 1.0 +1049: + mean: 1.0 + p99: 3.0 + std: 1.0 +1050: + mean: 1.0 + p99: 3.0 + std: 1.0 +1051: + mean: 1.0 + p99: 3.0 + std: 1.0 +1059: + mean: 1.0 + p99: 3.0 + std: 1.0 +1061: + mean: 1.0 + p99: 3.0 + std: 1.0 +1063: + mean: 1.0 + p99: 3.0 + std: 1.0 +1065: + mean: 1.0 + p99: 3.0 + std: 1.0 +1067: + mean: 1.0 + p99: 3.0 + std: 1.0 +1069: + mean: 1.0 + p99: 3.0 + std: 1.0 +1072: + mean: 1.0 + p99: 3.0 + std: 1.0 +1073: + mean: 1.0 + p99: 3.0 + std: 1.0 +1075: + mean: 1.0 + p99: 3.0 + std: 1.0 +1076: + mean: 1.0 + p99: 3.0 + std: 1.0 +1078: + mean: 1.0 + p99: 3.0 + std: 1.0 +1082: + mean: 1.0 + p99: 3.0 + std: 1.0 +1083: + mean: 1.0 + p99: 3.0 + std: 1.0 +1084: + mean: 1.0 + p99: 3.0 + std: 1.0 +1090: + mean: 1.0 + p99: 3.0 + std: 1.0 +1091: + mean: 1.0 + p99: 3.0 + std: 1.0 +1092: + mean: 1.0 + p99: 3.0 + std: 1.0 +1096: + mean: 1.0 + p99: 3.0 + std: 1.0 +1099: + mean: 1.0 + p99: 3.0 + std: 1.0 +1101: + mean: 1.0 + p99: 3.0 + std: 1.0 +1102: + mean: 1.0 + p99: 3.0 + std: 1.0 +1103: + mean: 1.0 + p99: 3.0 + std: 1.0 +1104: + mean: 1.0 + p99: 3.0 + std: 1.0 +1105: + mean: 1.0 + p99: 3.0 + std: 1.0 +1106: + mean: 1.0 + p99: 3.0 + std: 1.0 +1107: + mean: 1.0 + p99: 3.0 + std: 1.0 +1108: + mean: 1.0 + p99: 3.0 + std: 1.0 +1113: + mean: 1.0 + p99: 3.0 + std: 1.0 +1114: + mean: 1.0 + p99: 3.0 + std: 1.0 +1115: + mean: 1.0 + p99: 3.0 + std: 1.0 +1116: + mean: 1.0 + p99: 3.0 + std: 1.0 +1117: + mean: 1.0 + p99: 3.0 + std: 1.0 +1120: + mean: 1.0 + p99: 3.0 + std: 1.0 +1122: + mean: 1.0 + p99: 3.0 + std: 1.0 +1123: + mean: 1.0 + p99: 3.0 + std: 1.0 +1128: + mean: 1.0 + p99: 3.0 + std: 1.0 +1129: + mean: 1.0 + p99: 3.0 + std: 1.0 +1130: + mean: 1.0 + p99: 3.0 + std: 1.0 +1131: + mean: 1.0 + p99: 3.0 + std: 1.0 +1132: + mean: 1.0 + p99: 3.0 + std: 1.0 +1133: + mean: 1.0 + p99: 3.0 + std: 1.0 +1134: + mean: 1.0 + p99: 3.0 + std: 1.0 +1136: + mean: 1.0 + p99: 3.0 + std: 1.0 +1138: + mean: 1.0 + p99: 3.0 + std: 1.0 +1141: + mean: 1.0 + p99: 3.0 + std: 1.0 +1142: + mean: 1.0 + p99: 3.0 + std: 1.0 +1144: + mean: 1.0 + p99: 3.0 + std: 1.0 +1145: + mean: 1.0 + p99: 3.0 + std: 1.0 +1146: + mean: 1.0 + p99: 3.0 + std: 1.0 +1148: + mean: 1.0 + p99: 3.0 + std: 1.0 +1149: + mean: 1.0 + p99: 3.0 + std: 1.0 +1150: + mean: 1.0 + p99: 3.0 + std: 1.0 +1151: + mean: 1.0 + p99: 3.0 + std: 1.0 +1152: + mean: 1.0 + p99: 
3.0 + std: 1.0 +1153: + mean: 1.0 + p99: 3.0 + std: 1.0 +1154: + mean: 1.0 + p99: 3.0 + std: 1.0 +1155: + mean: 1.0 + p99: 3.0 + std: 1.0 +1156: + mean: 1.0 + p99: 3.0 + std: 1.0 +1157: + mean: 1.0 + p99: 3.0 + std: 1.0 +1158: + mean: 1.0 + p99: 3.0 + std: 1.0 +1159: + mean: 1.0 + p99: 3.0 + std: 1.0 +1160: + mean: 1.0 + p99: 3.0 + std: 1.0 +1161: + mean: 1.0 + p99: 3.0 + std: 1.0 +1162: + mean: 1.0 + p99: 3.0 + std: 1.0 +1163: + mean: 1.0 + p99: 3.0 + std: 1.0 +1165: + mean: 1.0 + p99: 3.0 + std: 1.0 +1168: + mean: 1.0 + p99: 3.0 + std: 1.0 +1171: + mean: 1.0 + p99: 3.0 + std: 1.0 +1172: + mean: 1.0 + p99: 3.0 + std: 1.0 +1181: + mean: 1.0 + p99: 3.0 + std: 1.0 +1182: + mean: 1.0 + p99: 3.0 + std: 1.0 +1183: + mean: 1.0 + p99: 3.0 + std: 1.0 +1184: + mean: 1.0 + p99: 3.0 + std: 1.0 +1185: + mean: 1.0 + p99: 3.0 + std: 1.0 +1186: + mean: 1.0 + p99: 3.0 + std: 1.0 +1187: + mean: 1.0 + p99: 3.0 + std: 1.0 +1189: + mean: 1.0 + p99: 3.0 + std: 1.0 +1190: + mean: 1.0 + p99: 3.0 + std: 1.0 +1191: + mean: 1.0 + p99: 3.0 + std: 1.0 +1193: + mean: 1.0 + p99: 3.0 + std: 1.0 +1194: + mean: 1.0 + p99: 3.0 + std: 1.0 +1195: + mean: 1.0 + p99: 3.0 + std: 1.0 +1196: + mean: 1.0 + p99: 3.0 + std: 1.0 +1202: + mean: 1.0 + p99: 3.0 + std: 1.0 +1205: + mean: 1.0 + p99: 3.0 + std: 1.0 +1207: + mean: 1.0 + p99: 3.0 + std: 1.0 +1208: + mean: 1.0 + p99: 3.0 + std: 1.0 +1209: + mean: 1.0 + p99: 3.0 + std: 1.0 +1211: + mean: 1.0 + p99: 3.0 + std: 1.0 +1212: + mean: 1.0 + p99: 3.0 + std: 1.0 +1213: + mean: 1.0 + p99: 3.0 + std: 1.0 +1214: + mean: 1.0 + p99: 3.0 + std: 1.0 +1215: + mean: 1.0 + p99: 3.0 + std: 1.0 +1216: + mean: 1.0 + p99: 3.0 + std: 1.0 +1217: + mean: 1.0 + p99: 3.0 + std: 1.0 +1218: + mean: 1.0 + p99: 3.0 + std: 1.0 +1219: + mean: 1.0 + p99: 3.0 + std: 1.0 +1220: + mean: 1.0 + p99: 3.0 + std: 1.0 +1221: + mean: 1.0 + p99: 3.0 + std: 1.0 +1222: + mean: 1.0 + p99: 3.0 + std: 1.0 +1224: + mean: 1.0 + p99: 3.0 + std: 1.0 +1226: + mean: 1.0 + p99: 3.0 + std: 1.0 +1230: + mean: 1.0 + p99: 3.0 + std: 1.0 +1231: + mean: 1.0 + p99: 3.0 + std: 1.0 +1232: + mean: 1.0 + p99: 3.0 + std: 1.0 +1233: + mean: 1.0 + p99: 3.0 + std: 1.0 +1234: + mean: 1.0 + p99: 3.0 + std: 1.0 +1235: + mean: 1.0 + p99: 3.0 + std: 1.0 +1236: + mean: 1.0 + p99: 3.0 + std: 1.0 +1237: + mean: 1.0 + p99: 3.0 + std: 1.0 +1238: + mean: 1.0 + p99: 3.0 + std: 1.0 +1239: + mean: 1.0 + p99: 3.0 + std: 1.0 +1242: + mean: 1.0 + p99: 3.0 + std: 1.0 +1243: + mean: 1.0 + p99: 3.0 + std: 1.0 +1244: + mean: 1.0 + p99: 3.0 + std: 1.0 +1246: + mean: 1.0 + p99: 3.0 + std: 1.0 +1247: + mean: 1.0 + p99: 3.0 + std: 1.0 +1248: + mean: 1.0 + p99: 3.0 + std: 1.0 +1249: + mean: 1.0 + p99: 3.0 + std: 1.0 +1250: + mean: 1.0 + p99: 3.0 + std: 1.0 +1251: + mean: 1.0 + p99: 3.0 + std: 1.0 +1252: + mean: 1.0 + p99: 3.0 + std: 1.0 +1253: + mean: 1.0 + p99: 3.0 + std: 1.0 +1255: + mean: 1.0 + p99: 3.0 + std: 1.0 +1256: + mean: 1.0 + p99: 3.0 + std: 1.0 +1258: + mean: 1.0 + p99: 3.0 + std: 1.0 +1259: + mean: 1.0 + p99: 3.0 + std: 1.0 +1260: + mean: 1.0 + p99: 3.0 + std: 1.0 +1261: + mean: 1.0 + p99: 3.0 + std: 1.0 +1262: + mean: 1.0 + p99: 3.0 + std: 1.0 +1263: + mean: 1.0 + p99: 3.0 + std: 1.0 +1264: + mean: 1.0 + p99: 3.0 + std: 1.0 +1265: + mean: 1.0 + p99: 3.0 + std: 1.0 +1266: + mean: 1.0 + p99: 3.0 + std: 1.0 +1267: + mean: 1.0 + p99: 3.0 + std: 1.0 +1270: + mean: 1.0 + p99: 3.0 + std: 1.0 +1271: + mean: 1.0 + p99: 3.0 + std: 1.0 +1272: + mean: 1.0 + p99: 3.0 + std: 1.0 +1273: + mean: 1.0 + p99: 3.0 + std: 1.0 +1274: + mean: 1.0 + p99: 3.0 + std: 1.0 +1276: + 
mean: 1.0 + p99: 3.0 + std: 1.0 +1277: + mean: 1.0 + p99: 3.0 + std: 1.0 +1280: + mean: 1.0 + p99: 3.0 + std: 1.0 +1281: + mean: 1.0 + p99: 3.0 + std: 1.0 +1282: + mean: 1.0 + p99: 3.0 + std: 1.0 +1284: + mean: 1.0 + p99: 3.0 + std: 1.0 +1285: + mean: 1.0 + p99: 3.0 + std: 1.0 +1286: + mean: 1.0 + p99: 3.0 + std: 1.0 +1287: + mean: 1.0 + p99: 3.0 + std: 1.0 +1288: + mean: 1.0 + p99: 3.0 + std: 1.0 +1291: + mean: 1.0 + p99: 3.0 + std: 1.0 +1292: + mean: 1.0 + p99: 3.0 + std: 1.0 +1294: + mean: 1.0 + p99: 3.0 + std: 1.0 +1295: + mean: 1.0 + p99: 3.0 + std: 1.0 +1296: + mean: 1.0 + p99: 3.0 + std: 1.0 +1298: + mean: 1.0 + p99: 3.0 + std: 1.0 +1300: + mean: 1.0 + p99: 3.0 + std: 1.0 +1301: + mean: 1.0 + p99: 3.0 + std: 1.0 +1302: + mean: 1.0 + p99: 3.0 + std: 1.0 +1303: + mean: 1.0 + p99: 3.0 + std: 1.0 +1304: + mean: 1.0 + p99: 3.0 + std: 1.0 +1305: + mean: 1.0 + p99: 3.0 + std: 1.0 +1306: + mean: 1.0 + p99: 3.0 + std: 1.0 +1307: + mean: 1.0 + p99: 3.0 + std: 1.0 +1308: + mean: 1.0 + p99: 3.0 + std: 1.0 +1310: + mean: 1.0 + p99: 3.0 + std: 1.0 +1311: + mean: 1.0 + p99: 3.0 + std: 1.0 +1312: + mean: 1.0 + p99: 3.0 + std: 1.0 +1313: + mean: 1.0 + p99: 3.0 + std: 1.0 +1314: + mean: 1.0 + p99: 3.0 + std: 1.0 +1315: + mean: 1.0 + p99: 3.0 + std: 1.0 +1316: + mean: 1.0 + p99: 3.0 + std: 1.0 +1317: + mean: 1.0 + p99: 3.0 + std: 1.0 +1318: + mean: 1.0 + p99: 3.0 + std: 1.0 +1319: + mean: 1.0 + p99: 3.0 + std: 1.0 +1320: + mean: 1.0 + p99: 3.0 + std: 1.0 +1321: + mean: 1.0 + p99: 3.0 + std: 1.0 +1322: + mean: 1.0 + p99: 3.0 + std: 1.0 +1323: + mean: 1.0 + p99: 3.0 + std: 1.0 +1324: + mean: 1.0 + p99: 3.0 + std: 1.0 +1325: + mean: 1.0 + p99: 3.0 + std: 1.0 +1326: + mean: 1.0 + p99: 3.0 + std: 1.0 +1327: + mean: 1.0 + p99: 3.0 + std: 1.0 +1329: + mean: 1.0 + p99: 3.0 + std: 1.0 +1330: + mean: 1.0 + p99: 3.0 + std: 1.0 +1331: + mean: 1.0 + p99: 3.0 + std: 1.0 +1333: + mean: 1.0 + p99: 3.0 + std: 1.0 +1334: + mean: 1.0 + p99: 3.0 + std: 1.0 +1336: + mean: 1.0 + p99: 3.0 + std: 1.0 +1337: + mean: 1.0 + p99: 3.0 + std: 1.0 +1338: + mean: 1.0 + p99: 3.0 + std: 1.0 +1340: + mean: 1.0 + p99: 3.0 + std: 1.0 +1342: + mean: 1.0 + p99: 3.0 + std: 1.0 +1344: + mean: 1.0 + p99: 3.0 + std: 1.0 +1346: + mean: 1.0 + p99: 3.0 + std: 1.0 +1349: + mean: 1.0 + p99: 3.0 + std: 1.0 +1352: + mean: 1.0 + p99: 3.0 + std: 1.0 +1355: + mean: 1.0 + p99: 3.0 + std: 1.0 +1356: + mean: 1.0 + p99: 3.0 + std: 1.0 +1358: + mean: 1.0 + p99: 3.0 + std: 1.0 +1360: + mean: 1.0 + p99: 3.0 + std: 1.0 +1364: + mean: 1.0 + p99: 3.0 + std: 1.0 +1365: + mean: 1.0 + p99: 3.0 + std: 1.0 +1366: + mean: 1.0 + p99: 3.0 + std: 1.0 +1367: + mean: 1.0 + p99: 3.0 + std: 1.0 +1369: + mean: 1.0 + p99: 3.0 + std: 1.0 +1370: + mean: 1.0 + p99: 3.0 + std: 1.0 +1371: + mean: 1.0 + p99: 3.0 + std: 1.0 +1372: + mean: 1.0 + p99: 3.0 + std: 1.0 +1373: + mean: 1.0 + p99: 3.0 + std: 1.0 +1374: + mean: 1.0 + p99: 3.0 + std: 1.0 +1375: + mean: 1.0 + p99: 3.0 + std: 1.0 +1376: + mean: 1.0 + p99: 3.0 + std: 1.0 +1377: + mean: 1.0 + p99: 3.0 + std: 1.0 +1378: + mean: 1.0 + p99: 3.0 + std: 1.0 +1380: + mean: 1.0 + p99: 3.0 + std: 1.0 +1381: + mean: 1.0 + p99: 3.0 + std: 1.0 +1382: + mean: 1.0 + p99: 3.0 + std: 1.0 +1383: + mean: 1.0 + p99: 3.0 + std: 1.0 +1384: + mean: 1.0 + p99: 3.0 + std: 1.0 +1385: + mean: 1.0 + p99: 3.0 + std: 1.0 +1386: + mean: 1.0 + p99: 3.0 + std: 1.0 +1387: + mean: 1.0 + p99: 3.0 + std: 1.0 +1388: + mean: 1.0 + p99: 3.0 + std: 1.0 +1389: + mean: 1.0 + p99: 3.0 + std: 1.0 +1390: + mean: 1.0 + p99: 3.0 + std: 1.0 +1391: + mean: 1.0 + p99: 3.0 + std: 
1.0 +1393: + mean: 1.0 + p99: 3.0 + std: 1.0 +1394: + mean: 1.0 + p99: 3.0 + std: 1.0 +1395: + mean: 1.0 + p99: 3.0 + std: 1.0 +1396: + mean: 1.0 + p99: 3.0 + std: 1.0 +1397: + mean: 1.0 + p99: 3.0 + std: 1.0 +1398: + mean: 1.0 + p99: 3.0 + std: 1.0 +1399: + mean: 1.0 + p99: 3.0 + std: 1.0 +1400: + mean: 1.0 + p99: 3.0 + std: 1.0 +1402: + mean: 1.0 + p99: 3.0 + std: 1.0 +1403: + mean: 1.0 + p99: 3.0 + std: 1.0 +1404: + mean: 1.0 + p99: 3.0 + std: 1.0 +1406: + mean: 1.0 + p99: 3.0 + std: 1.0 +1407: + mean: 1.0 + p99: 3.0 + std: 1.0 +1408: + mean: 1.0 + p99: 3.0 + std: 1.0 +1409: + mean: 1.0 + p99: 3.0 + std: 1.0 +1410: + mean: 1.0 + p99: 3.0 + std: 1.0 +1411: + mean: 1.0 + p99: 3.0 + std: 1.0 +1412: + mean: 1.0 + p99: 3.0 + std: 1.0 +1413: + mean: 1.0 + p99: 3.0 + std: 1.0 +1414: + mean: 1.0 + p99: 3.0 + std: 1.0 +1415: + mean: 1.0 + p99: 3.0 + std: 1.0 +1417: + mean: 1.0 + p99: 3.0 + std: 1.0 +1420: + mean: 1.0 + p99: 3.0 + std: 1.0 +1421: + mean: 1.0 + p99: 3.0 + std: 1.0 +1422: + mean: 1.0 + p99: 3.0 + std: 1.0 +1423: + mean: 1.0 + p99: 3.0 + std: 1.0 +1424: + mean: 1.0 + p99: 3.0 + std: 1.0 +1426: + mean: 1.0 + p99: 3.0 + std: 1.0 +1427: + mean: 1.0 + p99: 3.0 + std: 1.0 +1428: + mean: 1.0 + p99: 3.0 + std: 1.0 +1429: + mean: 1.0 + p99: 3.0 + std: 1.0 +1430: + mean: 1.0 + p99: 3.0 + std: 1.0 +1432: + mean: 1.0 + p99: 3.0 + std: 1.0 +1433: + mean: 1.0 + p99: 3.0 + std: 1.0 +1434: + mean: 1.0 + p99: 3.0 + std: 1.0 +1435: + mean: 1.0 + p99: 3.0 + std: 1.0 +1436: + mean: 1.0 + p99: 3.0 + std: 1.0 +1438: + mean: 1.0 + p99: 3.0 + std: 1.0 +1442: + mean: 1.0 + p99: 3.0 + std: 1.0 +1443: + mean: 1.0 + p99: 3.0 + std: 1.0 +1444: + mean: 1.0 + p99: 3.0 + std: 1.0 +1445: + mean: 1.0 + p99: 3.0 + std: 1.0 +1450: + mean: 1.0 + p99: 3.0 + std: 1.0 +1452: + mean: 1.0 + p99: 3.0 + std: 1.0 +1453: + mean: 1.0 + p99: 3.0 + std: 1.0 +1454: + mean: 1.0 + p99: 3.0 + std: 1.0 +1455: + mean: 1.0 + p99: 3.0 + std: 1.0 +1456: + mean: 1.0 + p99: 3.0 + std: 1.0 +1458: + mean: 1.0 + p99: 3.0 + std: 1.0 +1459: + mean: 1.0 + p99: 3.0 + std: 1.0 +1462: + mean: 1.0 + p99: 3.0 + std: 1.0 +1463: + mean: 1.0 + p99: 3.0 + std: 1.0 +1464: + mean: 1.0 + p99: 3.0 + std: 1.0 +1466: + mean: 1.0 + p99: 3.0 + std: 1.0 +1469: + mean: 1.0 + p99: 3.0 + std: 1.0 +1471: + mean: 1.0 + p99: 3.0 + std: 1.0 +1473: + mean: 1.0 + p99: 3.0 + std: 1.0 +1476: + mean: 1.0 + p99: 3.0 + std: 1.0 +1479: + mean: 1.0 + p99: 3.0 + std: 1.0 +1481: + mean: 1.0 + p99: 3.0 + std: 1.0 +1484: + mean: 1.0 + p99: 3.0 + std: 1.0 +1487: + mean: 1.0 + p99: 3.0 + std: 1.0 diff --git a/sample_dataset/id_manager/operation_id.yml b/sample_dataset/id_manager/operation_id.yml new file mode 100644 index 0000000..eed078d --- /dev/null +++ b/sample_dataset/id_manager/operation_id.yml @@ -0,0 +1,1489 @@ +? 
'' +: 0 +1/1: 1 +10/50: 50 +10/51: 51 +10/52: 52 +10/53: 53 +100/444: 444 +100/445: 445 +100/446: 446 +100/447: 447 +100/448: 448 +101/449: 449 +101/450: 450 +101/451: 451 +101/452: 452 +101/453: 453 +102/454: 454 +102/455: 455 +102/456: 456 +102/457: 457 +102/458: 458 +102/459: 459 +103/460: 460 +103/461: 461 +103/462: 462 +103/463: 463 +103/464: 464 +103/465: 465 +103/466: 466 +103/467: 467 +103/468: 468 +103/469: 469 +104/470: 470 +105/471: 471 +106/472: 472 +107/473: 473 +108/474: 474 +109/475: 475 +109/476: 476 +109/477: 477 +11/54: 54 +11/55: 55 +11/56: 56 +11/57: 57 +11/58: 58 +11/59: 59 +11/60: 60 +11/61: 61 +110/478: 478 +111/479: 479 +112/480: 480 +112/481: 481 +112/482: 482 +113/483: 483 +113/484: 484 +113/485: 485 +113/486: 486 +113/487: 487 +114/488: 488 +114/489: 489 +114/490: 490 +114/491: 491 +115/492: 492 +115/493: 493 +115/494: 494 +115/495: 495 +115/496: 496 +115/497: 497 +115/498: 498 +115/499: 499 +115/500: 500 +115/501: 501 +115/502: 502 +115/503: 503 +116/504: 504 +116/505: 505 +116/506: 506 +116/507: 507 +116/508: 508 +116/509: 509 +116/510: 510 +117/511: 511 +117/512: 512 +118/513: 513 +119/514: 514 +12/62: 62 +12/63: 63 +120/515: 515 +121/516: 516 +121/517: 517 +121/518: 518 +122/519: 519 +123/520: 520 +124/521: 521 +124/522: 522 +125/523: 523 +126/524: 524 +127/525: 525 +127/526: 526 +127/527: 527 +128/528: 528 +128/529: 529 +128/530: 530 +128/531: 531 +128/532: 532 +128/533: 533 +128/534: 534 +128/535: 535 +128/536: 536 +128/537: 537 +128/538: 538 +128/539: 539 +128/540: 540 +128/541: 541 +128/542: 542 +129/543: 543 +129/544: 544 +129/545: 545 +129/546: 546 +129/547: 547 +129/548: 548 +129/549: 549 +129/550: 550 +129/551: 551 +13/64: 64 +13/65: 65 +13/66: 66 +13/67: 67 +13/68: 68 +13/69: 69 +13/70: 70 +13/71: 71 +13/72: 72 +13/73: 73 +130/552: 552 +131/553: 553 +132/554: 554 +132/555: 555 +133/556: 556 +134/557: 557 +135/558: 558 +135/559: 559 +135/560: 560 +135/561: 561 +135/562: 562 +136/563: 563 +136/564: 564 +136/565: 565 +137/566: 566 +138/567: 567 +139/568: 568 +14/74: 74 +14/75: 75 +14/76: 76 +140/569: 569 +141/570: 570 +142/571: 571 +142/572: 572 +142/573: 573 +142/574: 574 +143/575: 575 +144/576: 576 +144/577: 577 +144/578: 578 +145/579: 579 +146/580: 580 +146/581: 581 +146/582: 582 +146/583: 583 +146/584: 584 +146/585: 585 +146/586: 586 +146/587: 587 +146/588: 588 +146/589: 589 +146/590: 590 +146/591: 591 +146/592: 592 +146/593: 593 +146/594: 594 +146/595: 595 +147/596: 596 +148/597: 597 +148/598: 598 +148/599: 599 +148/600: 600 +148/601: 601 +148/602: 602 +149/603: 603 +149/604: 604 +149/605: 605 +149/606: 606 +149/607: 607 +149/608: 608 +149/609: 609 +149/610: 610 +149/611: 611 +149/612: 612 +149/613: 613 +149/614: 614 +149/615: 615 +149/616: 616 +149/617: 617 +149/618: 618 +15/77: 77 +15/78: 78 +15/79: 79 +150/619: 619 +150/620: 620 +150/621: 621 +150/622: 622 +150/623: 623 +150/624: 624 +150/625: 625 +150/626: 626 +150/627: 627 +151/628: 628 +151/629: 629 +151/630: 630 +152/631: 631 +152/632: 632 +152/633: 633 +152/634: 634 +152/635: 635 +152/636: 636 +152/637: 637 +152/638: 638 +152/639: 639 +152/640: 640 +152/641: 641 +152/642: 642 +153/643: 643 +153/644: 644 +153/645: 645 +153/646: 646 +153/647: 647 +153/648: 648 +153/649: 649 +154/650: 650 +155/651: 651 +155/652: 652 +155/653: 653 +155/654: 654 +156/655: 655 +156/656: 656 +156/657: 657 +156/658: 658 +156/659: 659 +156/660: 660 +156/661: 661 +156/662: 662 +156/663: 663 +156/664: 664 +156/665: 665 +156/666: 666 +156/667: 667 +156/668: 668 +156/669: 669 +156/670: 670 +156/671: 671 +156/672: 672 
+156/673: 673 +156/674: 674 +156/675: 675 +156/676: 676 +156/677: 677 +156/678: 678 +156/679: 679 +156/680: 680 +156/681: 681 +157/682: 682 +158/683: 683 +158/684: 684 +158/685: 685 +158/686: 686 +158/687: 687 +158/688: 688 +158/689: 689 +158/690: 690 +158/691: 691 +158/692: 692 +159/693: 693 +159/694: 694 +159/695: 695 +159/696: 696 +159/697: 697 +159/698: 698 +159/699: 699 +159/700: 700 +159/701: 701 +159/702: 702 +159/703: 703 +16/80: 80 +160/704: 704 +160/705: 705 +160/706: 706 +160/707: 707 +160/708: 708 +160/709: 709 +160/710: 710 +161/711: 711 +161/712: 712 +162/713: 713 +162/714: 714 +163/715: 715 +163/716: 716 +163/717: 717 +164/718: 718 +164/719: 719 +165/720: 720 +166/721: 721 +166/722: 722 +166/723: 723 +166/724: 724 +166/725: 725 +167/726: 726 +168/727: 727 +168/728: 728 +168/729: 729 +169/730: 730 +169/731: 731 +17/81: 81 +170/732: 732 +171/733: 733 +171/734: 734 +171/735: 735 +171/736: 736 +171/737: 737 +172/738: 738 +173/739: 739 +174/740: 740 +174/741: 741 +174/742: 742 +174/743: 743 +174/744: 744 +174/745: 745 +174/746: 746 +174/747: 747 +174/748: 748 +174/749: 749 +174/750: 750 +174/751: 751 +174/752: 752 +175/753: 753 +175/754: 754 +175/755: 755 +175/756: 756 +175/757: 757 +175/758: 758 +175/759: 759 +175/760: 760 +175/761: 761 +176/762: 762 +176/763: 763 +176/764: 764 +176/765: 765 +176/766: 766 +176/767: 767 +176/768: 768 +176/769: 769 +176/770: 770 +176/771: 771 +176/772: 772 +177/773: 773 +177/774: 774 +178/775: 775 +178/776: 776 +178/777: 777 +178/778: 778 +179/779: 779 +179/780: 780 +179/781: 781 +179/782: 782 +179/783: 783 +179/784: 784 +179/785: 785 +18/82: 82 +180/786: 786 +180/787: 787 +181/788: 788 +181/789: 789 +181/790: 790 +181/791: 791 +181/792: 792 +181/793: 793 +181/794: 794 +181/795: 795 +181/796: 796 +182/797: 797 +182/798: 798 +183/799: 799 +183/800: 800 +184/801: 801 +184/802: 802 +185/803: 803 +185/804: 804 +186/805: 805 +186/806: 806 +186/807: 807 +186/808: 808 +186/809: 809 +186/810: 810 +187/811: 811 +187/812: 812 +187/813: 813 +187/814: 814 +188/815: 815 +188/816: 816 +188/817: 817 +188/818: 818 +188/819: 819 +188/820: 820 +188/821: 821 +188/822: 822 +188/823: 823 +189/824: 824 +189/825: 825 +189/826: 826 +189/827: 827 +189/828: 828 +189/829: 829 +189/830: 830 +189/831: 831 +189/832: 832 +189/833: 833 +189/834: 834 +189/835: 835 +19/83: 83 +190/836: 836 +191/837: 837 +192/838: 838 +192/839: 839 +193/840: 840 +194/841: 841 +195/842: 842 +195/843: 843 +196/844: 844 +197/845: 845 +198/846: 846 +198/847: 847 +199/848: 848 +2/2: 2 +2/3: 3 +2/4: 4 +2/5: 5 +2/6: 6 +2/7: 7 +2/8: 8 +2/9: 9 +20/84: 84 +200/849: 849 +200/850: 850 +200/851: 851 +200/852: 852 +200/853: 853 +200/854: 854 +200/855: 855 +200/856: 856 +200/857: 857 +200/858: 858 +200/859: 859 +200/860: 860 +200/861: 861 +200/862: 862 +200/863: 863 +200/864: 864 +200/865: 865 +200/866: 866 +200/867: 867 +200/868: 868 +201/869: 869 +201/870: 870 +201/871: 871 +202/872: 872 +203/873: 873 +203/874: 874 +203/875: 875 +203/876: 876 +203/877: 877 +203/878: 878 +203/879: 879 +203/880: 880 +204/881: 881 +204/882: 882 +204/883: 883 +204/884: 884 +204/885: 885 +204/886: 886 +204/887: 887 +204/888: 888 +204/889: 889 +205/890: 890 +205/891: 891 +205/892: 892 +205/893: 893 +205/894: 894 +205/895: 895 +205/896: 896 +205/897: 897 +205/898: 898 +205/899: 899 +205/900: 900 +205/901: 901 +205/902: 902 +205/903: 903 +206/904: 904 +206/905: 905 +206/906: 906 +206/907: 907 +206/908: 908 +206/909: 909 +206/910: 910 +206/911: 911 +206/912: 912 +207/913: 913 +207/914: 914 +207/915: 915 +207/916: 916 +207/917: 917 
+207/918: 918 +208/919: 919 +208/920: 920 +208/921: 921 +208/922: 922 +208/923: 923 +208/924: 924 +208/925: 925 +208/926: 926 +209/927: 927 +209/928: 928 +209/929: 929 +209/930: 930 +209/931: 931 +209/932: 932 +209/933: 933 +209/934: 934 +209/935: 935 +209/936: 936 +209/937: 937 +21/85: 85 +21/86: 86 +21/87: 87 +21/88: 88 +21/89: 89 +210/938: 938 +210/939: 939 +210/940: 940 +211/941: 941 +211/942: 942 +211/943: 943 +211/944: 944 +212/945: 945 +213/946: 946 +214/947: 947 +215/948: 948 +215/949: 949 +216/950: 950 +216/951: 951 +217/952: 952 +217/953: 953 +217/954: 954 +217/955: 955 +217/956: 956 +218/957: 957 +219/958: 958 +219/959: 959 +219/960: 960 +219/961: 961 +219/962: 962 +219/963: 963 +219/964: 964 +22/90: 90 +22/91: 91 +220/965: 965 +220/966: 966 +220/967: 967 +220/968: 968 +220/969: 969 +220/970: 970 +220/971: 971 +220/972: 972 +220/973: 973 +221/974: 974 +222/975: 975 +222/976: 976 +222/977: 977 +223/978: 978 +224/979: 979 +224/980: 980 +224/981: 981 +224/982: 982 +224/983: 983 +224/984: 984 +224/985: 985 +224/986: 986 +224/987: 987 +224/988: 988 +225/989: 989 +225/990: 990 +225/991: 991 +225/992: 992 +225/993: 993 +225/994: 994 +225/995: 995 +225/996: 996 +225/997: 997 +225/998: 998 +225/999: 999 +226/1000: 1000 +226/1001: 1001 +226/1002: 1002 +227/1003: 1003 +227/1004: 1004 +227/1005: 1005 +227/1006: 1006 +227/1007: 1007 +227/1008: 1008 +227/1009: 1009 +227/1010: 1010 +227/1011: 1011 +227/1012: 1012 +227/1013: 1013 +227/1014: 1014 +227/1015: 1015 +227/1016: 1016 +227/1017: 1017 +227/1018: 1018 +228/1019: 1019 +229/1020: 1020 +229/1021: 1021 +229/1022: 1022 +229/1023: 1023 +229/1024: 1024 +229/1025: 1025 +229/1026: 1026 +229/1027: 1027 +23/92: 92 +23/93: 93 +230/1028: 1028 +230/1029: 1029 +230/1030: 1030 +230/1031: 1031 +230/1032: 1032 +230/1033: 1033 +230/1034: 1034 +230/1035: 1035 +231/1036: 1036 +231/1037: 1037 +231/1038: 1038 +232/1039: 1039 +232/1040: 1040 +233/1041: 1041 +234/1042: 1042 +234/1043: 1043 +234/1044: 1044 +235/1045: 1045 +235/1046: 1046 +235/1047: 1047 +235/1048: 1048 +236/1049: 1049 +236/1050: 1050 +236/1051: 1051 +236/1052: 1052 +237/1053: 1053 +237/1054: 1054 +238/1055: 1055 +239/1056: 1056 +239/1057: 1057 +24/94: 94 +240/1058: 1058 +241/1059: 1059 +241/1060: 1060 +241/1061: 1061 +241/1062: 1062 +242/1063: 1063 +243/1064: 1064 +244/1065: 1065 +244/1066: 1066 +244/1067: 1067 +244/1068: 1068 +244/1069: 1069 +244/1070: 1070 +244/1071: 1071 +244/1072: 1072 +244/1073: 1073 +244/1074: 1074 +244/1075: 1075 +244/1076: 1076 +244/1077: 1077 +244/1078: 1078 +244/1079: 1079 +244/1080: 1080 +244/1081: 1081 +244/1082: 1082 +244/1083: 1083 +244/1084: 1084 +245/1085: 1085 +245/1086: 1086 +245/1087: 1087 +245/1088: 1088 +245/1089: 1089 +246/1090: 1090 +246/1091: 1091 +246/1092: 1092 +247/1093: 1093 +248/1094: 1094 +249/1095: 1095 +25/95: 95 +25/96: 96 +25/97: 97 +25/98: 98 +250/1096: 1096 +250/1097: 1097 +251/1098: 1098 +252/1099: 1099 +253/1100: 1100 +254/1101: 1101 +254/1102: 1102 +255/1103: 1103 +256/1104: 1104 +256/1105: 1105 +256/1106: 1106 +256/1107: 1107 +256/1108: 1108 +257/1109: 1109 +257/1110: 1110 +258/1111: 1111 +259/1112: 1112 +26/99: 99 +260/1113: 1113 +260/1114: 1114 +261/1115: 1115 +261/1116: 1116 +262/1117: 1117 +262/1118: 1118 +262/1119: 1119 +262/1120: 1120 +262/1121: 1121 +262/1122: 1122 +262/1123: 1123 +262/1124: 1124 +262/1125: 1125 +262/1126: 1126 +262/1127: 1127 +262/1128: 1128 +262/1129: 1129 +262/1130: 1130 +262/1131: 1131 +262/1132: 1132 +262/1133: 1133 +262/1134: 1134 +262/1135: 1135 +262/1136: 1136 +262/1137: 1137 +262/1138: 1138 +262/1139: 1139 
+262/1140: 1140 +262/1141: 1141 +262/1142: 1142 +262/1143: 1143 +262/1144: 1144 +263/1145: 1145 +263/1146: 1146 +263/1147: 1147 +263/1148: 1148 +263/1149: 1149 +263/1150: 1150 +263/1151: 1151 +263/1152: 1152 +263/1153: 1153 +263/1154: 1154 +263/1155: 1155 +263/1156: 1156 +263/1157: 1157 +263/1158: 1158 +263/1159: 1159 +263/1160: 1160 +263/1161: 1161 +263/1162: 1162 +263/1163: 1163 +263/1164: 1164 +263/1165: 1165 +263/1166: 1166 +264/1167: 1167 +265/1168: 1168 +266/1169: 1169 +267/1170: 1170 +268/1171: 1171 +269/1172: 1172 +27/100: 100 +27/101: 101 +27/102: 102 +270/1173: 1173 +270/1174: 1174 +270/1175: 1175 +270/1176: 1176 +270/1177: 1177 +270/1178: 1178 +270/1179: 1179 +271/1180: 1180 +271/1181: 1181 +271/1182: 1182 +272/1183: 1183 +272/1184: 1184 +272/1185: 1185 +273/1186: 1186 +274/1187: 1187 +275/1188: 1188 +275/1189: 1189 +275/1190: 1190 +276/1191: 1191 +276/1192: 1192 +276/1193: 1193 +276/1194: 1194 +276/1195: 1195 +276/1196: 1196 +276/1197: 1197 +276/1198: 1198 +276/1199: 1199 +276/1200: 1200 +276/1201: 1201 +276/1202: 1202 +276/1203: 1203 +276/1204: 1204 +276/1205: 1205 +277/1206: 1206 +277/1207: 1207 +277/1208: 1208 +277/1209: 1209 +278/1210: 1210 +279/1211: 1211 +279/1212: 1212 +279/1213: 1213 +28/103: 103 +28/104: 104 +28/105: 105 +28/106: 106 +28/107: 107 +28/108: 108 +28/109: 109 +280/1214: 1214 +280/1215: 1215 +280/1216: 1216 +280/1217: 1217 +280/1218: 1218 +280/1219: 1219 +280/1220: 1220 +280/1221: 1221 +280/1222: 1222 +280/1223: 1223 +280/1224: 1224 +280/1225: 1225 +280/1226: 1226 +280/1227: 1227 +280/1228: 1228 +280/1229: 1229 +280/1230: 1230 +280/1231: 1231 +280/1232: 1232 +280/1233: 1233 +280/1234: 1234 +280/1235: 1235 +280/1236: 1236 +280/1237: 1237 +280/1238: 1238 +280/1239: 1239 +280/1240: 1240 +280/1241: 1241 +280/1242: 1242 +280/1243: 1243 +280/1244: 1244 +280/1245: 1245 +280/1246: 1246 +280/1247: 1247 +280/1248: 1248 +280/1249: 1249 +280/1250: 1250 +280/1251: 1251 +280/1252: 1252 +280/1253: 1253 +280/1254: 1254 +280/1255: 1255 +280/1256: 1256 +280/1257: 1257 +280/1258: 1258 +280/1259: 1259 +280/1260: 1260 +280/1261: 1261 +280/1262: 1262 +280/1263: 1263 +281/1264: 1264 +281/1265: 1265 +281/1266: 1266 +281/1267: 1267 +281/1268: 1268 +281/1269: 1269 +281/1270: 1270 +281/1271: 1271 +281/1272: 1272 +281/1273: 1273 +282/1274: 1274 +283/1275: 1275 +284/1276: 1276 +284/1277: 1277 +284/1278: 1278 +285/1279: 1279 +286/1280: 1280 +286/1281: 1281 +287/1282: 1282 +287/1283: 1283 +287/1284: 1284 +287/1285: 1285 +288/1286: 1286 +288/1287: 1287 +288/1288: 1288 +289/1289: 1289 +289/1290: 1290 +29/110: 110 +290/1291: 1291 +290/1292: 1292 +291/1293: 1293 +291/1294: 1294 +291/1295: 1295 +291/1296: 1296 +291/1297: 1297 +291/1298: 1298 +291/1299: 1299 +291/1300: 1300 +291/1301: 1301 +291/1302: 1302 +291/1303: 1303 +291/1304: 1304 +291/1305: 1305 +292/1306: 1306 +293/1307: 1307 +294/1308: 1308 +294/1309: 1309 +295/1310: 1310 +296/1311: 1311 +297/1312: 1312 +297/1313: 1313 +297/1314: 1314 +297/1315: 1315 +297/1316: 1316 +297/1317: 1317 +297/1318: 1318 +297/1319: 1319 +297/1320: 1320 +297/1321: 1321 +297/1322: 1322 +297/1323: 1323 +297/1324: 1324 +297/1325: 1325 +298/1326: 1326 +299/1327: 1327 +299/1328: 1328 +299/1329: 1329 +299/1330: 1330 +299/1331: 1331 +299/1332: 1332 +299/1333: 1333 +299/1334: 1334 +299/1335: 1335 +299/1336: 1336 +299/1337: 1337 +299/1338: 1338 +3/10: 10 +3/11: 11 +3/12: 12 +3/13: 13 +3/14: 14 +3/15: 15 +3/16: 16 +30/111: 111 +30/112: 112 +30/113: 113 +30/114: 114 +30/115: 115 +30/116: 116 +30/117: 117 +30/118: 118 +30/119: 119 +30/120: 120 +30/121: 121 +30/122: 122 
+30/123: 123 +30/124: 124 +300/1339: 1339 +300/1340: 1340 +300/1341: 1341 +300/1342: 1342 +300/1343: 1343 +300/1344: 1344 +300/1345: 1345 +300/1346: 1346 +300/1347: 1347 +300/1348: 1348 +300/1349: 1349 +301/1350: 1350 +301/1351: 1351 +301/1352: 1352 +301/1353: 1353 +301/1354: 1354 +301/1355: 1355 +301/1356: 1356 +301/1357: 1357 +301/1358: 1358 +302/1359: 1359 +302/1360: 1360 +302/1361: 1361 +302/1362: 1362 +302/1363: 1363 +303/1364: 1364 +303/1365: 1365 +303/1366: 1366 +303/1367: 1367 +304/1368: 1368 +305/1369: 1369 +305/1370: 1370 +305/1371: 1371 +306/1372: 1372 +306/1373: 1373 +307/1374: 1374 +308/1375: 1375 +308/1376: 1376 +308/1377: 1377 +308/1378: 1378 +308/1379: 1379 +308/1380: 1380 +308/1381: 1381 +308/1382: 1382 +308/1383: 1383 +308/1384: 1384 +308/1385: 1385 +308/1386: 1386 +308/1387: 1387 +308/1388: 1388 +308/1389: 1389 +308/1390: 1390 +308/1391: 1391 +308/1392: 1392 +308/1393: 1393 +309/1394: 1394 +309/1395: 1395 +309/1396: 1396 +31/125: 125 +31/126: 126 +31/127: 127 +31/128: 128 +31/129: 129 +31/130: 130 +31/131: 131 +31/132: 132 +31/133: 133 +31/134: 134 +31/135: 135 +31/136: 136 +310/1397: 1397 +310/1398: 1398 +310/1399: 1399 +310/1400: 1400 +310/1401: 1401 +310/1402: 1402 +310/1403: 1403 +310/1404: 1404 +310/1405: 1405 +310/1406: 1406 +310/1407: 1407 +310/1408: 1408 +310/1409: 1409 +310/1410: 1410 +310/1411: 1411 +310/1412: 1412 +310/1413: 1413 +310/1414: 1414 +310/1415: 1415 +311/1416: 1416 +312/1417: 1417 +313/1418: 1418 +313/1419: 1419 +313/1420: 1420 +313/1421: 1421 +314/1422: 1422 +314/1423: 1423 +314/1424: 1424 +314/1425: 1425 +314/1426: 1426 +314/1427: 1427 +314/1428: 1428 +314/1429: 1429 +314/1430: 1430 +314/1431: 1431 +314/1432: 1432 +314/1433: 1433 +314/1434: 1434 +314/1435: 1435 +314/1436: 1436 +314/1437: 1437 +314/1438: 1438 +314/1439: 1439 +314/1440: 1440 +314/1441: 1441 +314/1442: 1442 +314/1443: 1443 +314/1444: 1444 +314/1445: 1445 +314/1446: 1446 +314/1447: 1447 +314/1448: 1448 +314/1449: 1449 +314/1450: 1450 +314/1451: 1451 +314/1452: 1452 +314/1453: 1453 +314/1454: 1454 +314/1455: 1455 +314/1456: 1456 +314/1457: 1457 +314/1458: 1458 +314/1459: 1459 +314/1460: 1460 +314/1461: 1461 +314/1462: 1462 +314/1463: 1463 +314/1464: 1464 +314/1465: 1465 +314/1466: 1466 +314/1467: 1467 +314/1468: 1468 +314/1469: 1469 +314/1470: 1470 +314/1471: 1471 +314/1472: 1472 +314/1473: 1473 +314/1474: 1474 +314/1475: 1475 +314/1476: 1476 +314/1477: 1477 +314/1478: 1478 +314/1479: 1479 +314/1480: 1480 +314/1481: 1481 +314/1482: 1482 +314/1483: 1483 +314/1484: 1484 +314/1485: 1485 +314/1486: 1486 +314/1487: 1487 +32/137: 137 +32/138: 138 +32/139: 139 +32/140: 140 +32/141: 141 +32/142: 142 +32/143: 143 +32/144: 144 +32/145: 145 +32/146: 146 +32/147: 147 +32/148: 148 +32/149: 149 +32/150: 150 +32/151: 151 +32/152: 152 +32/153: 153 +32/154: 154 +32/155: 155 +32/156: 156 +32/157: 157 +32/158: 158 +32/159: 159 +32/160: 160 +32/161: 161 +33/162: 162 +33/163: 163 +33/164: 164 +33/165: 165 +33/166: 166 +33/167: 167 +33/168: 168 +33/169: 169 +33/170: 170 +33/171: 171 +33/172: 172 +33/173: 173 +33/174: 174 +33/175: 175 +33/176: 176 +33/177: 177 +33/178: 178 +33/179: 179 +33/180: 180 +34/181: 181 +35/182: 182 +36/183: 183 +36/184: 184 +37/185: 185 +38/186: 186 +38/187: 187 +38/188: 188 +38/189: 189 +38/190: 190 +38/191: 191 +38/192: 192 +38/193: 193 +38/194: 194 +39/195: 195 +39/196: 196 +39/197: 197 +39/198: 198 +39/199: 199 +39/200: 200 +39/201: 201 +39/202: 202 +39/203: 203 +39/204: 204 +39/205: 205 +39/206: 206 +39/207: 207 +39/208: 208 +39/209: 209 +39/210: 210 +39/211: 211 +39/212: 212 
+39/213: 213 +39/214: 214 +39/215: 215 +39/216: 216 +39/217: 217 +39/218: 218 +39/219: 219 +39/220: 220 +39/221: 221 +39/222: 222 +39/223: 223 +39/224: 224 +39/225: 225 +39/226: 226 +39/227: 227 +39/228: 228 +39/229: 229 +39/230: 230 +39/231: 231 +4/17: 17 +4/18: 18 +4/19: 19 +4/20: 20 +40/232: 232 +40/233: 233 +41/234: 234 +41/235: 235 +41/236: 236 +41/237: 237 +41/238: 238 +41/239: 239 +41/240: 240 +41/241: 241 +41/242: 242 +41/243: 243 +41/244: 244 +41/245: 245 +41/246: 246 +41/247: 247 +41/248: 248 +42/249: 249 +43/250: 250 +44/251: 251 +45/252: 252 +45/253: 253 +45/254: 254 +45/255: 255 +45/256: 256 +45/257: 257 +45/258: 258 +45/259: 259 +45/260: 260 +45/261: 261 +45/262: 262 +45/263: 263 +45/264: 264 +45/265: 265 +45/266: 266 +46/267: 267 +46/268: 268 +46/269: 269 +46/270: 270 +46/271: 271 +46/272: 272 +46/273: 273 +46/274: 274 +46/275: 275 +46/276: 276 +46/277: 277 +46/278: 278 +47/279: 279 +47/280: 280 +48/281: 281 +48/282: 282 +48/283: 283 +48/284: 284 +48/285: 285 +49/286: 286 +49/287: 287 +49/288: 288 +49/289: 289 +49/290: 290 +49/291: 291 +49/292: 292 +5/21: 21 +5/22: 22 +5/23: 23 +5/24: 24 +5/25: 25 +5/26: 26 +50/293: 293 +51/294: 294 +52/295: 295 +52/296: 296 +52/297: 297 +53/298: 298 +54/299: 299 +55/300: 300 +55/301: 301 +56/302: 302 +56/303: 303 +56/304: 304 +56/305: 305 +56/306: 306 +56/307: 307 +56/308: 308 +56/309: 309 +57/310: 310 +58/311: 311 +58/312: 312 +58/313: 313 +59/314: 314 +59/315: 315 +6/27: 27 +6/28: 28 +60/316: 316 +60/317: 317 +60/318: 318 +61/319: 319 +61/320: 320 +61/321: 321 +62/322: 322 +62/323: 323 +63/324: 324 +64/325: 325 +65/326: 326 +65/327: 327 +65/328: 328 +65/329: 329 +66/330: 330 +66/331: 331 +66/332: 332 +66/333: 333 +67/334: 334 +68/335: 335 +68/336: 336 +68/337: 337 +68/338: 338 +68/339: 339 +68/340: 340 +68/341: 341 +68/342: 342 +68/343: 343 +68/344: 344 +68/345: 345 +68/346: 346 +68/347: 347 +68/348: 348 +68/349: 349 +68/350: 350 +69/351: 351 +7/29: 29 +7/30: 30 +70/352: 352 +71/353: 353 +71/354: 354 +72/355: 355 +72/356: 356 +73/357: 357 +73/358: 358 +74/359: 359 +75/360: 360 +76/361: 361 +76/362: 362 +76/363: 363 +76/364: 364 +76/365: 365 +77/366: 366 +78/367: 367 +78/368: 368 +78/369: 369 +79/370: 370 +79/371: 371 +79/372: 372 +79/373: 373 +79/374: 374 +79/375: 375 +79/376: 376 +79/377: 377 +79/378: 378 +8/31: 31 +8/32: 32 +8/33: 33 +8/34: 34 +8/35: 35 +8/36: 36 +8/37: 37 +8/38: 38 +8/39: 39 +8/40: 40 +8/41: 41 +8/42: 42 +8/43: 43 +8/44: 44 +8/45: 45 +8/46: 46 +80/379: 379 +80/380: 380 +80/381: 381 +80/382: 382 +80/383: 383 +80/384: 384 +80/385: 385 +80/386: 386 +80/387: 387 +80/388: 388 +80/389: 389 +80/390: 390 +80/391: 391 +81/392: 392 +81/393: 393 +81/394: 394 +81/395: 395 +81/396: 396 +81/397: 397 +81/398: 398 +81/399: 399 +81/400: 400 +81/401: 401 +82/402: 402 +83/403: 403 +84/404: 404 +85/405: 405 +86/406: 406 +86/407: 407 +87/408: 408 +88/409: 409 +88/410: 410 +89/411: 411 +89/412: 412 +9/47: 47 +9/48: 48 +9/49: 49 +90/413: 413 +90/414: 414 +91/415: 415 +91/416: 416 +92/417: 417 +93/418: 418 +93/419: 419 +93/420: 420 +93/421: 421 +93/422: 422 +93/423: 423 +93/424: 424 +93/425: 425 +93/426: 426 +94/427: 427 +94/428: 428 +94/429: 429 +94/430: 430 +94/431: 431 +94/432: 432 +94/433: 433 +95/434: 434 +95/435: 435 +95/436: 436 +95/437: 437 +95/438: 438 +96/439: 439 +97/440: 440 +97/441: 441 +98/442: 442 +99/443: 443 diff --git a/sample_dataset/id_manager/service_id.yml b/sample_dataset/id_manager/service_id.yml new file mode 100644 index 0000000..dfc83f7 --- /dev/null +++ b/sample_dataset/id_manager/service_id.yml @@ -0,0 +1,316 @@ 
+? '' +: 0 +'1': 1 +'10': 10 +'100': 100 +'101': 101 +'102': 102 +'103': 103 +'104': 104 +'105': 105 +'106': 106 +'107': 107 +'108': 108 +'109': 109 +'11': 11 +'110': 110 +'111': 111 +'112': 112 +'113': 113 +'114': 114 +'115': 115 +'116': 116 +'117': 117 +'118': 118 +'119': 119 +'12': 12 +'120': 120 +'121': 121 +'122': 122 +'123': 123 +'124': 124 +'125': 125 +'126': 126 +'127': 127 +'128': 128 +'129': 129 +'13': 13 +'130': 130 +'131': 131 +'132': 132 +'133': 133 +'134': 134 +'135': 135 +'136': 136 +'137': 137 +'138': 138 +'139': 139 +'14': 14 +'140': 140 +'141': 141 +'142': 142 +'143': 143 +'144': 144 +'145': 145 +'146': 146 +'147': 147 +'148': 148 +'149': 149 +'15': 15 +'150': 150 +'151': 151 +'152': 152 +'153': 153 +'154': 154 +'155': 155 +'156': 156 +'157': 157 +'158': 158 +'159': 159 +'16': 16 +'160': 160 +'161': 161 +'162': 162 +'163': 163 +'164': 164 +'165': 165 +'166': 166 +'167': 167 +'168': 168 +'169': 169 +'17': 17 +'170': 170 +'171': 171 +'172': 172 +'173': 173 +'174': 174 +'175': 175 +'176': 176 +'177': 177 +'178': 178 +'179': 179 +'18': 18 +'180': 180 +'181': 181 +'182': 182 +'183': 183 +'184': 184 +'185': 185 +'186': 186 +'187': 187 +'188': 188 +'189': 189 +'19': 19 +'190': 190 +'191': 191 +'192': 192 +'193': 193 +'194': 194 +'195': 195 +'196': 196 +'197': 197 +'198': 198 +'199': 199 +'2': 2 +'20': 20 +'200': 200 +'201': 201 +'202': 202 +'203': 203 +'204': 204 +'205': 205 +'206': 206 +'207': 207 +'208': 208 +'209': 209 +'21': 21 +'210': 210 +'211': 211 +'212': 212 +'213': 213 +'214': 214 +'215': 215 +'216': 216 +'217': 217 +'218': 218 +'219': 219 +'22': 22 +'220': 220 +'221': 221 +'222': 222 +'223': 223 +'224': 224 +'225': 225 +'226': 226 +'227': 227 +'228': 228 +'229': 229 +'23': 23 +'230': 230 +'231': 231 +'232': 232 +'233': 233 +'234': 234 +'235': 235 +'236': 236 +'237': 237 +'238': 238 +'239': 239 +'24': 24 +'240': 240 +'241': 241 +'242': 242 +'243': 243 +'244': 244 +'245': 245 +'246': 246 +'247': 247 +'248': 248 +'249': 249 +'25': 25 +'250': 250 +'251': 251 +'252': 252 +'253': 253 +'254': 254 +'255': 255 +'256': 256 +'257': 257 +'258': 258 +'259': 259 +'26': 26 +'260': 260 +'261': 261 +'262': 262 +'263': 263 +'264': 264 +'265': 265 +'266': 266 +'267': 267 +'268': 268 +'269': 269 +'27': 27 +'270': 270 +'271': 271 +'272': 272 +'273': 273 +'274': 274 +'275': 275 +'276': 276 +'277': 277 +'278': 278 +'279': 279 +'28': 28 +'280': 280 +'281': 281 +'282': 282 +'283': 283 +'284': 284 +'285': 285 +'286': 286 +'287': 287 +'288': 288 +'289': 289 +'29': 29 +'290': 290 +'291': 291 +'292': 292 +'293': 293 +'294': 294 +'295': 295 +'296': 296 +'297': 297 +'298': 298 +'299': 299 +'3': 3 +'30': 30 +'300': 300 +'301': 301 +'302': 302 +'303': 303 +'304': 304 +'305': 305 +'306': 306 +'307': 307 +'308': 308 +'309': 309 +'31': 31 +'310': 310 +'311': 311 +'312': 312 +'313': 313 +'314': 314 +'32': 32 +'33': 33 +'34': 34 +'35': 35 +'36': 36 +'37': 37 +'38': 38 +'39': 39 +'4': 4 +'40': 40 +'41': 41 +'42': 42 +'43': 43 +'44': 44 +'45': 45 +'46': 46 +'47': 47 +'48': 48 +'49': 49 +'5': 5 +'50': 50 +'51': 51 +'52': 52 +'53': 53 +'54': 54 +'55': 55 +'56': 56 +'57': 57 +'58': 58 +'59': 59 +'6': 6 +'60': 60 +'61': 61 +'62': 62 +'63': 63 +'64': 64 +'65': 65 +'66': 66 +'67': 67 +'68': 68 +'69': 69 +'7': 7 +'70': 70 +'71': 71 +'72': 72 +'73': 73 +'74': 74 +'75': 75 +'76': 76 +'77': 77 +'78': 78 +'79': 79 +'8': 8 +'80': 80 +'81': 81 +'82': 82 +'83': 83 +'84': 84 +'85': 85 +'86': 86 +'87': 87 +'88': 88 +'89': 89 +'9': 9 +'90': 90 +'91': 91 +'92': 92 +'93': 93 +'94': 94 +'95': 95 +'96': 96 +'97': 97 +'98': 98 
+'99': 99 diff --git a/sample_dataset/id_manager/status_id.yml b/sample_dataset/id_manager/status_id.yml new file mode 100644 index 0000000..343578e --- /dev/null +++ b/sample_dataset/id_manager/status_id.yml @@ -0,0 +1,21 @@ +? '' +: 0 +'0': 1 +'1': 2 +'400': 3 +'401': 4 +'403': 5 +'404': 6 +'423': 7 +'429': 8 +'500': 9 +'503': 10 +'504': 11 +CompletionException: 12 +HystrixRuntimeException: 13 +ProcessingException: 14 +SOAERROR_1006: 15 +SOAERROR_2017: 16 +SOAERROR_2026: 17 +SOAERROR_2028: 18 +UNFINISHED: 19 diff --git a/sample_dataset/test.csv b/sample_dataset/test.csv new file mode 100644 index 0000000..ff69df4 --- /dev/null +++ b/sample_dataset/test.csv @@ -0,0 +1,51 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName,nodeLatencyLabel,graphLatencyLabel,graphStructureLabel +1596357529536981502,4448694259929862755,0,-1990250972,2022-05-02 18:10:09,4188,102000000,0,0,1413,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-281558527,2022-05-02 18:10:09,3913,347000000,0,0,1399,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-335450563,2022-05-02 18:10:09,74,256000000,0,0,1400,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,159928739,2022-05-02 18:10:09,14,166000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1875814534,2022-05-02 18:10:09,20,180000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1741606249,2022-05-02 18:10:09,20,250000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1617443693,2022-05-02 18:10:09,12,268000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-278055518,2022-05-02 18:10:09,12,328000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-370879640,2022-05-02 18:10:09,11,349000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,2071447605,2022-05-02 18:10:13,15,265000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1819031907,2022-05-02 18:10:13,12,278000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1842451354,2022-05-02 18:10:13,13,287000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1935560096,2022-05-02 18:10:13,15,295000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-2003257890,2022-05-02 18:10:14,14,306000000,0,500,1412,310,0,0,1 +1596357529536981502,4448694259929862755,-281558527,-700384075,2022-05-02 18:10:09,3907,353000000,0,0,124,30,0,0,1 +1596357529536981502,4448694259929862755,-335450563,-21249935,2022-05-02 18:10:09,63,267000000,0,0,249,42,0,0,1 +1596357529536981502,4448694259929862755,-2003257890,1900376561,2022-05-02 18:10:14,2,318000000,0,500,1370,305,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-803543939,2022-05-02 18:10:09,23,367000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-736459037,2022-05-02 18:10:09,19,371000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-1183351125,2022-05-02 18:10:11,16,34000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,350862234,2022-05-02 18:10:12,10,580000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,1682015471,2022-05-02 18:10:13,11,169000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,2040964166,2022-05-02 18:10:13,11,259000000,0,0,113,30,0,0,1 
+1596357529536981502,4448694259929862755,-700384075,1762151399,2022-05-02 18:10:10,955,85000000,0,0,117,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-334971362,2022-05-02 18:10:12,583,577000000,0,0,117,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-15543737,2022-05-02 18:10:09,51,379000000,0,0,119,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,583885210,2022-05-02 18:10:14,11,269000000,0,0,121,30,0,0,1 +1596357529536981502,4448694259929862755,1762151399,681721006,2022-05-02 18:10:10,840,160000000,0,0,1270,281,0,0,1 +1596357529536981502,4448694259929862755,-334971362,1067395986,2022-05-02 18:10:12,479,611000000,0,0,1270,281,0,0,1 +1596357529536981502,4448694259929862755,-15543737,408810147,2022-05-02 18:10:09,33,397000000,0,0,1277,284,0,0,1 +1596357529536981502,4448694259929862755,583885210,-304841780,2022-05-02 18:10:14,3,277000000,0,0,1369,305,0,0,1 +5662566985137355690,7477298637177050925,0,-491131385,2022-05-02 17:47:41,5176,914000000,0,0,1393,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,-652820602,2022-05-02 17:47:41,14,976000000,0,0,1378,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,-1333335415,2022-05-02 17:47:47,11,69000000,0,0,1380,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,703056482,2022-05-02 17:47:41,10,950000000,0,0,1381,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,687971319,2022-05-02 17:47:47,7,83000000,0,0,1386,308,1,1,0 +5662566985137355690,7477298637177050925,-491131385,1792075409,2022-05-02 17:47:41,7,963000000,0,0,1389,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,48140695,2022-05-02 17:47:47,26,14000000,0,0,1391,308,0,1,0 +5662566985137355690,7477298637177050925,-652820602,1379876670,2022-05-02 17:47:41,11,979000000,0,0,310,57,0,1,0 +5662566985137355690,7477298637177050925,703056482,-699583531,2022-05-02 17:47:41,5,945000000,0,0,356,72,0,1,0 +5662566985137355690,7477298637177050925,687971319,-1212461088,2022-05-02 17:47:47,4,86000000,0,0,1212,279,0,1,0 +5662566985137355690,7477298637177050925,48140695,-1178781667,2022-05-02 17:47:47,11,19000000,0,0,1301,291,0,1,0 +3646047056327432861,6658453249126145996,0,1931323897,2022-05-02 15:59:59,60,450000000,0,0,403,83,0,0,0 +3646047056327432861,6658453249126145996,1931323897,711219543,2022-05-02 15:59:59,57,453000000,0,0,649,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,-1561583093,2022-05-02 15:59:59,4,466000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,81837891,2022-05-02 15:59:59,24,486000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,1963630877,2022-05-02 15:59:59,5,505000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,-1370474143,2022-05-02 15:59:59,5,505000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,1992379152,2022-05-02 15:59:59,17,473000000,0,500,644,153,0,0,0 +3646047056327432861,6658453249126145996,1992379152,360300743,2022-05-02 15:59:59,13,467000000,0,500,673,156,0,0,0 diff --git a/sample_dataset/train.csv b/sample_dataset/train.csv new file mode 100644 index 0000000..213be91 --- /dev/null +++ b/sample_dataset/train.csv @@ -0,0 +1,22 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName +7565692060307280094,4149727419449628353,0,-1160783149,2022-05-02 18:32:16,8,522000000,0,0,3,2 +7565692060307280094,4149727419449628353,-1160783149,-663518905,2022-05-02 18:32:16,5,525000000,0,0,6,2 
+7565692060307280094,4149727419449628353,-663518905,292134812,2022-05-02 18:32:16,2,528000000,0,0,17,4 +7469867095312772506,9054005929149387783,0,-116188428,2022-05-02 15:59:34,9,401000000,0,0,3,2 +7469867095312772506,9054005929149387783,-116188428,-936201595,2022-05-02 15:59:34,6,404000000,0,0,6,2 +7469867095312772506,9054005929149387783,-936201595,203651095,2022-05-02 15:59:34,4,406000000,0,0,17,4 +1302365061014104190,4713847854262197922,0,147046579,2022-05-02 16:05:08,8,262000000,0,0,3,2 +1302365061014104190,4713847854262197922,147046579,-815705052,2022-05-02 16:05:08,5,265000000,0,0,6,2 +1302365061014104190,4713847854262197922,-815705052,-1767599182,2022-05-02 16:05:08,3,267000000,0,0,17,4 +4520607604792678538,90296779370241065,0,-1192584432,2022-05-02 15:03:15,10,800000000,0,0,3,2 +4520607604792678538,90296779370241065,-1192584432,2102356101,2022-05-02 15:03:15,7,803000000,0,0,6,2 +4520607604792678538,90296779370241065,2102356101,-1201056520,2022-05-02 15:03:15,5,805000000,0,0,17,4 +4301483373465897605,1921512395340371532,0,-116188428,2022-05-02 15:59:34,9,401000000,0,0,3,2 +4301483373465897605,1921512395340371532,-116188428,-936201595,2022-05-02 15:59:34,6,404000000,0,0,6,2 +4301483373465897605,1921512395340371532,-936201595,203651095,2022-05-02 15:59:34,4,406000000,0,0,17,4 +4516399473592649105,3122656494628515993,0,-940181392,2022-05-02 16:32:32,8,162000000,0,0,3,2 +4516399473592649105,3122656494628515993,-940181392,-1775474648,2022-05-02 16:32:32,6,164000000,0,0,6,2 +4516399473592649105,3122656494628515993,-1775474648,-614412147,2022-05-02 16:32:32,4,166000000,0,0,17,4 +2001615163061348505,1103669949841503114,0,-251538214,2022-05-02 17:19:09,7,523000000,0,0,3,2 +2001615163061348505,1103669949841503114,-251538214,-295534211,2022-05-02 17:19:09,5,525000000,0,0,6,2 +2001615163061348505,1103669949841503114,-295534211,-1868571256,2022-05-02 17:19:09,3,527000000,0,0,17,4 diff --git a/sample_dataset/val.csv b/sample_dataset/val.csv new file mode 100644 index 0000000..1363df4 --- /dev/null +++ b/sample_dataset/val.csv @@ -0,0 +1,22 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName +3718860522599049613,8245757295869652015,0,744605799,2022-05-02 15:34:49,10,630000000,0,0,3,2 +3718860522599049613,8245757295869652015,744605799,258669622,2022-05-02 15:34:49,6,634000000,0,0,6,2 +3718860522599049613,8245757295869652015,258669622,-2140429749,2022-05-02 15:34:49,4,626000000,0,0,17,4 +171786705856344778,8432392905725602200,0,834436815,2022-05-02 15:38:25,22,928000000,0,0,3,2 +171786705856344778,8432392905725602200,834436815,1941867149,2022-05-02 15:38:25,19,931000000,0,0,6,2 +171786705856344778,8432392905725602200,1941867149,-2120879518,2022-05-02 15:38:25,17,933000000,0,0,17,4 +1884068908740226663,198674487451014796,0,-1192584432,2022-05-02 15:03:15,10,800000000,0,0,3,2 +1884068908740226663,198674487451014796,-1192584432,2102356101,2022-05-02 15:03:15,7,803000000,0,0,6,2 +1884068908740226663,198674487451014796,2102356101,-1201056520,2022-05-02 15:03:15,5,805000000,0,0,17,4 +6433112438253994909,8162558342722269388,0,-940181392,2022-05-02 16:32:32,8,162000000,0,0,3,2 +6433112438253994909,8162558342722269388,-940181392,-1775474648,2022-05-02 16:32:32,6,164000000,0,0,6,2 +6433112438253994909,8162558342722269388,-1775474648,-614412147,2022-05-02 16:32:32,4,166000000,0,0,17,4 +6877930537907535995,8837251782173988176,0,-112915630,2022-05-02 16:57:11,7,243000000,0,0,3,2 
+6877930537907535995,8837251782173988176,-112915630,-1653931621,2022-05-02 16:57:11,4,236000000,0,0,6,2 +6877930537907535995,8837251782173988176,-1653931621,-48204267,2022-05-02 16:57:11,2,238000000,0,0,17,4 +790603823422444737,8049481303438475592,0,-269253468,2022-05-02 15:00:08,14,176000000,0,0,3,2 +790603823422444737,8049481303438475592,-269253468,1292285861,2022-05-02 15:00:08,11,179000000,0,0,6,2 +790603823422444737,8049481303438475592,1292285861,-205310853,2022-05-02 15:00:08,4,186000000,0,0,17,4 +4016062494274678058,802407659628105677,0,-1067224499,2022-05-02 15:14:32,20,520000000,0,0,3,2 +4016062494274678058,802407659628105677,-1067224499,1026915991,2022-05-02 15:14:32,17,523000000,0,0,6,2 +4016062494274678058,802407659628105677,1026915991,-1777842468,2022-05-02 15:14:32,15,525000000,0,0,17,4 diff --git a/test.sh b/test.sh new file mode 100644 index 0000000..00b4fff --- /dev/null +++ b/test.sh @@ -0,0 +1,4 @@ +echo "Usage: bash test.sh [model_path] [dataset_path]" +echo "MODEL: $1" +echo "DATASET: $2" +python3 -m tracegnn.models.trace_vae.test evaluate-nll -M "$1" --use-train-val -D "$2" --device cpu --use-std-limit --std-limit-global diff --git a/tracegnn/__init__.py b/tracegnn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/cli/__init__.py b/tracegnn/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/cli/data_process.py b/tracegnn/cli/data_process.py new file mode 100644 index 0000000..03f6303 --- /dev/null +++ b/tracegnn/cli/data_process.py @@ -0,0 +1,108 @@ +import math +import pickle as pkl +import random +import shutil +import sys + +import click +import numpy as np +from tqdm import tqdm + +from tracegnn.constants import * +from tracegnn.data import * +from tracegnn.utils import * + + +def get_graph_key(g): + node_types = set() + stack = [g.root] + while stack: + nd = stack.pop() + node_types.add(nd.operation_id) + stack.extend(nd.children) + return g.root.operation_id, g.max_depth, tuple(sorted(node_types)) + + +@click.group() +def main(): + pass + + +@main.command() +@click.option('-i', '--input-dir') +@click.option('-o', '--output-dir') +@click.option('-n', '--name', type=str, required=True) +def csv_to_db(input_dir, output_dir, name): + # check the parameters + input_dir = os.path.abspath(input_dir) + output_dir = os.path.abspath(output_dir) + + input_path = os.path.join(input_dir, f"{name}.csv") + output_path = os.path.join(output_dir, "processed", name) + + # Load id_manager + id_manager = TraceGraphIDManager(os.path.join(input_dir, 'id_manager')) + + # process the traces + # load the graphs + if 'test' not in name: + df = load_trace_csv(input_path) + trace_graphs = df_to_trace_graphs( + df, + id_manager=id_manager, + merge_spans=True, + ) + + # write to db + if os.path.exists(output_path): + shutil.rmtree(output_path) + + db = BytesSqliteDB(output_path, write=True) + with db, db.write_batch(): + for g in tqdm(trace_graphs, desc='Save graphs'): + db.add(g.to_bytes()) + else: + # read test data + df = load_trace_csv(input_path, is_test=True) + + for i in range(3): + trace_graphs = df_to_trace_graphs( + df, + id_manager=id_manager, + merge_spans=True, + test_label=i + ) + + # write to db + if i == 0: + output_path = os.path.join(output_dir, 'processed', 'test') + elif i == 1: + output_path = os.path.join(output_dir, 'processed', 'test-drop') + else: + output_path = os.path.join(output_dir, 'processed', 'test-latency') + + if os.path.exists(output_path): + shutil.rmtree(output_path) + + db = 
BytesSqliteDB(output_path, write=True) + with db, db.write_batch(): + for g in tqdm(trace_graphs, desc='Save graphs'): + db.add(g.to_bytes()) + + +@main.command() +@click.option('-i', '--input-dir') +@click.option('-o', '--output_dir') +def preprocess(input_dir, output_dir): + print("Convert datasets...") + print("------------> Train") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n train") + print("------------> Val") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n val") + print("------------> Test") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n test") + + print("Finished!") + +if __name__ == '__main__': + main() diff --git a/tracegnn/constants.py b/tracegnn/constants.py new file mode 100644 index 0000000..fdc685d --- /dev/null +++ b/tracegnn/constants.py @@ -0,0 +1,13 @@ +import os + +# if MIN_NODE_COUNT <= 2 <= MAX_NODE_COUNT, then the graph will be chosen +MAX_NODE_COUNT = int(os.environ.get('MAX_NODE_COUNT', '32')) +MAX_SPAN_COUNT = int(os.environ.get('MAX_SPAN_COUNT', '32')) + +# whether or not to use multi-dimensional latency codec? +# If not set, will normalize the latency w.r.t. each operation. +USE_MULTI_DIM_LATENCY_CODEC = os.environ.get('USE_MULTI_DIM_LATENCY_CODEC', '0') == '1' + +# If USE_MULTI_DIM_LATENCY_CODEC, then encode the codec parameters. +MAX_LATENCY_DIM = int(os.environ.get('MAX_LATENCY_DIM', '5')) +MAX_DEPTH = int(os.environ.get('MAX_DEPTH', '4')) diff --git a/tracegnn/data/__init__.py b/tracegnn/data/__init__.py new file mode 100644 index 0000000..6359ac6 --- /dev/null +++ b/tracegnn/data/__init__.py @@ -0,0 +1,3 @@ +from .bytes_db import * +from .trace_graph import * +from .trace_graph_db import * diff --git a/tracegnn/data/bytes_db.py b/tracegnn/data/bytes_db.py new file mode 100644 index 0000000..5ee9a5d --- /dev/null +++ b/tracegnn/data/bytes_db.py @@ -0,0 +1,242 @@ +"""Databases for large-scale datasets.""" +import bisect +import os +import pickle +import sqlite3 +from contextlib import contextmanager +from typing import * + +import numpy as np +import snappy + +__all__ = [ + 'BytesDB', + 'BytesSqliteDB', + 'BytesMultiDB', +] + + +class BytesDB(object): + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if not exc_type: + self.commit() + self.close() + + def __len__(self) -> int: + return self.data_count() + + def __getitem__(self, item: int): + return self.get(item) + + def __iter__(self): + for i in range(self.data_count()): + yield self.get(i) + + def __repr__(self): + desc = self.describe().strip() + if '\n' in desc: + desc = '\n'.join(f' {l}' for l in desc.split('\n')) + desc = f'\n{desc}\n' + return f'{self.__class__.__name__}({desc})' + + def describe(self) -> str: + raise NotImplementedError() + + def sample_n(self, n: int) -> List[bytes]: + ret = [] + indices = np.random.randint(self.data_count(), size=n) + for i in indices: + ret.append(self.get(i)) + return ret + + def data_count(self) -> int: + raise NotImplementedError() + + def get(self, item: int) -> bytes: + raise NotImplementedError() + + def add(self, val: bytes) -> int: + raise NotImplementedError() + + @contextmanager + def write_batch(self): + raise NotImplementedError() + + def commit(self): + raise NotImplementedError() + + def optimize(self): + raise NotImplementedError() + + def close(self): + raise NotImplementedError() + + +class BytesSqliteDB(BytesDB): + + class WB(object): + + def __init__(self, 
conn, cur, table_name, buf_size=8192): + self.conn = conn + self.cur = cur + self.table_name = table_name + self.buf = [] + self.buf_size = buf_size + + def add(self, id, value): + self.buf.append((id, snappy.compress(value))) + if len(self.buf) >= self.buf_size: + self.commit() + + def commit(self): + if self.buf: + self.cur.executemany( + f'INSERT INTO "{self.table_name}"("key", "value") VALUES (?, ?)', + self.buf + ) + self.conn.commit() + self.buf.clear() + + def rollback(self): + self.conn.rollback() + self.buf.clear() + + conn: sqlite3.Connection + path: str + file_name: str + _data_count: int + + def __init__(self, path: str, write: bool = False, table_name: str = 'data', + file_name: str = '_bytes.db'): + self.path = path + self.table_name = table_name + self.file_name = file_name + + if write and not os.path.isdir(path): + os.makedirs(path, exist_ok=True) + + self.conn = sqlite3.connect(os.path.join(self.path, file_name)) + self.conn.text_factory = bytes + with self._scoped_cursor() as cur: + cur.execute( + f'CREATE TABLE IF NOT EXISTS "{self.table_name}" (' + ' "key" INT PRIMARY KEY,' + ' "value" BLOB' + ');' + ) + self.conn.commit() + self._data_count = cur.execute(f'SELECT COUNT(*) FROM "{self.table_name}"').fetchone()[0] + self._wb = None + + @contextmanager + def _scoped_cursor(self): + cur = self.conn.cursor() + try: + yield cur + finally: + cur.close() + + def describe(self) -> str: + p = self.path + if self.file_name != '_bytes.db': + p = os.path.join(p, self.file_name) + if any(c in p for c in '(),'): + return repr(p) + return p + + def data_count(self) -> int: + return self._data_count + + def get(self, item: int) -> bytes: + with self._scoped_cursor() as cur: + cur.execute(f'SELECT "value" FROM "{self.table_name}" WHERE "key" = {item}') + row = cur.fetchone() + if row is not None: + return snappy.decompress(row[0]) + + def add(self, val: bytes) -> int: + if self._wb is None: + with self.write_batch(): + return self.add(val) + else: + key = self._data_count + self._wb.add(key, val) + self._data_count += 1 + return key + + @contextmanager + def write_batch(self): + if self._wb is not None: + raise RuntimeError(f'Another write_batch is already open!') + try: + self._wb = self.WB(self.conn, self.conn.cursor(), self.table_name) + yield self + self._wb.commit() + self._wb = None + except: + self._wb.rollback() + self._wb = None + raise + + def commit(self): + if self._wb is not None: + self._wb.commit() + + def optimize(self): + pass + + def close(self): + self.commit() + self._wb = None + self.conn.close() + + +class BytesMultiDB(BytesDB): + + db_list: List[BytesDB] + db_sizes: List[int] + _db_offset: List[int] + _data_count: int + + def __init__(self, *db_list): + self.db_list = list(db_list) + self.db_sizes = [db.data_count() for db in self.db_list] + self._db_offset = [] + i = 0 + for db in self.db_list: + self._db_offset.append(i) + i += db.data_count() + self._data_count = i + + def describe(self) -> str: + return '\n'.join(f'{db.describe()},' for db in self.db_list).rstrip(',') + + def data_count(self) -> int: + return self._data_count + + def get(self, item: int) -> bytes: + if item < 0 or item >= self._data_count: + raise IndexError(item) + i = bisect.bisect_left(self._db_offset, item + 1) - 1 + return self.db_list[i].get(item - self._db_offset[i]) + + def add(self, val: bytes) -> int: + raise RuntimeError(f'BytesMultiDB is not writeable.') + + @contextmanager + def write_batch(self): + raise RuntimeError(f'BytesMultiDB is not writeable.') + + def commit(self): + 
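+ # BytesMultiDB is a read-only aggregate: add() and write_batch() above
+ # raise RuntimeError, so there is never a pending write and commit() is a no-op.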
pass + + def optimize(self): + raise RuntimeError(f'BytesMultiDB is not writeable.') + + def close(self): + for db in self.db_list: + db.close() diff --git a/tracegnn/data/trace_graph.py b/tracegnn/data/trace_graph.py new file mode 100644 index 0000000..b443629 --- /dev/null +++ b/tracegnn/data/trace_graph.py @@ -0,0 +1,617 @@ +import os +import pickle as pkl +import sys +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import * + +import networkx as nx +import numpy as np +import pandas as pd +from tqdm import tqdm + +from ..utils import * + +__all__ = [ + 'TraceGraphNodeFeatures', + 'TraceGraphNodeReconsScores', + 'TraceGraphNode', + 'TraceGraphVectors', + 'TraceGraph', + 'TraceGraphIDManager', + 'load_trace_csv', + 'df_to_trace_graphs', +] + + +SERVICE_ID_YAML_FILE = 'service_id.yml' +OPERATION_ID_YAML_FILE = 'operation_id.yml' + + +@dataclass +class TraceGraphNodeFeatures(object): + __slots__ = ['span_count', 'max_latency', 'min_latency', 'avg_latency'] + + span_count: int # number of duplicates in the parent + avg_latency: float # for span_count == 1, avg == max == min + max_latency: float + min_latency: float + + +@dataclass +class TraceGraphNodeReconsScores(object): + # probability of the node + edge_logit: float + operation_logit: float + + # probability of the latency + avg_latency_nstd: float # (avg_latency - avg_latency_mean) / avg_latency_std + + +@dataclass +class TraceGraphSpan(object): + __slots__ = [ + 'span_id', 'start_time', 'latency', + ] + + span_id: Optional[int] + start_time: Optional[datetime] + latency: float + + +@dataclass +class TraceGraphNode(object): + __slots__ = [ + 'node_id', 'service_id', 'operation_id', + 'features', 'children', 'spans', 'scores', + 'anomaly', + ] + + node_id: Optional[int] # the node id of the graph + service_id: Optional[int] # the service id + operation_id: int # the operation id + features: TraceGraphNodeFeatures # the node features + children: List['TraceGraphNode'] # children nodes + spans: Optional[List[TraceGraphSpan]] # detailed spans information (from the original data) + scores: Optional[TraceGraphNodeReconsScores] + anomaly: Optional[int] # 1: drop anomaly; 2: latency anomaly; 3: service type anomaly + + def __eq__(self, other): + return other is self + + def __hash__(self): + return id(self) + + @staticmethod + def new_sampled(node_id: int, + operation_id: int, + features: TraceGraphNodeFeatures, + scores: Optional[TraceGraphNodeReconsScores] = None + ): + return TraceGraphNode( + node_id=node_id, + service_id=None, + operation_id=operation_id, + features=features, + children=[], + spans=None, + scores=scores, + anomaly=None, + ) + + def iter_bfs(self, + depth: int = 0, + with_parent: bool = False + ) -> Generator[ + Union[ + Tuple[int, 'TraceGraphNode'], + Tuple[int, 'TraceGraphNode', 'TraceGraphNode'] + ], + None, + None + ]: + """Iterate through the nodes in BFS order.""" + if with_parent: + depth = depth + level = [(self, None, 0)] + + while level: + next_level: List[Tuple[TraceGraphNode, TraceGraphNode, int]] = [] + for nd, parent, idx in level: + yield depth, idx, nd, parent + for c_idx, child in enumerate(nd.children): + next_level.append((child, nd, c_idx)) + depth += 1 + level = next_level + + else: + depth = depth + level = [self] + + while level: + next_level: List[TraceGraphNode] = [] + for nd in level: + yield depth, nd + next_level.extend(nd.children) + depth += 1 + level = next_level + + def count_nodes(self) -> int: + ret = 0 + for _ in self.iter_bfs(): + ret += 1 + 
return ret + + +@dataclass +class TraceGraphVectors(object): + """Cached result of `TraceGraph.graph_vectors()`.""" + __slots__ = [ + 'u', 'v', + 'node_type', + 'node_depth', 'node_idx', + 'span_count', 'avg_latency', 'max_latency', 'min_latency', + 'node_features', + ] + + # note that it is guaranteed that u[i] < v[i], i.e., upper triangle matrix + u: np.ndarray + v: np.ndarray + + # node type + node_type: np.ndarray + + # node depth + node_depth: np.ndarray + + # node idx + node_idx: np.ndarray + + # node feature + span_count: np.ndarray + avg_latency: np.ndarray + max_latency: np.ndarray + min_latency: np.ndarray + + +@dataclass +class TraceGraph(object): + __slots__ = [ + 'version', + 'trace_id', 'parent_id', 'root', 'node_count', 'max_depth', 'data', + ] + + version: int # version control + trace_id: Optional[Tuple[int, int]] + parent_id: Optional[int] + root: TraceGraphNode + node_count: Optional[int] + max_depth: Optional[int] + data: Dict[str, Any] # any data about the graph + + @staticmethod + def default_version() -> int: + return 0x2 + + @staticmethod + def new_sampled(root: TraceGraphNode, node_count: int, max_depth: int): + return TraceGraph( + version=TraceGraph.default_version(), + trace_id=None, + parent_id=None, + root=root, + node_count=node_count, + max_depth=max_depth, + data={}, + ) + + @property + def edge_count(self) -> Optional[int]: + if self.node_count is not None: + return self.node_count - 1 + + def iter_bfs(self, + with_parent: bool = False + ): + """Iterate through the nodes in BFS order.""" + yield from self.root.iter_bfs(with_parent=with_parent) + + def merge_spans_and_assign_id(self): + """ + Merge spans with the same (service, operation) under the same parent, + and re-assign node IDs. + """ + node_count = 0 + max_depth = 0 + + for depth, parent in self.iter_bfs(): + max_depth = max(max_depth, depth) + + # assign ID to this node + parent.node_id = node_count + node_count += 1 + + # merge the children of this node + children = [] + for child in sorted(parent.children, key=lambda o: o.operation_id): + if children and children[-1].operation_id == child.operation_id: + prev_child = children[-1] + + # merge the features + f1, f2 = prev_child.features, child.features + f1.span_count += f2.span_count + f1.avg_latency += (f2.avg_latency - f1.avg_latency) * (f2.span_count / f1.span_count) + f1.max_latency = max(f1.max_latency, f2.max_latency) + f1.min_latency = min(f1.min_latency, f2.min_latency) + + # merge the children + if child.children: + if prev_child.children: + prev_child.children.extend(child.children) + else: + prev_child.children = child.children + + # merge the spans + if child.spans: + if prev_child.spans: + prev_child.spans.extend(child.spans) + else: + prev_child.spans = child.spans + else: + children.append(child) + + # re-assign the merged children + parent.children = children + + # record node count and depth + self.node_count = node_count + self.max_depth = max_depth + + def assign_node_id(self): + """Assign node IDs to the graph nodes by pre-root order.""" + node_count = 0 + max_depth = 0 + + for depth, node in self.iter_bfs(): + max_depth = max(max_depth, depth) + + # assign id to this node + node.node_id = node_count + node_count += 1 + + # record node count and depth + self.node_count = node_count + self.max_depth = max_depth + + def graph_vectors(self): + # edge index + u = np.empty([self.edge_count], dtype=np.int64) + v = np.empty([self.edge_count], dtype=np.int64) + + # node type + node_type = np.zeros([self.node_count], dtype=np.int64) + 
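+ # every per-node array here is indexed by node_id; since assign_node_id()
+ # numbers nodes in BFS order, a parent's id is always smaller than its
+ # children's ids, which guarantees u[i] < v[i] when the edge index is
+ # filled in below (e.g. a root with two children yields u = [0, 0], v = [1, 2]).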
+ # node depth + node_depth = np.zeros([self.node_count], dtype=np.int64) + + # node idx + node_idx = np.zeros([self.node_count], dtype=np.int64) + + # node feature + span_count = np.zeros([self.node_count], dtype=np.int64) + avg_latency = np.zeros([self.node_count], dtype=np.float32) + max_latency = np.zeros([self.node_count], dtype=np.float32) + min_latency = np.zeros([self.node_count], dtype=np.float32) + + # X = np.zeros([self.node_count, x_dim], dtype=np.float32) + + edge_idx = 0 + for depth, idx, node, parent in self.iter_bfs(with_parent=True): + j = node.node_id + feat = node.features + + # node type + node_type[j] = node.operation_id + + # node depth + node_depth[j] = depth + + # node idx + node_idx[j] = idx + + # node feature + span_count[j] = feat.span_count + avg_latency[j] = feat.avg_latency + max_latency[j] = feat.max_latency + min_latency[j] = feat.min_latency + # X[parent.node_id, parent.operation_id] = 1 # one-hot encoded node feature + + # edge index + for child in node.children: + u[edge_idx] = node.node_id + v[edge_idx] = child.node_id + edge_idx += 1 + + if len(u) != self.edge_count: + raise ValueError(f'`len(u)` != `self.edge_count`: {len(u)} != {self.edge_count}') + + return TraceGraphVectors( + # edge index + u=u, v=v, + # node type + node_type=node_type, + # node depth + node_depth=node_depth, + # node idx + node_idx=node_idx, + # node feature + span_count=span_count, + avg_latency=avg_latency, + max_latency=max_latency, + min_latency=min_latency, + ) + + def networkx_graph(self, id_manager: 'TraceGraphIDManager') -> nx.Graph: + gv = self.graph_vectors() + self_nodes = {nd.node_id: nd for _, nd in self.iter_bfs()} + g = nx.Graph() + # graph + for k, v in self.data.items(): + g.graph[k] = v + # nodes + g.add_nodes_from(range(self.node_count)) + # edges + g.add_edges_from([(i, j) for i, j in zip(gv.u, gv.v)]) + # node features + for i in range(len(gv.node_type)): + nd = g.nodes[i] + nd['node_type'] = gv.node_type[i] + nd['operation'] = id_manager.operation_id.reverse_map(gv.node_type[i]) + for attr in TraceGraphNodeFeatures.__slots__: + nd[attr] = getattr(gv, attr)[i] + if self_nodes[i].scores: + nd['avg_latency_nstd'] = self_nodes[i].scores.avg_latency_nstd + return g + + def to_bytes(self, protocol: int = pkl.DEFAULT_PROTOCOL) -> bytes: + return pkl.dumps(self, protocol=protocol) + + @staticmethod + def from_bytes(content: bytes) -> 'TraceGraph': + r = pkl.loads(content) + + # for deserializing old versions of TraceGraph + if not hasattr(r, 'version'): + r.version = 0x0 + + if r.version < 0x1: # upgrade 0x0 => 0x2 + for _, nd in r.root.iter_bfs(): + nd.scores = None + nd.anomaly = None + r.version = 0x2 + + if r.version < 0x2: # upgrade 0x1 => 0x2 + for _, nd in r.root.iter_bfs(): + nd.anomaly = None + r.version = 0x2 + + return r + + def deepcopy(self) -> 'TraceGraph': + return TraceGraph.from_bytes(self.to_bytes()) + + +@dataclass +class TempGraphNode(object): + __slots__ = ['trace_id', 'parent_id', 'node'] + + trace_id: Tuple[int, int] + parent_id: int + node: 'TraceGraphNode' + + +class TraceGraphIDManager(object): + __slots__ = ['root_dir', 'service_id', 'operation_id'] + + root_dir: str + service_id: IDAssign + operation_id: IDAssign + + def __init__(self, root_dir: str): + self.root_dir = os.path.abspath(root_dir) + self.service_id = IDAssign(os.path.join(self.root_dir, SERVICE_ID_YAML_FILE)) + self.operation_id = IDAssign(os.path.join(self.root_dir, OPERATION_ID_YAML_FILE)) + + def __enter__(self): + self.service_id.__enter__() + 
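+ # IDAssign presumably opens the yml-backed name -> id mapping on __enter__;
+ # the service and operation managers are entered (and exited below) together,
+ # so get_or_assign() can extend either mapping while graphs are being built.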
self.operation_id.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.service_id.__exit__(exc_type, exc_val, exc_tb) + self.operation_id.__exit__(exc_type, exc_val, exc_tb) + + @property + def num_operations(self) -> int: + return len(self.operation_id) + + def dump_to(self, output_dir: str): + self.service_id.dump_to(os.path.join(output_dir, SERVICE_ID_YAML_FILE)) + self.operation_id.dump_to(os.path.join(output_dir, OPERATION_ID_YAML_FILE)) + + +def load_trace_csv(input_path: str, is_test: bool=False) -> pd.DataFrame: + if is_test: + dtype = { + 'traceIdHigh': int, + 'traceIdLow': int, + 'spanId': int, + 'parentSpanId': int, + 'serviceName': str, + 'operationName': str, + 'startTime': str, + 'duration': float, + 'nanosecond': int, + 'DBhash': int, + 'nodeLatencyLabel': int, + 'graphLatencyLabel': int, + 'graphStructureLabel': int + } + else: + dtype = { + 'traceIdHigh': int, + 'traceIdLow': int, + 'spanId': int, + 'parentSpanId': int, + 'serviceName': str, + 'operationName': str, + 'startTime': str, + 'duration': float, + 'nanosecond': int, + 'DBhash': int, + } + + return pd.read_csv( + input_path, + engine='c', + usecols=list(dtype), + dtype=dtype + ) + + +def df_to_trace_graphs(df: pd.DataFrame, + id_manager: TraceGraphIDManager, + test_label: int = None, + min_node_count: int = 2, + max_node_count: int = 32, + summary_file: Optional[str] = None, + merge_spans: bool = False, + ) -> List[TraceGraph]: + summary = [] + trace_spans = {} + df = df[df['DBhash'] == 0] + + # read the spans + with id_manager: + for row in tqdm(df.itertuples(), desc='Read spans', total=len(df)): + graph_label = 0 + + if test_label is not None: + if row.graphStructureLabel != 0: + graph_label = 1 + elif row.graphLatencyLabel != 0: + graph_label = 2 + if graph_label != test_label: + continue + + if row.serviceName not in id_manager.service_id._mapping: + print(row.serviceName, ": Service not in file!") + continue + if f'{row.serviceName}/{row.operationName}' not in id_manager.operation_id._mapping: + print(f'{row.serviceName}/{row.operationName}', ": Operation not in file!") + continue + + trace_id = (row.traceIdHigh, row.traceIdLow) + span_dict = trace_spans.get(trace_id, None) + if span_dict is None: + trace_spans[trace_id] = span_dict = {} + + span_latency = row.duration + span_dict[row.spanId] = TempGraphNode( + trace_id=trace_id, + parent_id=row.parentSpanId, + node=TraceGraphNode( + node_id=None, + service_id=id_manager.service_id.get_or_assign(row.serviceName), + operation_id=id_manager.operation_id.get_or_assign(f'{row.serviceName}/{row.operationName}'), + features=TraceGraphNodeFeatures( + span_count=1, + avg_latency=span_latency, + max_latency=span_latency, + min_latency=span_latency, + ), + children=[], + spans=[ + TraceGraphSpan( + span_id=row.spanId, + start_time=( + datetime.strptime(row.startTime, '%Y-%m-%d %H:%M:%S') + + timedelta(microseconds=row.nanosecond / 1_000) + ), + latency=span_latency, + ), + ], + scores=None, + anomaly=None, + ) + ) + + summary.append(f'Span count: {len(trace_spans)}') + + # construct the traces + trace_graphs = [] + + if test_label is None or test_label == 0: + graph_data = {} + elif test_label == 1: + graph_data = { + 'is_anomaly': True, + 'anomaly_type': 'drop' + } + else: + graph_data = { + 'is_anomaly': True, + 'anomaly_type': 'latency' + } + + for _, trace in tqdm(trace_spans.items(), total=len(trace_spans), desc='Build graphs'): + nodes = sorted( + trace.values(), + key=(lambda nd: (nd.node.service_id, nd.node.operation_id, 
nd.node.spans[0].start_time)) + ) + for nd in nodes: + parent_id = nd.parent_id + if (parent_id == 0) or (parent_id not in trace): + # if only a certain service is taken from the database, then just the sub-trees + # of a trace are obtained, which leads to orphan nodes (parent_id != 0 and not in trace + trace_graphs.append(TraceGraph( + version=TraceGraph.default_version(), + trace_id=nd.trace_id, + parent_id=nd.parent_id, + root=nd.node, + node_count=None, + max_depth=None, + data=graph_data, + )) + else: + trace[parent_id].node.children.append(nd.node) + + # merge spans and assign id + if merge_spans: + for trace in tqdm(trace_graphs, desc='Merge spans and assign node id'): + trace.merge_spans_and_assign_id() + else: + for trace in tqdm(trace_graphs, desc='Assign node id'): + trace.assign_node_id() + + # gather the final results + ret = [] + too_small = 0 + too_large = 0 + + for trace in trace_graphs: + if trace.node_count < min_node_count: + too_small += 1 + elif trace.node_count > max_node_count: + too_large += 1 + else: + ret.append(trace) + + summary.append(f'Imported graph: {len(trace_graphs)}; dropped graph: too small = {too_small}, too large = {too_large}') + if summary_file: + with open(summary_file, 'w', encoding='utf-8') as f: + f.write('\n'.join(summary) + '\n') + else: + print('\n'.join(summary), file=sys.stderr) + + return ret diff --git a/tracegnn/data/trace_graph_db.py b/tracegnn/data/trace_graph_db.py new file mode 100644 index 0000000..25478c9 --- /dev/null +++ b/tracegnn/data/trace_graph_db.py @@ -0,0 +1,108 @@ +"""Wraps a BytesDB into TraceGraphDB.""" +import os +import pickle as pkl +import re +from contextlib import contextmanager +from typing import * + +import numpy as np + +from .bytes_db import * +from .trace_graph import * + +__all__ = ['TraceGraphDB', 'open_trace_graph_db'] + + +class TraceGraphDB(object): + bytes_db: BytesDB + protocol: int + + def __init__(self, bytes_db: BytesDB, protocol: Optional[int] = None): + if protocol is None: + protocol = pkl.DEFAULT_PROTOCOL + self.bytes_db = bytes_db + self.protocol = protocol + + def __enter__(self): + self.bytes_db.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.bytes_db.__exit__(exc_type, exc_val, exc_tb) + + def __len__(self) -> int: + return self.data_count() + + def __getitem__(self, item: int): + return self.get(item) + + def __iter__(self): + for i in range(self.data_count()): + yield self.get(i) + + def __repr__(self): + desc = repr(self.bytes_db) + desc = desc[desc.find('(') + 1: -1] + return f'TraceGraphDB({desc})' + + def sample_n(self, + n: int, + with_id: bool = False + ) -> List[Union[TraceGraph, Tuple[int, TraceGraph]]]: + ret = [] + indices = np.random.randint(self.data_count(), size=n) + for i in indices: + g = self.get(i) + if with_id: + ret.append((int(i), g)) + else: + ret.append(g) + return ret + + def data_count(self) -> int: + return self.bytes_db.data_count() + + def get(self, item: int) -> TraceGraph: + return TraceGraph.from_bytes(self.bytes_db.get(item)) + + def add(self, g: TraceGraph) -> int: + return self.bytes_db.add(g.to_bytes(protocol=self.protocol)) + + @contextmanager + def write_batch(self): + with self.bytes_db.write_batch(): + yield self + + def commit(self): + self.bytes_db.commit() + + def optimize(self): + self.bytes_db.optimize() + + def close(self): + self.bytes_db.close() + + +def open_trace_graph_db(input_dir: str, + names: Optional[Sequence[str]] = (), + protocol: Optional[int] = None, + ) -> Tuple[TraceGraphDB, 
TraceGraphIDManager]: + file_name = f'_bytes_{protocol}.db' if protocol else '_bytes.db' + + id_manager = TraceGraphIDManager(os.path.join(input_dir, 'id_manager')) + + if len(names) == 1: + db = TraceGraphDB( + BytesSqliteDB(os.path.join(input_dir, 'processed', names[0]), file_name=file_name), + protocol=protocol, + ) + else: + db = TraceGraphDB( + BytesMultiDB(*[ + BytesSqliteDB(os.path.join(input_dir, 'processed', name), file_name=file_name) + for name in names + ]), + protocol=protocol, + ) + + return db, id_manager diff --git a/tracegnn/models/__init__.py b/tracegnn/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/models/trace_vae/__init__.py b/tracegnn/models/trace_vae/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/models/trace_vae/constants.py b/tracegnn/models/trace_vae/constants.py new file mode 100644 index 0000000..71cc541 --- /dev/null +++ b/tracegnn/models/trace_vae/constants.py @@ -0,0 +1,3 @@ +from tracegnn.constants import * + +LATENCY_DIM = 1 diff --git a/tracegnn/models/trace_vae/dataset.py b/tracegnn/models/trace_vae/dataset.py new file mode 100644 index 0000000..6dd1556 --- /dev/null +++ b/tracegnn/models/trace_vae/dataset.py @@ -0,0 +1,134 @@ +from dataclasses import dataclass +from typing import * + +import dgl +import mltk +import numpy as np +import torch +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * +from .constants import * + +__all__ = [ + 'trace_graph_to_dgl', + 'TraceGraphDataStream', +] + + +def trace_graph_to_dgl(graph: TraceGraph, + num_node_types: int, + add_self_loop: bool, + latency_range: Optional[TraceGraphLatencyRangeFile] = None, + directed: Union[bool, str] = False, # True, False or 'reverse' + ): + with T.no_grad(): + gv = graph.graph_vectors() + + # build edges + # todo: use heterogeneous graph to distinguish between "parent -> child" edge and opposite direction + # here we just add edges for the both direction (as an initial step) + if directed == 'reverse': + u = T.as_tensor(gv.v, dtype=T.int64) + v = T.as_tensor(gv.u, dtype=T.int64) + elif directed is True: + u = T.as_tensor(gv.u, dtype=T.int64) + v = T.as_tensor(gv.v, dtype=T.int64) + elif directed is False: + u = T.as_tensor( + np.concatenate([gv.u, gv.v], axis=0), + dtype=T.int64, + ) + v = T.as_tensor( + np.concatenate([gv.v, gv.u], axis=0), + dtype=T.int64 + ) + else: + raise ValueError(f'Unsupported value for directed: {directed!r}') + + g = dgl.graph((u, v), num_nodes=graph.node_count) + if add_self_loop: + g = dgl.add_self_loop(g) + + # node type (use nn.Embedding later to map the node type => node embedding) + g.ndata['node_type'] = T.as_tensor(gv.node_type, dtype=T.int64) + + # the index of the node under its parent + g.ndata['node_idx'] = T.as_tensor(gv.node_idx, dtype=T.int64) + + # node depth + g.ndata['node_depth'] = T.as_tensor(gv.node_depth, dtype=T.int64) + + # span count + g.ndata['span_count'] = T.as_tensor(np.minimum(gv.span_count, MAX_SPAN_COUNT), dtype=T.int64) + + # latency + if USE_MULTI_DIM_LATENCY_CODEC: + for pfx in ('avg_', 'max_', 'min_'): + codec, onehot = encode_latency(getattr(gv, f'{pfx}latency'), MAX_LATENCY_DIM) + g.ndata[f'{pfx}latency_codec'] = T.as_tensor(codec, dtype=T.float32) + g.ndata[f'{pfx}latency_onehot'] = T.as_tensor(onehot, dtype=T.float32) + else: + for pfx in ('avg_', 'max_', 'min_'): + latency_array = getattr(gv, f'{pfx}latency') + latency = [] + for i in range(graph.node_count): + mu, std = latency_range[gv.node_type[i]] + 
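+ # normalize the raw latency by the per-operation (mu, std) taken from the
+ # latency-range file; the 1e-5 below guards against a zero std. For example,
+ # with mu = 100 and std = 20, a raw value of 140 maps to roughly 2.0.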
latency.append((latency_array[i] - mu) / (std + 1e-5)) + g.ndata[f'{pfx}latency'] = T.as_tensor(np.reshape(latency, (-1, 1)), dtype=T.float32) + g.ndata['latency'] = T.concat( + [ + g.ndata['avg_latency'], + g.ndata['min_latency'], + g.ndata['max_latency'], + ], + axis=-1, + ) + + return g + + +class TraceGraphDataStream(mltk.data.MapperDataStream): + + def __init__(self, + db: TraceGraphDB, + id_manager: TraceGraphIDManager, + batch_size: int, + shuffle: bool = False, + skip_incomplete: bool = False, + random_state: Optional[np.random.RandomState] = None, + data_count: Optional[int] = None, + ): + if (data_count is not None) and (data_count < len(db)) and shuffle: + indices = np.arange(len(db)) + np.random.shuffle(indices) + indices = indices[:data_count] + source_cls = lambda **kwargs: mltk.DataStream.arrays([indices], **kwargs) + else: + if data_count is None: + data_count = len(db) + source_cls = lambda **kwargs: mltk.DataStream.int_seq(data_count, **kwargs) + + source = source_cls( + batch_size=batch_size, + shuffle=shuffle, + skip_incomplete=skip_incomplete, + random_state=random_state, + ) + + def mapper(indices): + return (np.array( + [ + db.get(idx) + for idx in indices + ] + ),) + + super().__init__( + source=source, + mapper=mapper, + array_count=1, + data_shapes=((),) + ) + diff --git a/tracegnn/models/trace_vae/distributions.py b/tracegnn/models/trace_vae/distributions.py new file mode 100644 index 0000000..d7f4706 --- /dev/null +++ b/tracegnn/models/trace_vae/distributions.py @@ -0,0 +1,356 @@ +import math +from typing import * + +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.typing_ import TensorOrData +from tensorkit.distributions.utils import copy_distribution + +__all__ = [ + 'MaskedDistribution', + 'BiasedBernoulli', + 'BiasedCategorical', + 'BiasedOneHotCategorical', + 'BiasedNormal', + 'SafeNormal', + 'AnomalyDetectionNormal', +] + + +class MaskedDistribution(tk.Distribution): + """ + A wrapper distribution to mask some elements, in order to mimic "variadic length" + in the event dimensions. + """ + + def __init__(self, + distribution: tk.Distribution, + mask: TensorOrData, # should be right-aligned with the underlying log_prob + log_prob_weight: Optional[TensorOrData] = None, # should be right-aligned with the underlying log_prob + *, + event_ndims: Optional[int] = None, + validate_tensors: Optional[bool] = None, + ): + # validate the arguments + if validate_tensors is None: + validate_tensors = distribution.validate_tensors + + # compute event ndims + batch_shape = distribution.batch_shape + value_shape = distribution.value_shape + min_event_ndims = distribution.event_ndims + max_event_ndims = distribution.value_ndims + + if event_ndims is None: + event_ndims = min_event_ndims + if not (min_event_ndims <= event_ndims <= max_event_ndims): + raise ValueError( + f'`event_ndims` out of range: got {event_ndims}, but ' + f'the minimum allowed value is {min_event_ndims}, ' + f'and the maximum allowed value is {max_event_ndims}.' 
+ ) + batch_shape = batch_shape[: len(batch_shape) - (event_ndims - min_event_ndims)] + + super().__init__( + dtype=distribution.dtype, + value_shape=value_shape, + batch_shape=batch_shape, + continuous=distribution.continuous, + reparameterized=distribution.reparameterized, + event_ndims=event_ndims, + min_event_ndims=min_event_ndims, + device=distribution.device, + validate_tensors=validate_tensors, + ) + self._base_distribution = distribution + self.mask = T.as_tensor(mask, device=distribution.device) + self.log_prob_weight = T.as_tensor(log_prob_weight, device=distribution.device) \ + if log_prob_weight is not None else None + + @property + def base_distribution(self) -> tk.Distribution: + return self._base_distribution + + def _apply_mask_on_log_prob(self, log_prob): + r = log_prob * T.as_tensor(self.mask, dtype=T.get_dtype(log_prob)) + if self.log_prob_weight is not None: + r = r * T.as_tensor(self.log_prob_weight, dtype=T.get_dtype(log_prob)) + return r + + def _apply_mask_on_samples(self, samples): + mask = T.as_tensor(self.mask, dtype=T.get_dtype(samples)) + return samples * T.reshape( + mask, + T.shape(mask) + ([1] * self.min_event_ndims) # expand mask to match the samples + ) + + def _sample(self, + n_samples: Optional[int], + group_ndims: int, + reduce_ndims: int, + reparameterized: bool) -> 'tk.StochasticTensor': + x = self._base_distribution.sample( + n_samples=n_samples, + reparameterized=reparameterized + ) + t = tk.StochasticTensor( + tensor=self._apply_mask_on_samples(x.tensor), + distribution=self, + n_samples=n_samples, + group_ndims=group_ndims, + reparameterized=reparameterized + ) + return t + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + log_prob = self._base_distribution.log_prob(given) + log_prob = self._apply_mask_on_log_prob(log_prob) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + def copy(self, **overrided_params): + return copy_distribution( + cls=MaskedDistribution, + base=self, + attrs=(('distribution', '_base_distribution'), 'mask', + 'event_ndims', 'validate_tensors'), + overrided_params=overrided_params, + ) + + +def _biased_Bernoulli_or_Categorical_log_prob(log_prob, alpha, threshold_logit, reduce_ndims): + dtype = T.get_dtype(log_prob) + log_prob = T.where( + log_prob < T.float_scalar(threshold_logit, dtype=dtype), + log_prob * T.float_scalar(alpha, dtype=dtype), + log_prob, + ) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + +class BiasedBernoulli(tk.distributions.Bernoulli): + """Bernoulli whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.bernoulli_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedBernoulli, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + 
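+ # 'alpha' and 'threshold' are listed in attrs above, so a copy of this
+ # distribution keeps its biased log-prob behaviour.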
original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedCategorical(tk.distributions.Categorical): + """Categorical whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.categorical_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedCategorical, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedOneHotCategorical(tk.distributions.OneHotCategorical): + """OneHotCategorical whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.one_hot_categorical_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedOneHotCategorical, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedNormal(tk.distributions.Normal): + """Normal whose log p(x) is biased towards error.""" + + alpha: float + std_threshold: float + + _extra_args = ('alpha', 'std_threshold') + + def __init__(self, alpha: float = 1.0, std_threshold: float = 3.0, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.std_threshold = std_threshold + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + log_prob = T.random.normal_log_pdf( + given=given, + mean=self.mean, + logstd=self.logstd, + group_ndims=0, + validate_tensors=self.validate_tensors, + ) + dtype = T.get_dtype(log_prob) + log_prob = T.where( + T.abs(given - self.mean) > (T.float_scalar(self.std_threshold, dtype=dtype) * self.std), + log_prob * T.float_scalar(self.alpha, dtype=dtype), + log_prob, + ) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + +class SafeNormal(tk.distributions.Normal): + """Normal whose log p(x) is computed with |x-mean| clipped within nstd * std.""" + + std_threshold: float + + _extra_args = ('std_threshold',) + + def __init__(self, std_threshold: float = 5.0, **kwargs): + super().__init__(**kwargs) + self.std_threshold = std_threshold + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + min_val = 
T.stop_grad(self.mean - self.std_threshold * self.std) + max_val = T.stop_grad(self.mean + self.std_threshold * self.std) + return T.random.normal_log_pdf( + given=T.maximum( + T.minimum(given, max_val), + min_val, + ), + mean=self.mean, + logstd=self.logstd, + group_ndims=reduce_ndims, + validate_tensors=self.validate_tensors, + ) + + +class AnomalyDetectionNormal(tk.distributions.Normal): + """Normal whose log p(x) is replaced by clipped Normal-CDF for anomaly detection.""" + + SQRT2 = math.sqrt(2) + LOG2 = math.log(2) + + std_threshold: float + bias_alpha: float + bias_threshold: float + + _extra_args = ('std_threshold', 'bias_alpha', 'bias_threshold',) + + def __init__(self, + std_threshold: float = 3.0, + bias_alpha: float = 1.0, + bias_threshold: float = 0.5, + **kwargs): + super().__init__(**kwargs) + self.std_threshold = std_threshold + self.bias_alpha = bias_alpha + self.bias_threshold = bias_threshold + self._log_bias_threshold = math.log(bias_threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + # t = abs(X) - std_threshold + # prob = 1 - normal_cdf(t) + # = 0.5 * (1 - erf(t / sqrt(2))) + # log_prob = -log(2) + log1p(-erf(t / sqrt(2))) + t = T.abs((given - self.mean) / self.std) - self.std_threshold + log_prob = -self.LOG2 + T.log1p(-T.erf(t / self.SQRT2)) + return _biased_Bernoulli_or_Categorical_log_prob( + log_prob, + self.bias_alpha, + self._log_bias_threshold, + reduce_ndims, + ) diff --git a/tracegnn/models/trace_vae/evaluation.py b/tracegnn/models/trace_vae/evaluation.py new file mode 100644 index 0000000..db75bc9 --- /dev/null +++ b/tracegnn/models/trace_vae/evaluation.py @@ -0,0 +1,570 @@ +import json +import math +from pprint import pprint +from typing import * + +import mltk +import tensorkit as tk +import yaml +from tensorkit import tensor as T +from tqdm import tqdm +import pickle +import snappy +import numpy as np +import os + +from tracegnn.utils import * +from tracegnn.data import * +from ...data import TraceGraph, TraceGraphNode +from ...utils import TraceGraphLatencyRangeFile +from .graph_utils import p_net_to_trace_graphs, trace_graph_key +from .model import TraceVAE +from .tensor_utils import * +from .types import TraceGraphBatch + +__all__ = [ + 'do_evaluate_nll', + 'do_evaluate_prior', + 'do_anomaly_detect' +] + + +def do_evaluate_nll(test_stream: mltk.DataStream, + vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_z: int, + use_biased: bool = True, + use_latency_biased: bool = True, + no_latency: bool = False, + no_struct: bool = False, + std_limit: Optional[T.Tensor] = None, + latency_log_prob_weight: bool = False, + latency_logstd_min: Optional[float] = None, + test_threshold: Optional[float] = None, + test_loop=None, + summary_writer=None, + clip_nll=None, + use_embeddings: bool = False, + num_embedding_samples=None, + nll_output_file=None, + proba_cdf_file=None, + auc_curve_file=None, + latency_hist_file=None, + operation_id_dict_out=None, # corresponding to latency_std_dict_out + latency_std_dict_out=None, + latency_reldiff_dict_out=None, + p_node_count_dict_out=None, + p_edge_dict_out=None, + latency_dict_prefix='', + ): + # check params + if std_limit is not None: + std_limit = T.as_tensor(std_limit, dtype=T.float32) + + # result buffer + nll_list = [] + label_list = [] + trace_id_list = [] + graph_key_list = [] + z_buffer = [] # the z embedding buffer of the graph + z2_buffer = [] # the z2 embedding buffer of the graph + z_label = [] # the label 
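+# Numeric check (sketch) of the tail-probability form used by
+# AnomalyDetectionNormal above: with t = |x - mean| / std - std_threshold,
+#   log p = log(1 - normal_cdf(t)) = -log(2) + log1p(-erf(t / sqrt(2))).
+import math
+
+def tail_log_prob_sketch(x: float, mean: float, std: float, std_threshold: float = 3.0) -> float:
+    t = abs(x - mean) / std - std_threshold
+    return -math.log(2) + math.log1p(-math.erf(t / math.sqrt(2)))
+
+# A point exactly std_threshold stds away from the mean gives t = 0, i.e. log(0.5).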
for z and z2
+    latency_samples = {}
+    result_dict = {}
+
+    if operation_id_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in operation_id_dict_out:
+                operation_id_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920, dtype=np.int64)
+
+    if latency_std_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in latency_std_dict_out:
+                latency_std_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if latency_reldiff_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in latency_reldiff_dict_out:
+                latency_reldiff_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if p_node_count_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in p_node_count_dict_out:
+                p_node_count_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if p_edge_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in p_edge_dict_out:
+                p_edge_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    def add_embedding(buffer, label, tag, limit=None):
+        if limit is not None:
+            indices = np.arange(len(buffer))
+            np.random.shuffle(indices)
+            indices = indices[:limit]
+            buffer = buffer[indices]
+            label = label[indices]
+        summary_writer.add_embedding(
+            buffer,
+            metadata=label,
+            tag=tag,
+        )
+
+    # run evaluation
+    def eval_step(trace_graphs: List[TraceGraph]):
+        G = TraceGraphBatch(
+            id_manager=id_manager,
+            latency_range=latency_range,
+            trace_graphs=trace_graphs,
+        )
+        chain = vae.q(G, n_z=n_z, no_latency=no_latency).chain(
+            vae.p,
+            latent_axis=0,
+            G=G,
+            use_biased=use_biased,
+            use_latency_biased=use_latency_biased,
+            no_latency=no_latency,
+            latency_logstd_min=latency_logstd_min,
+            latency_log_prob_weight=latency_log_prob_weight,
+            std_limit=std_limit,
+        )
+        if no_struct:
+            q, p = chain.q, chain.p
+            del q['z']
+            del p['z']
+            del p['adj']
+            del p['node_count']
+            del p['node_type']
+            chain = q.chain(lambda *args, **kwargs: p, latent_axis=0)
+
+        loss = chain.vi.training.sgvb()
+        nll = -chain.vi.evaluation.is_loglikelihood()
+
+        # clip the nll, and treat 'NaN' or 'Inf' nlls as `config.test.clip_nll`
+        if clip_nll is not None:
+            clip_limit = T.float_scalar(clip_nll)
+            loss = T.where(loss < clip_limit, loss, clip_limit)
+            nll = T.where(nll < clip_limit, nll, clip_limit)
+
+        # the nlls and labels of this step
+        step_label_list = np.array([
+            0 if not g.data.get('is_anomaly') else (
+                1 if g.data['anomaly_type'] == 'drop' else 2)
+            for g in trace_graphs
+        ])
+
+        # Load the graph_key
+        step_graph_key_list = [trace_graph_key(g) for g in trace_graphs]
+        step_trace_id_list = [g.trace_id for g in trace_graphs]
+
+        if not no_struct:
+            # collect operation id
+            if operation_id_dict_out is not None:
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0)
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1)
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2)
+
+            # collect latency
+            if latency_std_dict_out is not None:
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0)
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1)
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2)
+
+            # collect relative diff
+            if latency_reldiff_dict_out is not None:
collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # collect p node count + if p_node_count_dict_out is not None: + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # collect p edge + if p_edge_dict_out is not None: + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # inspect the internals of every trace graph + if 'latency' in chain.p: + p_latency = chain.p['latency'].distribution.base_distribution + p_latency_mu, p_latency_std = p_latency.mean, p_latency.std + if len(T.shape(p_latency.mean)) == 4: + p_latency_mu = p_latency_mu[0] + p_latency_std = p_latency_std[0] + + latency_sample = T.to_numpy(T.random.normal(p_latency_mu, p_latency_std)) + + for i, tg in enumerate(trace_graphs): + assert isinstance(tg, TraceGraph) + if step_label_list[i] == 0: + for j in range(tg.node_count): + node_type = int(T.to_numpy(G.dgl_graphs[i].ndata['node_type'][j])) + if node_type not in latency_samples: + latency_samples[node_type] = [] + mu, std = latency_range[node_type] + latency_samples[node_type].append(latency_sample[i, j, 0] * std + mu) + + if use_embeddings: + for i in range(len(trace_graphs)): + if step_label_list[i] == 0: + node_type = trace_graphs[i].root.operation_id + node_label = id_manager.operation_id.reverse_map(node_type) + z_label.append(node_label) + z_buffer.append(T.to_numpy(chain.q['z'].tensor[0, i])) + if 'z2' in chain.q: + z2_buffer.append(T.to_numpy(chain.q['z2'].tensor[0, i])) + + # memorize the outputs + nll_list.extend(T.to_numpy(nll)) + label_list.extend(step_label_list) + trace_id_list.extend(step_trace_id_list) + graph_key_list.extend(step_graph_key_list) + + # return a dict of the test result + ret = {} + normal_losses = T.to_numpy(loss)[step_label_list == 0] + if len(normal_losses) > 0: + test_loss = np.nanmean(normal_losses) + if not math.isnan(test_loss): + ret['loss'] = test_loss + return ret + + with T.no_grad(): + # run test on test set + if test_loop is not None: + with test_loop.timeit('eval_time'): + r = test_loop.run(eval_step, test_stream) + if 'loss' in r: + r['test_loss'] = r['loss'] + if 'test_loss' in r: + result_dict['test_loss'] = r['test_loss'] + else: + test_losses = [] + test_weights = [] + for [trace_graphs] in tqdm(test_stream, total=test_stream.batch_count): + r = eval_step(trace_graphs) + if 'loss' in r: + test_losses.append(r['loss']) + test_weights.append(len(trace_graphs)) + test_weights = np.asarray(test_weights) + result_dict['test_loss'] = np.sum( + np.asarray(test_losses) * + (test_weights / np.sum(test_weights)) + ) + + # save the evaluation results + nll_list = np.asarray(nll_list) + label_list = np.asarray(label_list) + graph_key_list = np.asarray(pickle.dumps(graph_key_list)) + + # analyze nll + result_dict.update( + analyze_anomaly_nll( + 
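+# Sketch of the batch-size-weighted mean computed above when no `test_loop` is
+# given: per-batch losses are averaged with weights proportional to batch size.
+import numpy as np
+
+def weighted_test_loss_sketch(losses, batch_sizes) -> float:
+    w = np.asarray(batch_sizes, dtype=np.float64)
+    return float(np.sum(np.asarray(losses) * (w / np.sum(w))))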
nll_list=nll_list, + label_list=label_list, + proba_cdf_file=proba_cdf_file, + auc_curve_file=auc_curve_file, + threshold=test_threshold, + ) + ) + + if nll_output_file is not None: + np.savez_compressed( + ensure_parent_exists(nll_output_file), + nll_list=nll_list, + label_list=label_list, + graph_key_list=graph_key_list, + anomaly_degree=nll_list / result_dict['best_threshold_latency'] + ) + + print(f'{latency_dict_prefix} file saved to {nll_output_file}') + + # z embedding + if use_embeddings: + # add the operation embedding + operation_buffer = T.to_numpy(vae.operation_embedding( + T.arange(0, id_manager.num_operations, dtype=T.int64))) + operation_label = [ + id_manager.operation_id.reverse_map(i) + for i in range(id_manager.num_operations) + ] + add_embedding(operation_buffer, operation_label, 'operation') + + # add z & z2 embedding + z_label = np.stack(z_label, axis=0) + add_embedding( + np.stack(z_buffer, axis=0), + z_label, + tag='z', + limit=num_embedding_samples + ) + if z2_buffer: + add_embedding( + np.stack(z2_buffer, axis=0), + z_label, + tag='z2', + limit=num_embedding_samples + ) + + # return the results + result_dict = {k: float(v) for k, v in result_dict.items()} + return result_dict + + +def do_evaluate_prior(vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_samples: int, + batch_size: int, + eval_n_z: int, + nll_threshold: Optional[float] = None, + use_biased: bool = True, + output_file: Optional[str] = None, + latency_hist_out: Optional[str] = None, + ): + with T.no_grad(): + # results + sample_count = 0 + drop_count = 0 + result_dict = {} + latency_map = {} + + def add_sample(g: TraceGraph): + if latency_hist_out is not None: + for _, nd in g.iter_bfs(): + assert isinstance(nd, TraceGraphNode) + if nd.operation_id not in latency_map: + latency_map[nd.operation_id] = [] + latency_map[nd.operation_id].append(nd.features.avg_latency) + + # run by sample from prior + n_batches = (n_samples + batch_size - 1) // batch_size + for _ in tqdm(range(n_batches), total=n_batches, desc='Sample graphs from prior'): + # sample from prior + p = vae.p(n_z=batch_size) + trace_graphs = p_net_to_trace_graphs( + p, + id_manager=id_manager, + latency_range=latency_range, + discard_node_with_type_0=True, + discard_node_with_unknown_latency_range=True, + discard_graph_with_error_node_count=True, + ) + + sample_count += len(trace_graphs) + drop_count += sum(g is None for g in trace_graphs) + trace_graphs = [g for g in trace_graphs if g is not None] + + # evaluate the NLLs + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G=G, n_z=eval_n_z). 
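+# Sketch of the "anomaly_degree" saved above: each trace's NLL normalized by
+# the threshold selected by analyze_anomaly_nll, so values above 1 correspond
+# to traces flagged as anomalous under that threshold.
+import numpy as np
+
+def anomaly_degree_sketch(nll_list: np.ndarray, best_threshold: float) -> np.ndarray:
+    return nll_list / best_threshold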
\ + chain(vae.p, n_z=eval_n_z, latent_axis=0, use_biased=use_biased) + eval_nlls = T.to_numpy(chain.vi.evaluation.is_loglikelihood(reduction='none')) + + # purge too-low NLL graphs + for g, nll in zip(trace_graphs, eval_nlls): + if nll >= nll_threshold: + drop_count += 1 + else: + add_sample(g) + + # save the results + drop_rate = float(drop_count / sample_count) + result_dict.update({ + 'drop_rate': drop_rate, + }) + pprint(result_dict) + + if output_file is not None: + _, ext = os.path.splitext(output_file) + if ext == '.json': + result_cont = json.dumps(result_dict) + else: + result_cont = yaml.safe_dump(result_dict) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(result_cont) + + +def do_anomaly_detect(test_stream: mltk.DataStream, + vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_z: int, + use_biased: bool = True, + use_latency_biased: bool = True, + no_latency: bool = False, + no_struct: bool = False, + std_limit: Optional[T.Tensor] = None, + latency_log_prob_weight: bool = False, + latency_logstd_min: Optional[float] = None, + test_threshold: Optional[float] = None, + test_loop=None, + summary_writer=None, + clip_nll=None, + use_embeddings: bool = False, + num_embedding_samples=None, + nll_output_file=None, + proba_cdf_file=None, + auc_curve_file=None, + latency_hist_file=None, + operation_id_dict_out=None, # corresponding to latency_std_dict_out + latency_std_dict_out=None, + latency_reldiff_dict_out=None, + p_node_count_dict_out=None, + p_edge_dict_out=None, + latency_dict_prefix='', + ): + # check params + if std_limit is not None: + std_limit = T.as_tensor(std_limit, dtype=T.float32) + + # result buffer + nll_list = [] + label_list = [] + graph_key_list = [] + z_buffer = [] # the z embedding buffer of the graph + z2_buffer = [] # the z2 embedding buffer of the graph + z_label = [] # the label for z and z2 + + def add_embedding(buffer, label, tag, limit=None): + if limit is not None: + indices = np.arange(len(buffer)) + np.random.shuffle(indices) + indices = indices[:limit] + buffer = buffer[indices] + label = label[indices] + summary_writer.add_embedding( + buffer, + metadata=label, + tag=tag, + ) + + # run evaluation + def eval_step(trace_graphs): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G, n_z=n_z, no_latency=no_latency).chain( + vae.p, + latent_axis=0, + G=G, + use_biased=use_biased, + use_latency_biased=use_latency_biased, + no_latency=no_latency, + latency_logstd_min=latency_logstd_min, + latency_log_prob_weight=latency_log_prob_weight, + std_limit=std_limit, + ) + if no_struct: + q, p = chain.q, chain.p + del q['z'] + del p['z'] + del p['adj'] + del p['node_count'] + del p['node_type'] + chain = q.chain(lambda *args, **kwargs: p, latent_axis=0) + + loss = chain.vi.training.sgvb() + nll = -chain.vi.evaluation.is_loglikelihood() + + # clip the nll, and treat 'NaN' or 'Inf' nlls as `config.test.clip_nll` + if clip_nll is not None: + clip_limit = T.float_scalar(clip_nll) + loss = T.where(loss < clip_limit, loss, clip_limit) + nll = T.where(nll < clip_limit, nll, clip_limit) + + # the nlls and labels of this step + step_label_list = np.array([ + 0 if not g.data.get('is_anomaly') else ( + 1 if g.data['anomaly_type'] == 'drop' else 2) + for g in trace_graphs + ]) + + # Load the graph_key + step_graph_key_list = [trace_graph_key(g) for g in trace_graphs] + + if not no_struct: + if use_embeddings: + for i in 
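+# Sketch of the rejection logic above in do_evaluate_prior: a prior sample is
+# kept only if it decodes to a valid TraceGraph and its importance-sampled NLL
+# stays below `nll_threshold`; everything else counts toward `drop_rate`.
+def keep_prior_sample_sketch(nll: float, nll_threshold: float) -> bool:
+    return nll < nll_threshold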
range(len(trace_graphs)): + if step_label_list[i] == 0: + node_type = trace_graphs[i].root.operation_id + node_label = id_manager.operation_id.reverse_map(node_type) + z_label.append(node_label) + z_buffer.append(T.to_numpy(chain.q['z'].tensor[0, i])) + if 'z2' in chain.q: + z2_buffer.append(T.to_numpy(chain.q['z2'].tensor[0, i])) + + # memorize the outputs + nll_list.extend(T.to_numpy(nll)) + label_list.extend(step_label_list) + graph_key_list.extend(step_graph_key_list) + + # return a dict of the test result + ret = {} + normal_losses = T.to_numpy(loss)[step_label_list == 0] + if len(normal_losses) > 0: + test_loss = np.nanmean(normal_losses) + if not math.isnan(test_loss): + ret['loss'] = test_loss + return ret + + with T.no_grad(): + # run test on test set + if test_loop is not None: + with test_loop.timeit('eval_time'): + r = test_loop.run(eval_step, test_stream) + if 'loss' in r: + r['test_loss'] = r['loss'] + else: + test_losses = [] + test_weights = [] + for [trace_graphs] in tqdm(test_stream, total=test_stream.batch_count): + r = eval_step(trace_graphs) + if 'loss' in r: + test_losses.append(r['loss']) + test_weights.append(len(trace_graphs)) + test_weights = np.asarray(test_weights) + + # save the evaluation results + nll_list = np.asarray(nll_list) + label_list = np.asarray(label_list) + graph_key_list = np.asarray(pickle.dumps(graph_key_list)) + + # z embedding + if use_embeddings: + # add the operation embedding + operation_buffer = T.to_numpy(vae.operation_embedding( + T.arange(0, id_manager.num_operations, dtype=T.int64))) + operation_label = [ + id_manager.operation_id.reverse_map(i) + for i in range(id_manager.num_operations) + ] + add_embedding(operation_buffer, operation_label, 'operation') + + # add z & z2 embedding + z_label = np.stack(z_label, axis=0) + add_embedding( + np.stack(z_buffer, axis=0), + z_label, + tag='z', + limit=num_embedding_samples + ) + if z2_buffer: + add_embedding( + np.stack(z2_buffer, axis=0), + z_label, + tag='z2', + limit=num_embedding_samples + ) diff --git a/tracegnn/models/trace_vae/graph_utils.py b/tracegnn/models/trace_vae/graph_utils.py new file mode 100644 index 0000000..340417a --- /dev/null +++ b/tracegnn/models/trace_vae/graph_utils.py @@ -0,0 +1,424 @@ +import math +from dataclasses import dataclass +from typing import * + +import networkx as nx +import numpy as np +import tensorkit as tk +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * +from .constants import * +from .tensor_utils import * +import dgl +import torch + +__all__ = [ + 'flat_to_nx_graphs', + 'p_net_to_trace_graphs', + 'GraphNodeMatch', 'GraphNodeDiff', + 'diff_graph', +] + + +# util to reshape an array +def reshape_to(x, ndims): + shape = T.shape(x) + return T.reshape(x, [-1] + shape[len(shape) - ndims + 1:]) + + +def to_scalar(x): + return T.to_numpy(x).tolist() + + +def flat_to_nx_graphs(p: tk.BayesianNet, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + min_edge_weight: float = 0.2, + ) -> List[nx.Graph]: + """Convert `p` net sampled from a flat TraceVAE to nx.Graph.""" + # extract features + adjs = reshape_to(p['adj'].distribution.probs, 2) + node_counts = T.to_numpy(reshape_to(p['node_count'].tensor, 1)) + node_types = T.to_numpy(reshape_to(p['node_type'].tensor, 2)) + # span_counts = reshape_to(p['span_count'].tensor, 2) + + if 'latency' in p: + latency_src = T.to_numpy(reshape_to(p['latency'].distribution.base_distribution.mean, 3)) + latencies = np.zeros(latency_src.shape, 
dtype=np.float32) + for i in range(node_types.shape[0]): + for j in range(node_types.shape[1]): + try: + node_type = int(node_types[i, j]) + mu, std = latency_range[node_type] + latencies[i, j] = latency_src[i, j] * std + mu + except KeyError: + latencies[i, j] = -1. # todo: is this okay? + else: + latencies = None + + # build the graph + ret = [] + for i, node_count in enumerate(node_counts): + g = nx.Graph() + + # add nodes + for j in range(node_count): + g.add_node(j) + + # add edges + adj = triu_to_dense(adjs[i: i+1], MAX_NODE_COUNT) + for u in range(node_count): + for v in range(u + 1, node_count): + w = float(to_scalar(adj[u, v])) + if w >= min_edge_weight: + g.add_edge(u, v, weight=w) + + # add node attributes + for j in range(node_count): + node_type = int(node_types[i, j]) + g.nodes[j]['node_type'] = node_type + g.nodes[j]['operation'] = id_manager.operation_id.reverse_map(node_type) + if latencies is not None: + for k, pfx in enumerate(('avg_', 'max_', 'min_')): + if k < LATENCY_DIM: + g.nodes[j][f'{pfx}latency'] = latencies[i, j, k] + + # g.nodes[j]['span_count'] = to_scalar(span_counts[i, j]) + # for pfx in ('avg_', 'max_', 'min_'): + # g.nodes[j][f'{pfx}latency'] = latencies[f'{pfx}latency'][i, j] + + ret.append(g) + + # return the graphs + return ret + + +def p_net_to_trace_graphs(p: tk.BayesianNet, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + discard_node_with_type_0: bool = True, + discard_node_with_unknown_latency_range: bool = True, + discard_graph_with_error_node_count: bool = False, + keep_front_shape: bool = False, + ) -> Union[List[Optional[TraceGraph]], np.ndarray]: + """Convert `p` net sampled from a flat TraceVAE to TraceGraph.""" + if USE_MULTI_DIM_LATENCY_CODEC: + raise RuntimeError(f'`USE_MULTI_DIM_LATENCY_CODEC` is not supported.') + + # find the base distribution (Normal, Categorical, OneHotCategorical) + def find_base(t: tk.StochasticTensor): + d = t.distribution + while not isinstance(d, (tk.Normal, + tk.Bernoulli, + tk.Categorical, + tk.OneHotCategorical)): + d = d.base_distribution + return d + + # extract features + def get_adj(t, pad_value=0): + t = reshape_to(t, 2) + return np.stack( + [ + T.to_numpy(triu_to_dense( + t[i: i + 1], + MAX_NODE_COUNT, + pad_value=pad_value + )) + for i in range(len(t)) + ], + axis=0 + ) + + def bernoulli_log_prob(l): + # log(1 / (1 + exp(-l)) = log(exp(l) / (1 + exp(l))) + return T.where( + l >= 0, + -T.log1p(T.exp(-l)), + l - T.log1p(T.exp(l)), + ) + + def softmax_log_prob(l): + # log(exp(l) / sum(exp(l)) + return l - T.log_sum_exp(l, axis=[-1], keepdims=True) + + front_shape = T.shape(p['adj'].tensor)[:-1] + + adjs = get_adj(p['adj'].tensor) + adj_probs = get_adj(find_base(p['adj']).probs) + adj_logits = get_adj(bernoulli_log_prob(find_base(p['adj']).logits), pad_value=-100000) + + node_counts = T.to_numpy(reshape_to(p['node_count'].tensor, 1)) + node_types = T.to_numpy(reshape_to(p['node_type'].tensor, 2)) + node_count_logits = T.to_numpy(reshape_to(softmax_log_prob(find_base(p['node_count']).logits), 2)) + node_type_logits = T.to_numpy(reshape_to(softmax_log_prob(find_base(p['node_type']).logits), 3)) + + if 'latency' in p: + latencies = T.to_numpy(reshape_to(p['latency'].tensor, 3)) + avg_latencies = latencies[..., 0] + latency_means = T.to_numpy(reshape_to(find_base(p['latency']).mean, 3)) + latency_stds = T.to_numpy(reshape_to(find_base(p['latency']).std, 3)) + + # build the graph + ret = [] + for i, node_count in enumerate(node_counts): + # extract the arrays + adj = 
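+# The two helpers above are numerically stable forms of log-sigmoid and
+# log-softmax. Equivalent PyTorch one-liners (sketch):
+import torch.nn.functional as F
+
+def bernoulli_log_prob_sketch(l):
+    return F.logsigmoid(l)  # == -log(1 + exp(-l)), evaluated stably
+
+def softmax_log_prob_sketch(l):
+    return F.log_softmax(l, dim=-1)  # == l - logsumexp(l), evaluated stably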
adjs[i][:node_count][:, :node_count]
+        adj_prob = adj_probs[i][:node_count][:, :node_count]
+        adj_logit = adj_logits[i]  # [:node_count][:, :node_count]
+        node_type = node_types[i]  # [:node_count]
+        node_mask = np.full([node_count], True, dtype=bool)
+        node_count_logit = node_count_logits[i]
+        node_type_logit = node_type_logits[i]
+
+        if 'latency' in p:
+            avg_latency = avg_latencies[i]
+            latency_mean = latency_means[i]
+            latency_std = latency_stds[i]
+
+        # if `discard_node_with_type_0`, zero out all adj entries from / to nodes
+        # with `node_type == 0` (and likewise for nodes with unknown latency range)
+        node_count_new = node_count
+        for j in range(node_count):
+            n_type = int(node_type[j])
+            if (discard_node_with_type_0 and n_type == 0) or \
+                    (discard_node_with_unknown_latency_range and n_type not in latency_range):
+                node_mask[j] = False
+                node_count_new -= 1
+                adj[:, j] = 0
+                adj[j, :] = 0
+                adj_prob[:, j] = 0
+                adj_prob[j, :] = 0
+
+        # for each column in `adj`, if there is not exactly one in-edge,
+        # choose an in-edge sampled w.r.t. adj_prob
+        for j in range(node_count):
+            if node_mask[j] and np.sum(adj[:, j]) != 1:
+                prob_vec = adj_prob[:, j]
+                prob_sum = np.sum(prob_vec)
+                if prob_sum > 1e-7:
+                    pvals = prob_vec / np.sum(prob_vec)
+                    pvals_mask = pvals > 1e-7
+                    indices = np.arange(len(pvals))[pvals_mask]
+                    k = indices[np.argmax(np.random.multinomial(1, pvals[pvals_mask]))]
+                    adj[:, j] = 0
+                    adj[k, j] = 1
+
+        # select the edges
+        edges = list(zip(*np.where(adj)))
+        if len(edges) < node_count_new - 1:
+            # pick out the root sub-graph
+            union_set = {j: -1 for j in range(node_count) if node_mask[j]}
+
+            def find_root(s):
+                t = union_set[s]
+                if t == -1:
+                    return s
+                r = find_root(t)
+                if r != t:
+                    union_set[s] = r
+                return r
+
+            def link_edge(s, t):
+                union_set[t] = s
+
+            edges_new = []
+            for s, t in edges:
+                link_edge(s, t)
+            for s, t in edges:
+                if s == 0 or find_root(s) == 0:
+                    edges_new.append((s, t))
+
+            edges = edges_new
+            node_count_new = len(edges_new) + 1
+
+        if discard_graph_with_error_node_count and (node_count_new != node_count):
+            ret.append(None)
+            continue
+
+        # build the trace graph
+        def get_node(s):
+            if s not in nodes:
+                n_type = node_type[s]
+                if 'latency' in p:
+                    latency = avg_latency[s]
+                    if n_type in latency_range:
+                        mu, std = latency_range[n_type]
+                        latency = latency * std + mu
+                    features = TraceGraphNodeFeatures(
+                        span_count=1,
+                        avg_latency=latency,
+                        max_latency=latency,
+                        min_latency=latency,
+                    )
+                    avg_latency_nstd = float(
+                        abs(avg_latency[s] - latency_mean[s, 0]) /
+                        latency_std[s, 0]
+                    )
+                else:
+                    features = TraceGraphNodeFeatures(
+                        span_count=1,
+                        avg_latency=math.nan,
+                        max_latency=math.nan,
+                        min_latency=math.nan,
+                    )
+                    avg_latency_nstd = 0
+
+                nodes[s] = TraceGraphNode.new_sampled(
+                    node_id=s,
+                    operation_id=node_type[s],
+                    features=features,
+                    scores=TraceGraphNodeReconsScores(
+                        edge_logit=0,
+                        operation_logit=node_type_logit[s, n_type],
+                        avg_latency_nstd=avg_latency_nstd,
+                    )
+                )
+            return nodes[s]
+
+        nodes = {}
+        edges.sort()
+        for u, v in edges:
+            if node_mask[u] and node_mask[v]:
+                v_node = get_node(v)
+                get_node(u).children.append(v_node)
+                v_node.scores.edge_logit = adj_logit[u, v]
+
+        if 0 in nodes:
+            g = TraceGraph.new_sampled(nodes[0], len(nodes), -1)
+            g.merge_spans_and_assign_id()
+            ret.append(g)
+        else:
+            ret.append(None)
+
+    # return the graphs
+    if keep_front_shape:
+        ret = np.array(ret).reshape(front_shape)
+
+    return ret
+
+
+@dataclass(init=False)
+class GraphNodeMatch(object):
+    __slots__ = [
+        'g1_to_g2',
+        'g2_to_g1',
+    ]
+
+    g1_to_g2: Dict[TraceGraphNode,
TraceGraphNode] + g2_to_g1: Dict[TraceGraphNode, TraceGraphNode] + + def __init__(self): + self.g1_to_g2 = {} + self.g2_to_g1 = {} + + def add_match(self, node1, node2): + self.g1_to_g2[node1] = node2 + self.g2_to_g1[node2] = node1 + + +@dataclass(init=False) +class GraphNodeDiff(object): + __slots__ = [ + 'parent', 'depth', 'node', 'offset', 'node_count', + ] + + parent: Optional[TraceGraphNode] + depth: int + node: TraceGraphNode + offset: int # -1: present in g but absent in g2; 1: present in g2 but absent in g1 + node_count: int # count of nodes in this branch + + def __init__(self, parent, depth, node, offset): + self.parent = parent + self.depth = depth + self.node = node + self.offset = offset + self.node_count = node.count_nodes() + + def __repr__(self): + return f'GraphNodeDiff(depth={self.depth}, offset={self.offset})' + + +def diff_graph(g1: TraceGraph, + g2: TraceGraph + ) -> Tuple[GraphNodeMatch, List[GraphNodeDiff]]: + m = GraphNodeMatch() + ret = [] + + def match_node(depth: int, + parent1: Optional[TraceGraphNode], + parent2: Optional[TraceGraphNode], + node1: Optional[TraceGraphNode], + node2: Optional[TraceGraphNode]): + if node1 is None: + if node2 is None: + pass + else: + ret.append(GraphNodeDiff(parent=parent2, depth=depth, node=node2, offset=1)) + else: + if node2 is None: + ret.append(GraphNodeDiff(parent=parent1, depth=depth, node=node1, offset=-1)) + elif node1.operation_id != node2.operation_id: + ret.append(GraphNodeDiff(parent=parent1, depth=depth, node=node1, offset=-1)) + ret.append(GraphNodeDiff(parent=parent2, depth=depth, node=node2, offset=1)) + else: + m.add_match(node1, node2) + c_depth = depth + 1 + + i, j = 0, 0 + while i < len(node1.children) and j < len(node2.children): + c1 = node1.children[i] + c2 = node2.children[j] + if c1.operation_id < c2.operation_id: + match_node(c_depth, node1, None, c1, None) + i += 1 + elif c2.operation_id < c1.operation_id: + match_node(c_depth, None, node2, None, c2) + j += 1 + else: + match_node(c_depth, node1, node2, c1, c2) + i += 1 + j += 1 + + while i < len(node1.children): + c1 = node1.children[i] + match_node(c_depth, node1, None, c1, None) + i += 1 + + while j < len(node2.children): + c2 = node2.children[j] + match_node(c_depth, None, node2, None, c2) + j += 1 + + match_node(0, None, None, g1.root, g2.root) + return m, ret + + +def dgl_graph_key(graph: dgl.DGLGraph) -> str: + return edges_to_key(graph.ndata['operation_id'], *graph.edges()) + +@torch.jit.script +def edges_to_key(operation_id: torch.Tensor, u_list: torch.Tensor, v_list: torch.Tensor) -> str: + mask = u_list != v_list + u_id: List[int] = operation_id[u_list][mask].tolist() + v_id: List[int] = operation_id[v_list][mask].tolist() + + graph_key = f'0,{operation_id[0].item()};' + ';'.join(sorted([f'{u},{v}' for (u, v) in zip(u_id, v_id)])) + + return graph_key + +def trace_graph_key(graph: TraceGraph) -> str: + def dfs(nd: TraceGraphNode, pa_id: int, cnt: int=1): + cur_cnt = cnt * len(nd.spans) + spans = [f'{pa_id},{nd.operation_id}'] * cur_cnt + + for child in nd.children: + spans += dfs(child, nd.operation_id, cur_cnt) + + return spans + + spans = dfs(graph.root, 0, 1) + + return ';'.join(sorted(spans)) diff --git a/tracegnn/models/trace_vae/model/__init__.py b/tracegnn/models/trace_vae/model/__init__.py new file mode 100644 index 0000000..fde6b5f --- /dev/null +++ b/tracegnn/models/trace_vae/model/__init__.py @@ -0,0 +1 @@ +from .trace_vae import * diff --git a/tracegnn/models/trace_vae/model/gnn_layers.py 
b/tracegnn/models/trace_vae/model/gnn_layers.py new file mode 100644 index 0000000..c9e8871 --- /dev/null +++ b/tracegnn/models/trace_vae/model/gnn_layers.py @@ -0,0 +1,190 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +from dgl import nn as gnn +from tensorkit import tensor as T + +__all__ = [ + 'GNNLayerType', + 'GNNLayerConfig', + 'make_gnn_layers', + 'apply_gnn_layer', + 'GNNSequential', + 'GATConvAgg', + 'GraphConv', +] + + +class GNNLayerType(str, Enum): + GAT = 'GAT' + GraphConv = 'GraphConv' + + +class GNNLayerConfig(mltk.Config): + type: GNNLayerType = GNNLayerType.GAT + + # whether to use batch norm? + use_batch_norm: bool = True + + # config for GAT + class gat(mltk.Config): + num_attention_heads: int = 2 + + +def make_gnn_layers(config: GNNLayerConfig, + input_dim: int, + gnn_layers: List[int], + ): + if config.use_batch_norm: + normalization_factory = tk.layers.BatchNorm + else: + normalization_factory = lambda num_inputs: None + + layers = [] + for size in gnn_layers: + if config.type == GNNLayerType.GAT: + layers.append(GATConvAgg( + input_dim, + size, + config.gat.num_attention_heads, + activation=tk.layers.LeakyReLU(), + normalization_factory=normalization_factory, + )) + elif config.type == GNNLayerType.GraphConv: + layers.append(GraphConv( + input_dim, + size, + activation=tk.layers.LeakyReLU(), + normalization_factory=normalization_factory, + )) + else: + raise ValueError(f'Unsupported GNN type: {config.type!r}') + input_dim = layers[-1].output_dim + + return input_dim, layers + + +def apply_gnn_layer(layer, g, h): + if isinstance(g, (list, tuple)): + if len(h.shape) == 3: + if len(g) != h.shape[0]: + raise ValueError(f'len(g) != h.shape[0]: {len(g)} vs {h.shape[0]}') + return T.stack( + [ + layer(g[i], h[i]) + for i in range(len(g)) + ], + axis=0 + ) + else: + return T.stack( + [ + layer(g[i], h) + for i in range(len(g)) + ], + axis=0 + ) + else: + if len(h.shape) == 3: + return T.stack( + [ + layer(g, h[i]) + for i in range(h.shape[0]) + ], + axis=0 + ) + else: + return layer(g, h) + + +class GNNSequential(tk.layers.BaseLayer): + + def __init__(self, layers): + super().__init__() + self.gnn = gnn.Sequential(*layers) + + def forward(self, g, h): + return apply_gnn_layer(self.gnn, g, h) + + +class GATConvAgg(tk.layers.BaseLayer): + """First apply `dgl.nn.GATConv` then aggregate the multi attention heads.""" + + aggregate_mode: str + output_dim: int + + def __init__(self, input_dim: int, output_dim: int, num_heads: int, + aggregate_mode: str = 'concat', activation=None, + normalization_factory=None): + super().__init__() + + if aggregate_mode == 'concat': + self.output_dim = output_dim * num_heads + elif aggregate_mode in ('mean', 'avg'): + self.output_dim = output_dim + else: + raise ValueError(f'Unsupported aggregate_mode: {aggregate_mode!r}') + + self.activation = activation + self.normalization = None if normalization_factory is None else \ + normalization_factory(self.output_dim) + + self.gnn = gnn.GATConv( + input_dim, + output_dim, + num_heads, + activation=None, + ) + self.aggregate_mode = aggregate_mode + + def forward(self, g, h): + h = self.gnn(g, h) + if self.aggregate_mode == 'concat': + h = T.concat( + [h[..., i, :] for i in range(h.shape[-2])], + axis=-1 + ) + else: + h = T.reduce_mean(h, axis=[-2]) + + if self.normalization is not None: + h = self.normalization(h) + if self.activation is not None: + h = self.activation(h) + + return h + + +class GraphConv(tk.layers.BaseLayer): + + output_dim: int + + def 
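+# Sketch of the head aggregation in GATConvAgg above: dgl's GATConv returns
+# features shaped (..., num_heads, out_dim); 'concat' flattens the head axis
+# while 'mean' averages over it.
+import torch
+
+def aggregate_heads_sketch(h: torch.Tensor, mode: str = 'concat') -> torch.Tensor:
+    # h: (num_nodes, num_heads, out_dim)
+    if mode == 'concat':
+        return h.reshape(*h.shape[:-2], h.shape[-2] * h.shape[-1])
+    return h.mean(dim=-2)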
__init__(self, input_dim: int, output_dim: int, activation=None, + normalization_factory=None): + super().__init__() + self.output_dim = output_dim + + self.activation = activation + self.normalization = None if normalization_factory is None else \ + normalization_factory(self.output_dim) + + self.gnn = gnn.GraphConv( + input_dim, + output_dim, + norm='both', + weight=self.normalization is None, + bias=self.normalization is None, + activation=None, + ) + + def forward(self, g, h): + h = self.gnn(g, h) + + if self.normalization is not None: + h = self.normalization(h) + if self.activation is not None: + h = self.activation(h) + + return h diff --git a/tracegnn/models/trace_vae/model/latency_vae.py b/tracegnn/models/trace_vae/model/latency_vae.py new file mode 100644 index 0000000..1252675 --- /dev/null +++ b/tracegnn/models/trace_vae/model/latency_vae.py @@ -0,0 +1,444 @@ +from typing import * + +import dgl +import mltk +import tensorkit as tk +from tensorkit import tensor as T + +from ..constants import * +from ..distributions import * +from ..tensor_utils import node_count_mask +from .gnn_layers import * +from .model_utils import * +from .operation_embedding import * +from .pooling import * +from .realnvp_flow import * + +__all__ = [ + 'TraceLatencyVAEConfig', + 'TraceLatencyVAE', +] + + +class TraceLatencyVAEConfig(mltk.Config): + # whether to use the operation embedding? (but grad will be blocked) + use_operation_embedding: bool = True + + # the dimension of z2 (to encode latency) + z2_dim: int = 10 + + # the config of posterior / prior flow + realnvp: RealNVPFlowConfig = RealNVPFlowConfig() + + # whether to use BatchNorm? + use_batch_norm: bool = True + + class encoder(mltk.Config): + # ================ + # h(G) for q(z2|G) + # ================ + # the gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the gnn layer sizes for q(z2|...) + gnn_layers: List[int] = [500, 500, 500, 500] + + # whether to stop gradient to operation_embedding along this path? + operation_embedding_stop_grad: bool = True + + # ============= + # graph pooling + # ============= + pool_type: PoolingType = PoolingType.AVG + pool_config: PoolingConfig = PoolingConfig() + + # ======= + # q(z2|G) + # ======= + z2_logstd_min: Optional[float] = -7 + z2_logstd_max: Optional[float] = 2 + + # whether to use realnvp posterior flow? + use_posterior_flow: bool = False + + class decoder(mltk.Config): + # ==================== + # decoder architecture + # ==================== + use_prior_flow: bool = False + + # p(z2|z) n_mixtures + z2_prior_mixtures: int = 1 + + # whether z2 should condition on z? + condition_on_z: bool = True + + # z2 given z hidden layers + z2_given_z_stop_grad: bool = True + z2_given_z_hidden_layers: List[int] = [250, 250] + z2_logstd_min: Optional[float] = -5 + z2_logstd_max: Optional[float] = 2 + + # ======= + # latency + # ======= + # gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the node types from node embedding e + gnn_layers: List[int] = [500, 500, 500, 500] + + # hidden layers for graph embedding from z + graph_embedding_layers: List[int] = [500, 500] + + # size of the latent embedding e + latent_embedding_size: int = 40 + + # whether to stop gradient to operation_embedding along this path? + operation_embedding_stop_grad: bool = True + + # ============== + # p(latency|...) + # ============== + # the minimum value for latency logstd + latency_logstd_min: Optional[float] = -7 + + # whether to use mask on p(latency|...)? 
+        use_latency_mask: bool = True
+
+        # whether to clip the latency to one dim even if three dims are provided?
+        clip_latency_to_one_dim: bool = False
+
+        # whether to use the biased distribution in p(latency|...)?
+        use_biased_latency: bool = False
+
+        # whether to use `AnomalyDetectionNormal`?
+        use_anomaly_detection_normal: bool = False
+
+        # the `std_threshold` for AnomalyDetectionNormal or BiasedNormal in testing
+        biased_normal_std_threshold: float = 4.0
+
+        # the `std_threshold` for SafeNormal in training
+        safe_normal_std_threshold: float = 6.0
+
+
+class TraceLatencyVAE(tk.layers.BaseLayer):
+
+    config: TraceLatencyVAEConfig
+    num_node_types: int
+
+    def __init__(self,
+                 config: TraceLatencyVAEConfig,
+                 z_dim: int,  # z dimension of the struct_vae
+                 operation_embedding: OperationEmbedding,
+                 ):
+        super().__init__()
+
+        # ===================
+        # memorize the config
+        # ===================
+        self.config = config
+        self.z_dim = z_dim
+
+        # =============================
+        # node embedding for operations
+        # =============================
+        self.operation_embedding = operation_embedding
+        self.num_node_types = operation_embedding.num_operations
+
+        # ========================
+        # standard layer arguments
+        # ========================
+        layer_args = tk.layers.LayerArgs()
+        layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU)
+        if config.use_batch_norm:
+            layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm)
+
+        # ===========================
+        # q(z2|adj,node_type,latency)
+        # ===========================
+        if config.use_operation_embedding:
+            input_size = self.operation_embedding.embedding_dim
+        else:
+            input_size = self.num_node_types
+        output_size, gnn_layers = make_gnn_layers(
+            config.encoder.gnn,
+            (
+                input_size +
+                LATENCY_DIM  # avg, min, max
+            ),
+            config.encoder.gnn_layers,
+        )
+        self.qz2_gnn_layers = GNNSequential(
+            gnn_layers + [
+                make_graph_pooling(
+                    output_size,
+                    config.encoder.pool_type,
+                    config.encoder.pool_config
+                ),
+            ]
+        )
+        self.qz2_mean = tk.layers.Linear(output_size, config.z2_dim)
+        self.qz2_logstd = tk.layers.Linear(output_size, config.z2_dim)
+
+        if config.encoder.use_posterior_flow:
+            self.qz_flow = make_realnvp_flow(config.z2_dim, config.realnvp)
+
+        # ================
+        # p(z2) or p(z2|z)
+        # ================
+        if config.decoder.condition_on_z:
+            if config.decoder.use_prior_flow and config.decoder.z2_prior_mixtures > 1:
+                raise ValueError(f'`use_prior_flow == True` and `z2_prior_mixtures > 1` cannot both be True.')
+
+            n_mixtures = config.decoder.z2_prior_mixtures
+            z2_given_z_builder = tk.layers.SequentialBuilder(
+                self.z_dim,
+                layer_args=layer_args
+            )
+            for size in config.decoder.z2_given_z_hidden_layers:
+                z2_given_z_builder.dense(size)
+            self.z2_given_z_hidden_layers = z2_given_z_builder.build(flatten_to_ndims=True)
+            self.pz2_mean = z2_given_z_builder.as_input().linear(config.z2_dim * n_mixtures).build()
+            self.pz2_logstd = z2_given_z_builder.as_input().linear(config.z2_dim * n_mixtures).build()
+
+        if config.decoder.use_prior_flow:
+            self.pz2_flow = make_realnvp_flow(config.z2_dim, config.realnvp).invert()
+
+        # node features from gnn
+        input_size = config.z2_dim
+
+        if config.use_operation_embedding:
+            input_size += self.operation_embedding.embedding_dim
+        else:
+            input_size += self.num_node_types
+
+        output_size, gnn_layers = make_gnn_layers(
+            config.decoder.gnn,
+            input_size,
+            config.decoder.gnn_layers,
+        )
+        self.pG_node_features = GNNSequential(
+            gnn_layers +
+            [
+                GraphConv(  # p(latency|e)
+                    output_size,
+                    2 * LATENCY_DIM  # (mean, logstd) *
(avg, min, max) + ), + ] + ) + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def q(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None): + config = self.config + + # compose feature vector + if config.use_operation_embedding: + h2 = self.operation_embedding(g.ndata['node_type']) + if config.encoder.operation_embedding_stop_grad: + h2 = T.stop_grad(h2) + else: + h2 = T.one_hot( + g.ndata['node_type'], + self.num_node_types, + dtype=T.float32, + ) + h = T.concat([h2, g.ndata['latency'][..., :LATENCY_DIM]], axis=-1) + + # feed into gnn and get node embeddings + h = self.qz2_gnn_layers(g, h) + + # mean and logstd for q(z2|G) + z2_mean = self.qz2_mean(h) + z2_logstd = T.maybe_clip( + self.qz2_logstd(h), + min_val=config.encoder.z2_logstd_min, + max_val=config.encoder.z2_logstd_max, + ) + + # add 'z2' random variable + qz2 = tk.Normal(mean=z2_mean, logstd=z2_logstd, event_ndims=1) + if config.encoder.use_posterior_flow: + qz2 = tk.FlowDistribution(qz2, self.qz_flow) + z2 = net.add('z2', qz2, n_samples=n_z) + + def p(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None, + use_biased: bool = False, + latency_logstd_min: Optional[float] = None, + latency_log_prob_weight: bool = False, + std_limit: Optional[T.Tensor] = None, + ): + config = self.config + + # sample z2 ~ p(z2) or p(z2|z) + if config.decoder.condition_on_z: + h = net['z'].tensor + if config.decoder.z2_given_z_stop_grad: + h = T.stop_grad(h) + h = self.z2_given_z_hidden_layers(h) + z2_mean = self.pz2_mean(h) + z2_logstd = T.maybe_clip( + self.pz2_logstd(h), + min_val=config.decoder.z2_logstd_min, + max_val=config.decoder.z2_logstd_max, + ) + + n_mixtures = config.decoder.z2_prior_mixtures + if n_mixtures > 1: + z2_mean_list = T.split(z2_mean, [config.z2_dim] * n_mixtures, axis=-1) + z2_logstd_list = T.split(z2_logstd, [config.z2_dim] * n_mixtures, axis=-1) + pz2 = tk.Mixture( + categorical=tk.Categorical( + logits=T.zeros(T.shape(z2_mean)[:-1] + [n_mixtures]), + ), + components=[ + tk.Normal(mean=mu, logstd=logstd, event_ndims=1) + for mu, logstd in zip(z2_mean_list, z2_logstd_list) + ], + reparameterized=True, + ) + else: + pz2 = tk.Normal(mean=z2_mean, logstd=z2_logstd, event_ndims=1) + else: + pz2 = tk.UnitNormal([1, config.z2_dim], event_ndims=1) + + if config.decoder.use_prior_flow: + pz2 = tk.FlowDistribution(pz2, self.pz2_flow) + + z2 = net.add('z2', pz2, n_samples=n_z) + + # z2 as context + z2_shape = T.shape(z2.tensor) + h = T.reshape(z2.tensor, z2_shape[:-1] + [1, z2_shape[-1]]) + + # concat with node type information + if config.use_operation_embedding: + h2 = self.operation_embedding(net['node_type'].tensor) + if config.decoder.operation_embedding_stop_grad: + h2 = T.stop_grad(h2) + else: + h2 = T.one_hot( + net['node_type'].tensor, + self.num_node_types, + dtype=T.float32, + ) + h = T.broadcast_concat(h, h2, axis=-1) + h2 = None + + # node_features from gnn + h_shape = T.shape(h) + h = T.reshape( + h, + h_shape[:-3] + [h_shape[-3] * h_shape[-2], h_shape[-1]] + ) + node_features = self.pG_node_features(g, h) + + # mean & logstd for p(latency|z2,G) + if latency_logstd_min is not None: + if config.decoder.latency_logstd_min is not None: + latency_logstd_min = max( + latency_logstd_min, + config.decoder.latency_logstd_min + ) + else: + latency_logstd_min = config.decoder.latency_logstd_min + + latency_mean = T.reshape( + node_features[..., :LATENCY_DIM], # avg, 
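+# A sketch of the n_mixtures > 1 prior above, written with torch.distributions
+# for clarity (the model itself uses tensorkit's Mixture): the decoder emits
+# n_mixtures stacked (mean, logstd) pairs, mixed with uniform weights.
+import torch
+from torch import distributions as D
+
+def mixture_prior_sketch(mean: torch.Tensor, logstd: torch.Tensor, n_mixtures: int, z2_dim: int):
+    means = torch.stack(mean.split(z2_dim, dim=-1), dim=-2)          # (..., K, z2_dim)
+    stds = torch.stack(logstd.split(z2_dim, dim=-1), dim=-2).exp()   # (..., K, z2_dim)
+    mix = D.Categorical(logits=torch.zeros(means.shape[:-1]))        # uniform over K
+    comp = D.Independent(D.Normal(means, stds), 1)
+    return D.MixtureSameFamily(mix, comp)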
min, max + h_shape[:-1] + [LATENCY_DIM] + ) + latency_logstd = T.maybe_clip( + T.reshape( + node_features[..., LATENCY_DIM: LATENCY_DIM*2], + h_shape[:-1] + [LATENCY_DIM] + ), + min_val=latency_logstd_min, + ) + + if std_limit is not None: + logstd_limit = T.log( + T.clip_left( + std_limit[net['node_type'].tensor], + 1e-7 + ) + ) + logstd_limit = T.stop_grad(logstd_limit) + logstd_limit = T.expand_dim(logstd_limit, axis=-1) + latency_logstd = T.minimum(latency_logstd, logstd_limit) + + # clip the latency + if config.decoder.clip_latency_to_one_dim: + latency_mean = latency_mean[..., :1] + latency_logstd = latency_logstd[..., :1] + + # p(latency|z2,G) + if config.decoder.use_latency_mask: + inner_event_ndims = 0 + else: + inner_event_ndims = 2 + + if self.training: + p_latency = SafeNormal( + std_threshold=config.decoder.safe_normal_std_threshold, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + elif use_biased and config.decoder.use_biased_latency: + if config.decoder.use_anomaly_detection_normal: + p_latency = AnomalyDetectionNormal( + std_threshold=config.decoder.biased_normal_std_threshold, + bias_alpha=MAX_NODE_COUNT, + bias_threshold=0.5, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + else: + p_latency = BiasedNormal( + alpha=MAX_NODE_COUNT, + std_threshold=config.decoder.biased_normal_std_threshold, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + else: + p_latency = tk.Normal( + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + + if config.decoder.use_latency_mask: + # mask + mask = node_count_mask( + net['node_count'].tensor, + MAX_NODE_COUNT, + dtype=T.boolean, + ) + mask = T.stop_grad(mask) + mask = T.expand_dim(mask, axis=-1) + + # log_prob_weight + if latency_log_prob_weight: + log_prob_weight = T.cast(net['node_count'].tensor, dtype=T.float32) + log_prob_weight = T.float_scalar(MAX_NODE_COUNT) / log_prob_weight + log_prob_weight = T.reshape(log_prob_weight, T.shape(log_prob_weight) + [1, 1]) + log_prob_weight = T.stop_grad(log_prob_weight) + else: + log_prob_weight = None + + # p(latency|...) 
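+# Sketch of the mask construction assumed for `node_count_mask` above: one row
+# per graph, True for the first node_count slots out of MAX_NODE_COUNT (the
+# actual helper lives in tensor_utils; this mirrors its apparent semantics).
+import torch
+
+def node_count_mask_sketch(node_count: torch.Tensor, max_node_count: int) -> torch.Tensor:
+    return torch.arange(max_node_count) < node_count.unsqueeze(-1)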
+ p_latency = MaskedDistribution(p_latency, mask, log_prob_weight, event_ndims=2) + + latency = net.add('latency', p_latency) diff --git a/tracegnn/models/trace_vae/model/model_utils.py b/tracegnn/models/trace_vae/model/model_utils.py new file mode 100644 index 0000000..5f9d5de --- /dev/null +++ b/tracegnn/models/trace_vae/model/model_utils.py @@ -0,0 +1,35 @@ +from typing import * +from tensorkit import tensor as T + +from ..constants import * + +__all__ = [ + 'decoder_use_depth_and_idx', +] + + +def decoder_use_depth_and_idx(g, + use_depth: bool, + use_idx: bool + ) -> Optional[T.Tensor]: + def use_tensor(name, num_classes): + if isinstance(g, list): + t = T.stack([g2.ndata[name] for g2 in g], axis=0) + else: + t = g.ndata[name] + t_shape = T.shape(t) + t = T.reshape( + t, + t_shape[:-1] + [t_shape[-1] // MAX_NODE_COUNT, MAX_NODE_COUNT] + ) + t = T.one_hot(t, num_classes, dtype=T.float32) + return t + + buf = [] + if use_depth: + buf.append(use_tensor('node_depth', MAX_DEPTH + 1)) + if use_idx: + buf.append(use_tensor('node_idx', MAX_NODE_COUNT)) + + if buf: + return T.concat(buf, axis=-1) diff --git a/tracegnn/models/trace_vae/model/operation_embedding.py b/tracegnn/models/trace_vae/model/operation_embedding.py new file mode 100644 index 0000000..c7c8c7c --- /dev/null +++ b/tracegnn/models/trace_vae/model/operation_embedding.py @@ -0,0 +1,25 @@ +import tensorkit as tk +from tensorkit import tensor as T +from torch.nn import Embedding + +__all__ = [ + 'OperationEmbedding', +] + + +class OperationEmbedding(tk.layers.BaseLayer): + + num_operations: int + embedding_dim: int + + def __init__(self, num_operations: int, embedding_dim: int): + super().__init__() + self.num_operations = num_operations + self.embedding_dim = embedding_dim + self.node_embedding = Embedding(num_operations, embedding_dim) + + def forward(self, node_type: T.Tensor) -> T.Tensor: + node_type, shape = T.flatten_to_ndims(node_type, 1) + node_type = self.node_embedding(node_type) + node_type = T.unflatten_from_ndims(node_type, shape) + return node_type diff --git a/tracegnn/models/trace_vae/model/pooling.py b/tracegnn/models/trace_vae/model/pooling.py new file mode 100644 index 0000000..c40a9a5 --- /dev/null +++ b/tracegnn/models/trace_vae/model/pooling.py @@ -0,0 +1,75 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +import torch +from dgl import nn as gnn +from tensorkit import tensor as T + +__all__ = [ + 'PoolingType', + 'PoolingConfig', + 'make_graph_pooling', + 'graph_node_offsets', + 'RootPooling', +] + + +class PoolingType(str, Enum): + ROOT = 'root' + AVG = 'avg' + ATTENTION = 'attention' # graph attention pooling + + +class PoolingConfig(mltk.Config): + # whether to use batch norm? + use_batch_norm: bool = True + + # config for ATTENTION + class attention(mltk.Config): + hidden_layers: List[int] = [] + + +def make_graph_pooling(feature_size: int, + pool_type: Union[str, PoolingType], + pool_config: PoolingConfig): + layer_args = tk.layers.LayerArgs() + layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU) + if pool_config.use_batch_norm: + layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm) + + if pool_type == PoolingType.ROOT: + return RootPooling() # is this okay? 
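+# Note on OperationEmbedding above: torch.nn.Embedding already accepts N-D
+# index tensors; the flatten/unflatten pair mirrors tensorkit's
+# flatten_to_ndims convention. Equivalent sketch:
+import torch
+
+def embed_any_shape_sketch(emb: torch.nn.Embedding, ids: torch.Tensor) -> torch.Tensor:
+    return emb(ids.reshape(-1)).reshape(*ids.shape, emb.embedding_dim)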
+ elif pool_type == PoolingType.AVG: + return gnn.AvgPooling() + elif pool_type == PoolingType.ATTENTION: + gap_nn_builder = tk.layers.SequentialBuilder( + feature_size, + layer_args=layer_args, + ) + for size in pool_config.attention.hidden_layers: + gap_nn_builder.dense(size) + return gnn.GlobalAttentionPooling(gap_nn_builder.linear(1).build()) + else: + raise ValueError(f'Unsupported `config.encoder.pool_type`: {pool_type!r}') + + +def graph_node_offsets(seglen): + ret = torch.cumsum( + T.concat( + [ + T.zeros([1], dtype=T.get_dtype(seglen), device=T.get_device(seglen)), + seglen + ], + axis=0 + ), + dim=0 + ) + return ret[:-1] + + +class RootPooling(tk.layers.BaseLayer): + + def forward(self, graph, feat): + return feat[graph_node_offsets(graph.batch_num_nodes())] diff --git a/tracegnn/models/trace_vae/model/realnvp_flow.py b/tracegnn/models/trace_vae/model/realnvp_flow.py new file mode 100644 index 0000000..92d84cf --- /dev/null +++ b/tracegnn/models/trace_vae/model/realnvp_flow.py @@ -0,0 +1,49 @@ +import mltk +import tensorkit as tk + +__all__ = [ + 'RealNVPFlowConfig', + 'make_realnvp_flow', +] + + +class RealNVPFlowConfig(mltk.Config): + flow_levels: int = 5 + coupling_hidden_layer_count: int = 1 + coupling_hidden_layer_units: int = 64 + coupling_layer_scale: str = 'sigmoid' + strict_invertible: bool = False + + +def make_realnvp_flow(z_dim: int, flow_config: RealNVPFlowConfig): + flows = [] + for i in range(flow_config.flow_levels): + # act norm + flows.append(tk.flows.ActNorm(z_dim)) + + # coupling layer + n1 = z_dim // 2 + n2 = z_dim - n1 + b = tk.layers.SequentialBuilder( + n1, + layer_args=tk.layers.LayerArgs(). + set_args(['dense'], activation=tk.layers.LeakyReLU) + ) + for j in range(flow_config.coupling_hidden_layer_count): + b.dense(flow_config.coupling_hidden_layer_units) + shift_and_pre_scale = tk.layers.Branch( + branches=[ + # shift + b.as_input().linear(n2, weight_init=tk.init.zeros).build(), + # pre_scale + b.as_input().linear(n2, weight_init=tk.init.zeros).build(), + ], + shared=b.build(), + ) + flows.append(tk.flows.CouplingLayer( + shift_and_pre_scale, scale=flow_config.coupling_layer_scale)) + + # feature rearrangement by invertible dense + flows.append(tk.flows.InvertibleDense(z_dim, strict=flow_config.strict_invertible)) + + return tk.flows.SequentialFlow(flows) diff --git a/tracegnn/models/trace_vae/model/struct_vae.py b/tracegnn/models/trace_vae/model/struct_vae.py new file mode 100644 index 0000000..7cc8c56 --- /dev/null +++ b/tracegnn/models/trace_vae/model/struct_vae.py @@ -0,0 +1,414 @@ +from typing import * + +import dgl +import mltk +import tensorkit as tk +import torch +from tensorkit import tensor as T + +from ..constants import * +from ..distributions import * +from ..tensor_utils import * +from .gnn_layers import * +from .model_utils import * +from .operation_embedding import * +from .pooling import * +from .realnvp_flow import * + +__all__ = [ + 'TraceStructVAEConfig', + 'TraceStructVAE', +] + + +class TraceStructVAEConfig(mltk.Config): + # the dimension of z (to encode adj & node_type) + z_dim: int = 3 + + # the config of posterior / prior flow + realnvp: RealNVPFlowConfig = RealNVPFlowConfig() + + # whether to use BatchNorm? + use_batch_norm: bool = True + + class encoder(mltk.Config): + # =============== + # h(G) for q(z|G) + # =============== + # the gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the gnn layer sizes for q(z|...) 
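+# Sketch of RootPooling above: in a batched DGLGraph, graph k's nodes start at
+# offset sum(batch_num_nodes[:k]); node 0 of each trace graph is its root, so
+# root pooling simply gathers the features at those offsets.
+import torch
+
+def root_offsets_sketch(batch_num_nodes: torch.Tensor) -> torch.Tensor:
+    zero = torch.zeros(1, dtype=batch_num_nodes.dtype)
+    return torch.cumsum(torch.cat([zero, batch_num_nodes]), dim=0)[:-1]
+
+# root_offsets_sketch(torch.tensor([3, 5, 2])) -> tensor([0, 3, 8])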
+ gnn_layers: List[int] = [500, 500, 500, 500] + + # ============= + # graph pooling + # ============= + pool_type: PoolingType = PoolingType.AVG + pool_config: PoolingConfig = PoolingConfig() + + # ====== + # q(z|G) + # ====== + z_logstd_min: Optional[float] = -7 + z_logstd_max: Optional[float] = 2 + + # whether to use realnvp posterior flow? + use_posterior_flow: bool = False + + class decoder(mltk.Config): + # ==================== + # decoder architecture + # ==================== + use_prior_flow: bool = False + + # whether to use `z` directly as context, instead of passing through + # the graph embedding layers? + z_as_context: bool = False + + # whether to use `node_depth` and `node_idx` as extra information? + use_depth: bool = False + use_idx: bool = True + + # ========= + # structure + # ========= + # gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the node types from node embedding e + gnn_layers: List[int] = [500, 500, 500, 500] + + # hidden layers for p(node_count|z) + node_count_layers: List[int] = [500] + + # hidden layers for graph embedding from z + graph_embedding_layers: List[int] = [500, 500] + + # size of the latent embedding e + latent_embedding_size: int = 40 + + +class TraceStructVAE(tk.layers.BaseLayer): + + config: TraceStructVAEConfig + num_operations: int + + def __init__(self, + config: TraceStructVAEConfig, + operation_embedding: OperationEmbedding, + ): + super().__init__() + + # =================== + # memorize the config + # =================== + self.config = config + + # ============================= + # node embedding for operations + # ============================= + self.operation_embedding = operation_embedding + self.num_operations = operation_embedding.num_operations + + # ======================== + # standard layer arguments + # ======================== + layer_args = tk.layers.LayerArgs() + layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU) + if config.use_batch_norm: + layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm) + + # ================== + # q(z|adj,node_type) + # ================== + output_size, gnn_layers = make_gnn_layers( + config.encoder.gnn, + self.operation_embedding.embedding_dim, + config.encoder.gnn_layers, + ) + self.qz_gnn_layers = GNNSequential( + gnn_layers + [ + make_graph_pooling( + output_size, + config.encoder.pool_type, + config.encoder.pool_config + ), + ] + ) + self.qz_mean = tk.layers.Linear(output_size, config.z_dim) + self.qz_logstd = tk.layers.Linear(output_size, config.z_dim) + + if config.encoder.use_posterior_flow: + self.qz_flow = make_realnvp_flow(config.z_dim, config.realnvp) + + # ==== + # p(z) + # ==== + if config.decoder.use_prior_flow: + self.pz_flow = make_realnvp_flow(config.z_dim, config.realnvp).invert() + + # =============== + # p(node_count|z) + # =============== + node_count_builder = tk.layers.SequentialBuilder( + config.z_dim, + layer_args=layer_args + ) + for size in config.decoder.node_count_layers: + node_count_builder.dense(size) + self.pG_node_count_logits = node_count_builder. \ + linear(MAX_NODE_COUNT + 1). 
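+# Sketch of the symmetric edge parameterization used below ("p(adj) =
+# outer-dot(e)"): edge logits are pairwise dot products of per-node latent
+# embeddings, and only the strict upper triangle is kept since the adjacency
+# is undirected. `edge_logits_by_dot_product` / `dense_to_triu` are the
+# project's tensor_utils helpers; this mirrors their apparent semantics.
+import torch
+
+def edge_logits_by_dot_product_sketch(e: torch.Tensor) -> torch.Tensor:
+    # e: (..., N, d) -> (..., N, N) symmetric logits
+    return e @ e.transpose(-1, -2)
+
+def dense_to_triu_sketch(logits: torch.Tensor, n: int) -> torch.Tensor:
+    iu = torch.triu_indices(n, n, offset=1)
+    return logits[..., iu[0], iu[1]]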
\ + build(flatten_to_ndims=True) + + # ======== + # p(adj|z) + # ======== + # graph embedding from z + graph_embedding_builder = tk.layers.SequentialBuilder( + config.z_dim, + layer_args=layer_args, + ) + for size in config.decoder.graph_embedding_layers: + graph_embedding_builder.dense(size) + self.pG_graph_embedding = graph_embedding_builder.build(flatten_to_ndims=True) + + # node embedding (akka, `e`) from the graph embedding + self.pG_node_embedding = tk.layers.Linear( + graph_embedding_builder.out_shape[-1], + MAX_NODE_COUNT * config.decoder.latent_embedding_size, + ) + + # note: p(adj) = outer-dot(e) + + # ================== + # p(node_type|e,adj) + # ================== + if config.decoder.z_as_context: + input_size = ( + config.z_dim + + int(config.decoder.use_idx) * MAX_NODE_COUNT + # node_idx + int(config.decoder.use_depth) * (MAX_SPAN_COUNT + 1) # node_depth + ) + else: + input_size = config.decoder.latent_embedding_size + + output_size, gnn_layers = make_gnn_layers( + config.decoder.gnn, + input_size, + config.decoder.gnn_layers, + ) + self.pG_node_type_logits = GNNSequential( + gnn_layers + + [ + GraphConv(output_size, self.num_operations), # p(node_type|e) + ] + ) + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def q(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None): + config = self.config + + # embedding lookup + h = self.operation_embedding(g.ndata['node_type']) + + # feed into gnn and get node embeddings + h = self.qz_gnn_layers(g, h) + + # mean and logstd for q(z|G) + z_mean = T.maybe_clip( + self.qz_mean(h), + # min_val=-5, + # max_val=5, + ) + z_logstd = T.maybe_clip( + self.qz_logstd(h), + min_val=config.encoder.z_logstd_min, + max_val=config.encoder.z_logstd_max, + ) + + # add 'z' random variable + qz = tk.Normal(mean=z_mean, logstd=z_logstd, event_ndims=1) + if config.encoder.use_posterior_flow: + qz = tk.FlowDistribution(qz, self.qz_flow) + z = net.add('z', qz, n_samples=n_z) + + def p(self, + net: tk.BayesianNet, + g: Optional[dgl.DGLGraph] = None, + n_z: Optional[int] = None, + use_biased: bool = False): + config = self.config + + # sample z ~ p(z) + pz = tk.UnitNormal([1, config.z_dim], event_ndims=1) + if config.decoder.use_prior_flow: + pz = tk.FlowDistribution(pz, self.pz_flow) + z = net.add('z', pz, n_samples=n_z) + + # p(node_count|z) + node_count_logits = self.pG_node_count_logits(z.tensor) + if use_biased: + p_node_count = BiasedCategorical( + alpha=MAX_NODE_COUNT * MAX_NODE_COUNT, + threshold=0.5, + logits=node_count_logits, + ) + else: + p_node_count = tk.Categorical(logits=node_count_logits) + node_count = net.add('node_count', p_node_count) + + # graph embedding + h = z.tensor + h = self.pG_graph_embedding(h) + h = self.pG_node_embedding(h) + h = T.reshape( + h, + T.shape(h)[:-1] + [ + MAX_NODE_COUNT, + config.decoder.latent_embedding_size + ] + ) + + # p(A|e) + edge_logits = edge_logits_by_dot_product(h) + edge_logits = dense_to_triu(edge_logits, MAX_NODE_COUNT) + + if use_biased: + p_adj = BiasedBernoulli( + alpha=MAX_NODE_COUNT, + threshold=0.5, + logits=edge_logits, + event_ndims=1, + ) + else: + p_adj = tk.Bernoulli(logits=edge_logits, event_ndims=1) + + adj = net.add('adj', p_adj) + + if g is None: + # construct the `g` from the `adj`, assuming full MAX_NODE_COUNT adj + def make_graph(triu_adj): + adj = triu_to_dense(triu_adj, MAX_NODE_COUNT) + + # make graph + u, v = T.where(adj) + g = dgl.graph((u, 
v), num_nodes=MAX_NODE_COUNT) + g = dgl.add_reverse_edges(g) + g = dgl.add_self_loop(g) + + # make `node_idx` + node_idx = T.maximum( + T.reduce_max( + (adj * torch.cumsum(adj, dim=-1)), + axis=[-2] + ) - 1, + T.int_scalar(0, dtype=T.int64), + ) + g.ndata['node_idx'] = node_idx + return g + + adj_shape = T.shape(adj.tensor) + if len(adj_shape) == 3: + g = [ + dgl.batch([ + make_graph(adj.tensor[i, j]) + for j in range(adj_shape[1]) + ]) + for i in range(adj_shape[0]) + ] + elif len(adj_shape) == 2: + g = dgl.batch([ + make_graph(adj.tensor[i]) + for i in range(adj_shape[0]) + ]) + else: + raise RuntimeError(f'Unsupported adj.shape: {adj_shape}') + + else: + # expand the node_count of each graph to MAX_NODE_COUNT + sub_graphs = [] + for sub_g in dgl.unbatch(g): + # struct + sub_u, sub_v = sub_g.edges() + sub_node_idx = sub_g.ndata['node_idx'] + mask = sub_u < sub_v + sub_u = sub_u[mask] + sub_v = sub_v[mask] + sub_g = dgl.graph((sub_u, sub_v), num_nodes=MAX_NODE_COUNT) + sub_g = dgl.add_reverse_edges(sub_g) + sub_g = dgl.add_self_loop(sub_g) + + # feature + if sub_node_idx.shape[0] < MAX_NODE_COUNT: + sub_node_idx = T.concat( + [ + sub_node_idx, + T.zeros([MAX_NODE_COUNT - sub_node_idx.shape[0]], dtype=T.int64) + ], + axis=0 + ) + sub_g.ndata['node_idx'] = sub_node_idx + + # add this graph + sub_graphs.append(sub_g) + g = dgl.batch(sub_graphs) + + net.meta['g'] = g + + # p(node_type|e) + if config.decoder.z_as_context: + # z as context + z_shape = T.shape(z.tensor) + h = T.repeat( + T.reshape(z.tensor, z_shape[:-1] + [1, z_shape[-1]]), + [1] * (len(z_shape) - 1) + [MAX_NODE_COUNT, 1] + ) + + # h = [] + # for i, node_count in enumerate(g.batch_num_nodes()): + # h.append( + # T.repeat( + # z.tensor[..., i: i+1, :], + # [1] * (len(h_shape) - 1) + [int(T.to_numpy(node_count)), 1], + # ) + # ) + # h = T.concat(h, axis=-2) + + # node_depth and node_idx + h2 = decoder_use_depth_and_idx( + g, + config.decoder.use_depth, + config.decoder.use_idx, + ) + if h2 is not None: + h = T.broadcast_concat(h, h2, axis=-1) + + h_shape = T.shape(h) + h = T.reshape( + h, + h_shape[:-3] + [h_shape[-3] * h_shape[-2], h_shape[-1]] + ) + + node_type_logits = self.pG_node_type_logits(g, h) + node_type_logits = T.reshape(node_type_logits, h_shape[:-1] + [self.num_operations]) + + # if use_biased: + # p_node_type = BiasedCategorical( + # alpha=MAX_NODE_COUNT, + # threshold=0.5, + # logits=node_type_logits, + # event_ndims=1 + # ) + # else: + # p_node_type = tk.Categorical(logits=node_type_logits, event_ndims=1) + + p_node_type = tk.Categorical(logits=node_type_logits, event_ndims=1) + node_type = net.add('node_type', p_node_type) diff --git a/tracegnn/models/trace_vae/model/trace_vae.py b/tracegnn/models/trace_vae/model/trace_vae.py new file mode 100644 index 0000000..71e076a --- /dev/null +++ b/tracegnn/models/trace_vae/model/trace_vae.py @@ -0,0 +1,161 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.typing_ import TensorOrData + +from ..constants import * +from ..tensor_utils import * +from ..types import * +from .latency_vae import * +from .operation_embedding import * +from .struct_vae import * + +__all__ = [ + 'TraceVAEArch', + 'TraceVAEConfig', + 'TraceVAE', +] + + +class TraceVAEArch(str, Enum): + DEFAULT = 'default' + + +class TraceVAEConfig(mltk.Config): + # operation embedding + operation_embedding_dim: int = 40 + + # the architecture selector + arch: TraceVAEArch = TraceVAEArch.DEFAULT + + # the default architecture + 
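+    # (at test time, `load_config` in test_utils.py patches these fields from
+    # extra command-line arguments such as `--model.struct.z_dim=5`, parsing the
+    # values with yaml.safe_load; the flag path mirrors the nesting below)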
struct: TraceStructVAEConfig = TraceStructVAEConfig() + latency: TraceLatencyVAEConfig = TraceLatencyVAEConfig() + use_latency: bool = True + + +class TraceVAE(tk.layers.BaseLayer): + + config: TraceVAEConfig + num_operations: int + + def __init__(self, config: TraceVAEConfig, num_operations: int): + super().__init__() + + # =================== + # memorize the config + # =================== + self.config = config + self.num_operations = num_operations + + # ============== + # the components + # ============== + self.operation_embedding = OperationEmbedding( + num_operations=num_operations, + embedding_dim=config.operation_embedding_dim, + ) + if self.config.arch == TraceVAEArch.DEFAULT: + self.struct_vae = TraceStructVAE(config.struct, self.operation_embedding) + if self.config.use_latency: + self.latency_vae = TraceLatencyVAE( + config.latency, + config.struct.z_dim, + self.operation_embedding, + ) + else: + raise ValueError(f'Unsupported arch: {self.config.arch!r}') + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def _call_graph_batch_build(self, G: TraceGraphBatch): + G.build_dgl( + add_self_loop=True, + directed=False, + # directed=('reverse' if self.config.edge.reverse_directed else False), + ) + + def q(self, + G: TraceGraphBatch, + observed: Optional[Mapping[str, TensorOrData]] = None, + n_z: Optional[int] = None, + no_latency: bool = False, + ): + config = self.config + + self._call_graph_batch_build(G) + net = tk.BayesianNet(observed=observed) + + self.struct_vae.q(net, G.dgl_batch, n_z=n_z) + if config.use_latency and not no_latency: + self.latency_vae.q(net, G.dgl_batch, n_z=n_z) + + return net + + def p(self, + observed: Optional[Mapping[str, TensorOrData]] = None, + G: Optional[TraceGraphBatch] = None, # the observed `G` + n_z: Optional[int] = None, + no_latency: bool = False, + use_biased: bool = False, + use_latency_biased: bool = False, + latency_logstd_min: Optional[float] = None, + latency_log_prob_weight: bool = False, + std_limit: Optional[T.Tensor] = None, + ) -> tk.BayesianNet: + config = self.config + + # populate `observed` from `G` if specified, and construct net + if G is not None: + self._call_graph_batch_build(G) + g = G.dgl_batch + observed = observed or {} + + # struct + observed['node_count'] = G.dgl_batch.batch_num_nodes() + observed['adj'] = T.stack( + [ + dense_triu_adj( + g, + MAX_NODE_COUNT, + reverse=False, + ) + for g in G.dgl_graphs + ], + axis=0 + ) + # observed['span_count'] = pad_node_feature(G, 'span_count') + observed['node_type'] = pad_node_feature(G, 'node_type') + + # latency + latency = pad_node_feature(G, 'latency')[..., :LATENCY_DIM] + if config.latency.decoder.clip_latency_to_one_dim: + latency = latency[..., :1] + observed['latency'] = latency + else: + g = None + + # the Bayesian net + net = tk.BayesianNet(observed=observed) + + # call components + self.struct_vae.p(net, g, n_z=n_z, use_biased=use_biased) + if config.use_latency and not no_latency: + g = net.meta['g'] + self.latency_vae.p( + net, + g, + n_z=n_z, + use_biased=use_biased and use_latency_biased, + latency_logstd_min=latency_logstd_min, + latency_log_prob_weight=latency_log_prob_weight, + std_limit=std_limit, + ) + + return net diff --git a/tracegnn/models/trace_vae/tensor_utils.py b/tracegnn/models/trace_vae/tensor_utils.py new file mode 100644 index 0000000..818403e --- /dev/null +++ b/tracegnn/models/trace_vae/tensor_utils.py @@ -0,0 +1,225 @@ +from 
typing import * + +import dgl +import numpy as np +import torch +from tensorkit import tensor as T + +from tracegnn.models.trace_vae.constants import * +from tracegnn.models.trace_vae.types import * +from tracegnn.utils.array_buffer import ArrayBuffer + +__all__ = [ + 'latency_onehot_to_mask', + 'edge_logits_by_dot_product', + 'dense_to_triu', + 'triu_to_dense', + 'dense_triu_adj', + 'pad_node_feature', + 'get_moments', + 'node_count_mask', + 'collect_operation_id', + 'collect_latency_std', + 'collect_latency_reldiff', + 'collect_p_node_count', + 'collect_p_edge', +] + + +def latency_onehot_to_mask(onehot: T.Tensor) -> T.Tensor: + """ + >>> onehot = T.as_tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + >>> T.to_numpy(latency_onehot_to_mask(onehot)) + array([[1, 0, 0], + [1, 1, 0], + [1, 1, 1]]) + >>> T.to_numpy(latency_onehot_to_mask(T.cast(onehot, dtype=T.float32))) + array([[1., 0., 0.], + [1., 1., 0.], + [1., 1., 1.]], dtype=float32) + """ + origin_dtype = T.get_dtype(onehot) + onehot = T.as_tensor(onehot, dtype=T.boolean) + shape = T.shape(onehot) + right = shape[-1] - 1 + mask = T.full(shape, False, dtype=T.boolean) + mask[..., right] = onehot[..., right] + while right > 0: + old_right = right + right -= 1 + mask[..., right] = T.logical_or(mask[..., old_right], onehot[..., right]) + return T.cast(mask, dtype=origin_dtype) + + +def edge_logits_by_dot_product(h: T.Tensor) -> T.Tensor: + left = h + right = T.swap_axes(h, -1, -2) + return T.matmul(left, right) + + +def triu_mask(node_count: int) -> T.Tensor: + return torch.triu(T.full([node_count, node_count], True, T.boolean), 1) + + +def dense_to_triu(x: T.Tensor, node_count: int) -> T.Tensor: + mask = triu_mask(node_count) + shape = T.shape(x) + return T.reshape(x, shape[:-2] + [-1])[..., mask.reshape(-1)] + + +def triu_to_dense(x: T.Tensor, + node_count: int, + pad_value: Union[int, float] = 0) -> T.Tensor: + mask = triu_mask(node_count).reshape(-1) + ret = T.full([node_count * node_count], pad_value, dtype=T.get_dtype(x)) + ret[mask] = x + return T.reshape(ret, [node_count, node_count]) + + +def dense_triu_adj(g: dgl.DGLGraph, node_count: int, reverse: bool = False) -> T.Tensor: + adj = T.zeros([node_count, node_count], dtype=T.float32) + u, v = g.edges() + if reverse: + v, u = u, v + adj[u, v] = 1 + # adj = to_dense_adj( + # T.stack([u, v], axis=0), + # max_num_nodes=node_count + # ) + return dense_to_triu(adj, node_count) + + +def pad_node_feature(G: TraceGraphBatch, + feature_name: str, + max_node_count: int = MAX_NODE_COUNT): + # inspect graph count + graph_count = len(G.dgl_graphs) + + # inspect features + vec = G.dgl_batch.ndata[feature_name] + value_shape = T.shape(vec)[1:] + dtype = T.get_dtype(vec) + device = T.get_device(vec) + + # todo: whether or not it's better to use concat instead of copying into a new tensor? 
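+    # (an untested concat-based sketch of that alternative, for reference only:
+    #
+    #     rows = [
+    #         T.concat(
+    #             [g.ndata[feature_name],
+    #              T.zeros([max_node_count - g.number_of_nodes()] + value_shape,
+    #                      dtype=dtype, device=device)],
+    #             axis=0,
+    #         )
+    #         for g in G.dgl_graphs
+    #     ]
+    #     ret = T.stack(rows, axis=0)
+    # )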
+ with T.no_grad(): + ret = T.zeros( + [graph_count, max_node_count] + value_shape, + dtype=dtype, + device=device, + ) + for i in range(graph_count): + vec = G.dgl_graphs[i].ndata[feature_name] + ret[i, :T.shape(vec)[0]] = vec + return ret + + +def get_moments(x, + axis: Optional[List[int]] = None, + clip_var: bool = False, + ) -> Tuple[T.Tensor, T.Tensor]: + mean = T.reduce_mean(x, axis=axis) + var = T.reduce_mean(x ** 2, axis=axis) - mean ** 2 + if clip_var: + var = T.maximum(var, dtype=T.get_dtype(var)) + return mean, var + + +def node_count_mask(node_count, + max_node_count: int, + dtype: Optional[str] = None) -> T.Tensor: + h = T.arange(0, max_node_count, dtype=T.get_dtype(node_count)) + node_count = T.expand_dim(node_count, axis=-1) + h = h < node_count + if dtype is not None: + h = T.cast(h, dtype) + return h + + +def collect_operation_id(buf, chain, mask=None): + if 'node_type' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + node_type = chain.p['node_type'].tensor + if len(T.shape(node_type)) == 3: + node_type = node_type[0, ...] + node_type = T.to_numpy(node_type) + if mask is None: + for i, k in enumerate(node_count): + buf.extend(node_type[i, :k]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(node_type[i, :k]) + + +def collect_latency_std(buf, chain, mask=None): + if 'latency' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + latency_std = chain.p['latency'].distribution.base_distribution.std + if len(T.shape(latency_std)) == 4: + latency_std = latency_std[0, ...] + latency_std = T.to_numpy(latency_std) + + if mask is None: + for i, k in enumerate(node_count): + buf.extend(latency_std[i, :k, 0]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(latency_std[i, :k, 0]) + +def collect_p_node_count(buf, chain, mask=None): + node_count = chain.p['node_count'].distribution.probs[0] + truth_node_count = chain.p['node_count'].tensor.unsqueeze(1) + + node_count_p = torch.gather(node_count, 1, truth_node_count).squeeze(-1) + + if mask is None: + buf.extend(T.to_numpy(node_count_p)) + else: + buf.extend(T.to_numpy(node_count_p)[mask]) + +def collect_p_edge(buf: ArrayBuffer, chain, mask=None): + # prob = np.exp(T.to_numpy(chain.p.log_prob('adj'))[0]) + node_count = T.to_numpy(chain.p['node_count'].tensor) + p_edge = chain.p['adj'].distribution.probs[0] + truth_p_edge = chain.p['adj'].tensor + + if mask is None: + for i in range(p_edge.shape[0]): + cur_p_edge = T.to_numpy(triu_to_dense(p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + cur_truth = T.to_numpy(triu_to_dense(truth_p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + buf.extend(np.abs((1.0 - cur_truth) - cur_p_edge).reshape(-1)) + else: + for i, m in enumerate(mask): + if m: + cur_p_edge = T.to_numpy(triu_to_dense(p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + cur_truth = T.to_numpy(triu_to_dense(truth_p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + buf.extend(np.abs((1.0 - cur_truth) - cur_p_edge).reshape(-1)) + +def collect_latency_reldiff(buf, chain, mask=None, abs=True): + def collect_dist_val(attr=None): + if attr is None: + v = chain.p['latency'].tensor + else: + v = getattr(chain.p['latency'].distribution.base_distribution, attr) + if len(T.shape(v)) == 4: + v = v[0, ...] 
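+        # the 4-d case carries a leading `n_z` sample axis, reduced to the first
+        # sample above; keep only the first latency dimension here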
+ return T.to_numpy(v[..., 0]) + + if 'latency' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + latency = collect_dist_val() + latency_mean = collect_dist_val('mean') + latency_std = collect_dist_val('std') + rel_diff = (latency - latency_mean) / np.maximum(latency_std, 1e-7) + if abs: + rel_diff = np.abs(rel_diff) + + if mask is None: + for i, k in enumerate(node_count): + buf.extend(rel_diff[i, :k]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(rel_diff[i, :k]) diff --git a/tracegnn/models/trace_vae/test.py b/tracegnn/models/trace_vae/test.py new file mode 100644 index 0000000..7930575 --- /dev/null +++ b/tracegnn/models/trace_vae/test.py @@ -0,0 +1,275 @@ +import pickle +from pprint import pprint +from tempfile import TemporaryDirectory + +import os +import mltk +import click +import tensorkit as tk +import numpy as np +from tensorkit import tensor as T +from tensorkit.examples.utils import print_experiment_summary + +from tracegnn.data import * +from tracegnn.models.trace_vae.dataset import TraceGraphDataStream +from tracegnn.models.trace_vae.evaluation import * +from tracegnn.models.trace_vae.graph_utils import * +from tracegnn.models.trace_vae.test_utils import * +from tracegnn.models.trace_vae.types import TraceGraphBatch +from tracegnn.utils import * + + +@click.group() +def main(): + pass + + +@main.command(context_settings=dict( + ignore_unknown_options=True, + help_option_names=[], +)) +@click.option('-D', '--data-dir', required=False) +@click.option('-M', '--model-path', required=True) +@click.option('-o', '--nll-out', required=False, default=None) +@click.option('--proba-out', default=None, required=False) +@click.option('--auc-out', default=None, required=False) +@click.option('--latency-out', default=None, required=False) +@click.option('--gui', is_flag=True, default=False, required=False) +@click.option('--device', required=False, default=None) +@click.option('--n_z', type=int, required=False, default=10) +@click.option('--batch-size', type=int, default=128) +@click.option('--clip-nll', type=float, default=100_000) +@click.option('--no-biased', is_flag=True, default=False, required=False) +@click.option('--no-latency-biased', is_flag=True, default=False, required=False) +@click.option('--no-latency', is_flag=True, default=False, required=False) +@click.option('--use-train-val', is_flag=True, default=False, required=False) +@click.option('--infer-bias-std', is_flag=True, default=False, required=False) +@click.option('--bias-std-normal-p', type=float, default=0.995, required=False) +@click.option('--infer-threshold', is_flag=True, default=False, required=False) +@click.option('--threshold-p', type=float, default=0.995, required=False) +@click.option('--threshold-amplify', type=float, default=1.0, required=False) +@click.option('--no-latency-log-prob-weight', is_flag=True, default=False, required=False) +@click.option('--use-std-limit', is_flag=True, default=False, required=False) +@click.option('--std-limit-global', is_flag=True, default=False, required=False) +@click.option('--std-limit-fixed', type=float, default=None, required=False) +@click.option('--std-limit-p', type=float, default=0.99, required=False) +@click.option('--std-limit-amplify', type=float, default=1.0, required=False) +@click.argument('extra_args', nargs=-1, type=click.UNPROCESSED) +def evaluate_nll(data_dir, model_path, nll_out, proba_out, auc_out, latency_out, gui, device, + n_z, batch_size, clip_nll, no_biased, no_latency_biased, no_latency, + 
use_train_val, infer_bias_std, bias_std_normal_p, infer_threshold, + threshold_p, threshold_amplify, no_latency_log_prob_weight, + use_std_limit, std_limit_global, std_limit_fixed, std_limit_p, std_limit_amplify, + extra_args): + N_LIMIT = None + + if infer_bias_std or infer_threshold or use_std_limit: + use_train_val = True + + with mltk.Experiment(mltk.Config, args=[]) as exp: + # check parameters + if gui: + proba_out = ':show:' + auc_out = ':show:' + latency_out = ':show:' + + with T.use_device(device or T.first_gpu_device()): + # load the config + train_config = load_config( + model_path=model_path, + strict=False, + extra_args=extra_args, + ) + if data_dir is None: + data_dir = train_config.dataset.root_dir + + # load the dataset + data_names = ['test', 'test-drop', 'test-latency'] + test_db, id_manager = open_trace_graph_db( + data_dir, + names=data_names + ) + print('Test DB:', test_db) + latency_range = TraceGraphLatencyRangeFile( + id_manager.root_dir, + require_exists=True, + ) + test_stream = TraceGraphDataStream( + test_db, id_manager=id_manager, batch_size=batch_size, + shuffle=False, skip_incomplete=False, data_count=N_LIMIT, + ) + + # also load train / val + if use_train_val: + train_db, _ = open_trace_graph_db( + data_dir, + names=['train'], + ) + print('Train DB:', train_db) + val_db, _ = open_trace_graph_db( + data_dir, + names=['val'] + ) + print('Val DB:', val_db) + train_stream = TraceGraphDataStream( + train_db, id_manager=id_manager, batch_size=batch_size, + shuffle=True, skip_incomplete=False, data_count=N_LIMIT, + ) + val_stream = TraceGraphDataStream( + val_db, id_manager=id_manager, batch_size=batch_size, + shuffle=True, skip_incomplete=False, data_count=N_LIMIT, + ) + else: + train_stream = val_stream = None + + print_experiment_summary(exp, train_stream, val_stream, test_stream) + + # load the model + vae = load_model2( + model_path=model_path, + train_config=train_config, + id_manager=id_manager, + ) + mltk.print_config(vae.config, title='Model Config') + vae = vae.to(T.current_device()) + + # do evaluation + operation_id = {} + latency_std = {} + latency_reldiff = {} + p_node_count = {} + p_edge = {} + nll_result = {} + thresholds = {} + std_group_limit = np.full([id_manager.num_operations], np.nan, dtype=np.float32) + + def F(stream, category, n_z, threshold=None, std_limit=None): + # the save files kw + kw = dict( + nll_output_file=ensure_parent_exists(nll_out), + proba_cdf_file=ensure_parent_exists(proba_out), + auc_curve_file=ensure_parent_exists(auc_out), + latency_hist_file=ensure_parent_exists(latency_out), + ) + differ_set = set() + + for k in kw: + if kw[k] is not None: + s = kw[k].replace('test', category) + if category == 'test' or s != kw[k]: + differ_set.add(k) + kw[k] = s + kw = {k: v for k, v in kw.items() if k in differ_set} + + # the output temp dir + with TemporaryDirectory() as temp_dir: + if 'nll_output_file' not in kw: + kw['nll_output_file'] = ensure_parent_exists( + os.path.join(temp_dir, 'nll.npz') + ) + + # do evaluation + result_dict = do_evaluate_nll( + test_stream=stream, + vae=vae, + id_manager=id_manager, + latency_range=latency_range, + n_z=n_z, + use_biased=(not no_biased) and (category == 'test'), + use_latency_biased=not no_latency_biased, + no_latency=no_latency, + no_struct=False, + latency_log_prob_weight=not no_latency_log_prob_weight, + std_limit=std_limit, + test_threshold=threshold, + clip_nll=clip_nll, + use_embeddings=False, + operation_id_dict_out=operation_id, + latency_std_dict_out=latency_std, + 
p_node_count_dict_out=p_node_count, + p_edge_dict_out=p_edge, + latency_reldiff_dict_out=latency_reldiff, + latency_dict_prefix=f'{category}_', + **kw, + ) + result_dict = {f'{category}_{k}': v for k, v in result_dict.items()} + exp.doc.update({'result': result_dict}, immediately=True) + pprint(result_dict) + + # load the NLLs if category in ('train', 'val') + if category in ('train', 'val'): + nll_result[category] = np.load(kw['nll_output_file'])['nll_list'] + + tk.layers.set_eval_mode(vae) + with T.no_grad(): + if use_train_val: + F(train_stream, 'train', 1) + F(val_stream, 'val', 1) + + if infer_bias_std: + bias_std = np.percentile(latency_reldiff['val_normal'].array, bias_std_normal_p * 100) + exp.doc.update({'result': {'bias_std': bias_std}}, immediately=True) + print(f'Set bias_std = {bias_std:.3f}, bias_std_normal_p = {bias_std_normal_p:.3f}') + vae.config.latency.decoder.biased_normal_std_threshold = bias_std + + if infer_threshold: + for category in ('train', 'val'): + th_cand = [] + for _ in range(10): + nll_subset = nll_result[category] + nll_subset = np.random.choice(nll_subset, replace=True, size=len(nll_subset)) + if clip_nll: + nll_subset = nll_subset[nll_subset < clip_nll - 1e-7] + else: + nll_subset = nll_subset[np.isfinite(nll_subset)] + th = np.percentile(nll_subset, threshold_p * 100) * threshold_amplify + th_cand.append(th) + thresholds[f'{category}_threshold'] = th = np.median(th_cand) + print( + f'Set {category}_threshold = {th:.3f}, ' + f'threshold_p = {threshold_p:.3f}, ' + f'threshold_amplify = {threshold_amplify:.3f}' + ) + exp.doc.update({'result': thresholds}, immediately=True) + + if use_std_limit: + if std_limit_fixed is not None: + print(f'Std limit fixed: {std_limit_fixed:.4f}') + std_group_limit[:] = std_limit_fixed + elif std_limit_global: + key = 'val_normal' + std_limit = float(np.percentile( + latency_std[key].array, + std_limit_p * 100 + )) + print(f'Std limit: {std_limit:.4f}') + std_group_limit[:] = std_limit + else: + key = 'val_normal' + v1 = operation_id[key].array + v2 = latency_std[key].array + max_limit = 0 + + for srv_id in range(id_manager.num_operations): + v = v2[v1 == srv_id] + if len(v) > 0: + srv_limit = ( + std_limit_amplify * + float(np.percentile(v, std_limit_p * 100)) + ) + std_group_limit[srv_id] = srv_limit + max_limit = max(max_limit, srv_limit) + + for srv_id in range(id_manager.num_operations): + if np.isnan(std_group_limit[srv_id]): + std_group_limit[srv_id] = max_limit + pprint({i: v for i, v in enumerate(std_group_limit)}) + + else: + std_group_limit = None + + F(test_stream, 'test', n_z, thresholds.get('val_threshold'), std_limit=std_group_limit) + + +if __name__ == '__main__': + main() diff --git a/tracegnn/models/trace_vae/test_utils.py b/tracegnn/models/trace_vae/test_utils.py new file mode 100644 index 0000000..ef373eb --- /dev/null +++ b/tracegnn/models/trace_vae/test_utils.py @@ -0,0 +1,106 @@ +from urllib.error import HTTPError +from typing import * + +import mltk +import tensorkit as tk +import torch +import yaml +from tensorkit import tensor as T + +from tracegnn.models.trace_vae.model import TraceVAE +from tracegnn.models.trace_vae.train import ExpConfig as TrainConfig +from tracegnn.data import * +from tracegnn.utils import * + +__all__ = [ + 'load_config', + 'load_model', + 'load_model2', +] + + +def _model_and_config_file(model_path: str) -> Tuple[str, str]: + # get model file and config file path + if model_path.endswith('.pt'): + model_file = model_path + config_file = model_path.rsplit('/', 2)[-3] + 
'/config.json' + else: + if not model_path.endswith('/'): + model_path += '/' + model_file = model_path + 'models/final.pt' + config_file = model_path + 'config.json' + + return model_file, config_file + + +def load_config(model_path: str, strict: bool, extra_args) -> TrainConfig: + # get model file and config file path + model_file, config_file = _model_and_config_file(model_path) + + # load config + with as_local_file(config_file) as config_file: + config_loader = mltk.ConfigLoader(TrainConfig) + config_loader.load_file(config_file) + + # also patch the config + if extra_args: + extra_args_dict = {} + for arg in extra_args: + if arg.startswith('--'): + arg = arg[2:] + if '=' not in arg: + val = True + else: + arg, val = arg.split('=', 1) + val = yaml.safe_load(val) + extra_args_dict[arg] = val + else: + raise ValueError(f'Unsupported argument: {arg!r}') + config_loader.load_object(extra_args_dict) + + # get the config + if strict: + discard_undefined = mltk.type_check.DiscardMode.NO + else: + discard_undefined = mltk.type_check.DiscardMode.WARN + return config_loader.get(discard_undefined=discard_undefined) + + +def load_model(model_path: str, + id_manager: TraceGraphIDManager, + strict: bool, + extra_args, + ) -> Tuple[TraceVAE, TrainConfig]: + # load config + train_config = load_config(model_path, strict, extra_args) + + # load model + vae = load_model2(model_path, train_config, id_manager) + return vae, train_config + + +def load_model2(model_path: str, + train_config: TrainConfig, + id_manager: TraceGraphIDManager, + ) -> TraceVAE: + # get model file and config file path + model_file, config_file = _model_and_config_file(model_path) + + # load the model + vae = TraceVAE(train_config.model, id_manager.num_operations) + try: + with as_local_file(model_file) as model_file: + vae.load_state_dict(torch.load( + model_file, + map_location=T.current_device() + )) + except HTTPError as ex: + if ex.code != 404: + raise + with as_local_file(model_file) as model_file: + vae.load_state_dict(torch.load( + model_file, + map_location=T.current_device() + )) + tk.init.set_initialized(vae) + return vae diff --git a/tracegnn/models/trace_vae/train.py b/tracegnn/models/trace_vae/train.py new file mode 100644 index 0000000..61053ce --- /dev/null +++ b/tracegnn/models/trace_vae/train.py @@ -0,0 +1,531 @@ +import json +import math +import random +import shutil +import traceback +from enum import Enum +from functools import wraps +from typing import * + +import os +import sys +import mltk +import tensorkit as tk +import numpy as np +import torch +import click +from tensorkit import tensor as T +from tensorkit.examples import utils +from tensorkit.train import Checkpoint + +from tracegnn.data import * +from tracegnn.models.trace_vae.evaluation import * +from tracegnn.models.trace_vae.graph_utils import * +from tracegnn.models.trace_vae.tensor_utils import * +from tracegnn.models.trace_vae.types import * +from tracegnn.models.trace_vae.model import * +from tracegnn.models.trace_vae.dataset import * +from tracegnn.utils import * + + +class NANLossError(Exception): + + def __init__(self, epoch): + super().__init__(epoch) + + @property + def epoch(self) -> Optional[int]: + return self.args[0] + + def __str__(self): + return f'NaN loss encountered at epoch {self.epoch}' + + +class OptimizerType(str, Enum): + ADAM = 'adam' + RMSPROP = 'rmsprop' + + +class ExpConfig(mltk.Config): + model: TraceVAEConfig = TraceVAEConfig() + device: Optional[str] = 'cpu' + seed: Optional[int] = 0 + + class train(mltk.Config): + 
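+        # Training runs in two stages (see `train_part` usage at the bottom of
+        # this file): the full VAE is trained for `struct_pretrain_epochs`, then
+        # only the `latency_vae` parameters are updated until `max_epoch`.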
max_epoch: int = 60 + struct_pretrain_epochs: Optional[int] = 40 # number of epochs to pre-train the struct_vae + ckpt_epoch_freq: Optional[int] = 5 + test_epoch_freq: Optional[int] = 5 + latency_hist_epoch_freq: Optional[int] = 10 + latency_std_hist_epoch_freq: Optional[int] = 5 + + use_early_stopping: bool = False + val_epoch_freq: Optional[int] = 2 + + kl_beta: float = 1.0 + warm_up_epochs: Optional[int] = None # number of epochs to warm-up the prior (KLD) + + l2_reg: float = 0.0001 + z_unit_ball_reg: Optional[float] = None + z2_unit_ball_reg: Optional[float] = None + + init_batch_size: int = 64 + batch_size: int = 64 + val_batch_size: int = 64 + + optimizer: OptimizerType = OptimizerType.RMSPROP + initial_lr: float = 0.001 + lr_anneal_ratio: float = 0.1 + lr_anneal_epochs: int = 30 + clip_norm: Optional[float] = None + global_clip_norm: Optional[float] = 10 # important for numerical stability + + test_n_z: int = 10 + num_plot_samples: int = 20 + + class test(mltk.Config): + batch_size: int = 64 + eval_n_z: int = 10 + use_biased: bool = True + latency_log_prob_weight: bool = True + clip_nll: Optional[float] = 100_000 + + class report(mltk.Config): + html_ext: str = '.html.gz' + + class dataset(mltk.Config): + root_dir: str = os.path.abspath('./data/processed') + + +def main(exp: mltk.Experiment[ExpConfig]): + # config + config = exp.config + + # set random seed to encourage reproducibility (does it really work?) + if config.seed is not None: + T.random.set_deterministic(True) + T.random.seed(config.seed) + np.random.seed(config.seed) + random.seed(config.seed) + + # Load data + id_manager = TraceGraphIDManager(os.path.join(config.dataset.root_dir, 'id_manager')) + latency_range = TraceGraphLatencyRangeFile(os.path.join(config.dataset.root_dir, 'id_manager')) + + train_db = TraceGraphDB(BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'train'))) + val_db = TraceGraphDB(BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'val'))) + test_db = TraceGraphDB( + BytesMultiDB( + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test')), + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test-drop')), + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test-latency')), + ) + ) + train_stream = TraceGraphDataStream( + train_db, id_manager=id_manager, batch_size=config.train.batch_size, + shuffle=True, skip_incomplete=False, + ) + val_stream = TraceGraphDataStream( + val_db, id_manager=id_manager, batch_size=config.train.val_batch_size, + shuffle=False, skip_incomplete=False, + ) + test_stream = TraceGraphDataStream( + test_db, id_manager=id_manager, batch_size=config.test.batch_size, + shuffle=False, skip_incomplete=False, + ) + + utils.print_experiment_summary( + exp, + train_data=train_stream, + val_data=val_stream, + test_data=test_stream + ) + print('Train Data:', train_db) + print('Val Data:', val_db) + print('Test Data:', test_db) + + # build the network + vae: TraceVAE = TraceVAE( + config.model, + id_manager.num_operations, + ) + vae = vae.to(T.current_device()) + params, param_names = utils.get_params_and_names(vae) + utils.print_parameters_summary(params, param_names) + print('') + mltk.print_with_time('Network constructed.') + + # define the training method for a certain model part + def train_part(params, start_epoch, max_epoch, latency_only, do_final_eval): + # util to ensure all installed hooks will only run within this context + in_context = [True] + + def F(func): + @wraps(func) + def wrapper(*args, 
**kwargs): + if in_context[0]: + return func(*args, **kwargs) + return wrapper + + # the train procedure + try: + # buffer to collect stds of each p(latency|z) + latency_std = {} + for key in ('train', 'val', 'test_normal', 'test_drop', 'test_latency'): + latency_std[key] = ArrayBuffer(81920) + + def should_collect_latency_std(): + return ( + config.train.latency_std_hist_epoch_freq and + loop.epoch % config.train.latency_std_hist_epoch_freq == 0 + ) + + def clear_std_buf(): + for buf in latency_std.values(): + buf.clear() + + # the initialization function + def initialize(): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=train_db.sample_n(config.train.init_batch_size), + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + loss = chain.vi.training.sgvb(reduction='mean') + mltk.print_with_time(f'Network initialized: loss = {T.to_numpy(loss)}') + + # the train functions + def on_train_epoch_begin(): + # set train mode + if latency_only: + tk.layers.set_eval_mode(vae) + tk.layers.set_train_mode(vae.latency_vae) + else: + tk.layers.set_train_mode(vae) + + # clear std buffer + clear_std_buf() + + def train_step(trace_graphs): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + + # collect the latency std + if should_collect_latency_std(): + collect_latency_std(latency_std['train'], chain) + + # collect the log likelihoods + p_obs = [] + p_latent = [] + q_latent = [] + for name in chain.p: + if name in chain.q: + q_latent.append(chain.q[name].log_prob()) + p_latent.append(chain.p[name].log_prob()) + else: + # print(name, chain.p[name].log_prob().mean()) + p_obs.append(chain.p[name].log_prob()) + + # get E[log p(x|z)] and KLD[q(z|x)||p(z)] + recons = T.reduce_mean(T.add_n(p_obs)) + kl = T.reduce_mean(T.add_n(q_latent) - T.add_n(p_latent)) + + # KL beta + beta = config.train.kl_beta + if config.train.warm_up_epochs and loop.epoch < config.train.warm_up_epochs: + beta = beta * (loop.epoch / config.train.warm_up_epochs) + loss = beta * kl - recons + + # l2 regularization + if config.train.l2_reg: + l2_params = [] + for p, n in zip(params, param_names): + if 'bias' not in n: + l2_params.append(p) + loss = loss + config.train.l2_reg * T.nn.l2_regularization(l2_params) + + # unit ball regularization + def add_unit_ball_reg(l, t, reg): + if reg is not None: + ball_mean, ball_var = get_moments(t, axis=[-1]) + l = l + reg * ( + T.reduce_mean(ball_mean ** 2) + + T.reduce_mean((ball_var - 1) ** 2) + ) + return l + + loss = add_unit_ball_reg(loss, chain.q['z'].tensor, config.train.z_unit_ball_reg) + if 'z2' in chain.q: + loss = add_unit_ball_reg(loss, chain.q['z2'].tensor, config.train.z2_unit_ball_reg) + + # check and return the metrics + loss_val = T.to_numpy(loss) + if math.isnan(loss_val): + raise NANLossError(loop.epoch) + + return {'loss': loss, 'recons': recons, 'kl': kl} + + # the validation function + def validate(): + tk.layers.set_eval_mode(vae) + + def val_step(trace_graphs): + with T.no_grad(): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + # collect the latency std + if should_collect_latency_std(): + collect_latency_std(latency_std['val'], chain) + loss = chain.vi.training.sgvb() + return {'loss': T.to_numpy(T.reduce_mean(loss))} + + val_loop = loop.validation() + result_dict = val_loop.run(val_step, val_stream) + result_dict = { + 
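+                # prefix the metrics with 'val_', so that validation results are
+                # reported separately from the training metrics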
f'val_{k}': v + for k, v in result_dict.items() + } + summary_cb.update_metrics(result_dict) + + # the evaluation function + def evaluate(n_z, eval_loop, eval_stream, epoch, use_embeddings=False, + plot_latency_hist=False): + # latency_hist_file + latency_hist_file = None + if plot_latency_hist: + latency_hist_file = exp.make_parent(f'./plotting/latency-sample/{epoch}.jpg') + + # do evaluation + tk.layers.set_eval_mode(vae) + with T.no_grad(): + kw = {} + if should_collect_latency_std(): + kw['latency_std_dict_out'] = latency_std + kw['latency_dict_prefix'] = 'test_' + result_dict = do_evaluate_nll( + test_stream=eval_stream, + vae=vae, + id_manager=id_manager, + latency_range=latency_range, + n_z=n_z, + use_biased=config.test.use_biased, + latency_log_prob_weight=config.test.latency_log_prob_weight, + test_loop=eval_loop, + summary_writer=summary_cb, + clip_nll=config.test.clip_nll, + use_embeddings=use_embeddings, + latency_hist_file=latency_hist_file, + **kw, + ) + + with open(exp.make_parent(f'./result/test-anomaly/{epoch}.json'), 'w', encoding='utf-8') as f: + f.write(json.dumps(result_dict)) + eval_loop.add_metrics(**result_dict) + + def save_model(epoch=None): + epoch = epoch or loop.epoch + torch.save(vae.state_dict(), exp.make_parent(f'models/{epoch}.pt')) + + # final evaluation + if do_final_eval: + tk.layers.set_eval_mode(vae) + + # save the final model + save_model('final') + + clear_std_buf() + evaluate( + n_z=config.test.eval_n_z, + eval_loop=mltk.TestLoop(), + eval_stream=test_stream, + epoch='final', + use_embeddings=True, + plot_latency_hist=True, + ) + + else: + # set train mode at the beginning of each epoch + loop.on_epoch_begin.do(F(on_train_epoch_begin)) + + # the optimizer and learning rate scheduler + if config.train.optimizer == OptimizerType.ADAM: + optimizer = tk.optim.Adam(params) + elif config.train.optimizer == OptimizerType.RMSPROP: + optimizer = tk.optim.RMSprop(params) + + def update_lr(): + n_cycles = int( + loop.epoch // # (loop.epoch - start_epoch) // + config.train.lr_anneal_epochs + ) + lr_discount = config.train.lr_anneal_ratio ** n_cycles + optimizer.set_lr(config.train.initial_lr * lr_discount) + + update_lr() + loop.on_epoch_end.do(F(update_lr)) + + # install the validation function and early-stopping + if config.train.val_epoch_freq: + loop.run_after_every( + F(validate), + epochs=config.train.val_epoch_freq, + ) + + # install the evaluation function during training + if config.train.test_epoch_freq: + loop.run_after_every( + F(lambda: evaluate( + n_z=config.train.test_n_z, + eval_loop=loop.test(), + eval_stream=test_stream, + epoch=loop.epoch, + plot_latency_hist=( + config.train.latency_hist_epoch_freq and + loop.epoch % config.train.latency_hist_epoch_freq == 0 + ) + )), + epochs=config.train.test_epoch_freq, + ) + + # install the plot and sample functions during training + def after_epoch(): + save_model() + loop.run_after_every(F(after_epoch), epochs=1) + + # train the model + tk.layers.set_eval_mode(vae) + on_train_epoch_begin() + initialize() + utils.fit_model( + loop=loop, + optimizer=optimizer, + fn=train_step, + stream=train_stream, + clip_norm=config.train.clip_norm, + global_clip_norm=config.train.global_clip_norm, + # pass to `loop.run()` + limit=max_epoch, + ) + finally: + in_context = [False] + + # the train loop + loop = mltk.TrainLoop(max_epoch=config.train.max_epoch) + + # checkpoint + ckpt = Checkpoint(vae=vae) + loop.add_callback(mltk.callbacks.AutoCheckpoint( + ckpt, + root_dir=exp.make_dirs('./checkpoint'), + 
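+        # write a checkpoint every `ckpt_epoch_freq` epochs, keeping only a
+        # bounded history (see `max_checkpoints_to_keep` below)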
epoch_freq=config.train.ckpt_epoch_freq, + max_checkpoints_to_keep=10, + )) + + # early-stopping + if config.train.val_epoch_freq and config.train.use_early_stopping: + loop.add_callback(mltk.callbacks.EarlyStopping( + checkpoint=ckpt, + root_dir=exp.abspath('./early-stopping'), + metric_name='val_loss', + )) + + # the summary writer + summary_cb = SummaryCallback(summary_dir=exp.abspath('./summary')) + loop.add_callback(summary_cb) + + # pre-train the struct_vae + try: + with loop: + start_epoch = 1 + part_params = params + latency_only = False + + if (config.model.arch == TraceVAEArch.DEFAULT) and config.train.struct_pretrain_epochs: + # train struct_vae first + print(f'Start to train vae with {len(part_params)} params ...') + train_part( + list(part_params), + start_epoch=start_epoch, + max_epoch=config.train.struct_pretrain_epochs, + latency_only=latency_only, + do_final_eval=False, + ) + + # train latency_vae next + part_params = [ + p for n, p in zip(param_names, params) + if n.startswith('latency_vae') + ] + start_epoch = config.train.struct_pretrain_epochs + 1 + latency_only = True + print(f'Start to train latency_vae with {len(part_params)} params ...') + + train_part( + part_params, + start_epoch=start_epoch, + max_epoch=config.train.max_epoch, + latency_only=latency_only, + do_final_eval=False, + ) + + # do final evaluation + train_part( + [], + start_epoch=-1, + max_epoch=-1, + latency_only=False, + do_final_eval=True, + ) + + except KeyboardInterrupt: + print( + 'Train interrupted, press Ctrl+C again to skip the final test ...', + file=sys.stderr, + ) + + +if __name__ == '__main__': + with mltk.Experiment(ExpConfig) as exp: + config = exp.config + device = config.device or T.first_gpu_device() + with T.use_device(device): + retrial = 0 + while True: + try: + main(exp) + except NANLossError as ex: + if ex.epoch != 1 or retrial >= 10: + raise + retrial += 1 + print( + f'\n' + f'Restart the experiment for the {retrial}-th time ' + f'due to NaN loss at epoch {ex.epoch}.\n', + file=sys.stderr + ) + if ex.epoch == 1: + for name in ['checkpoint', 'early-stopping', 'models', + 'plotting', 'summary']: + path = exp.abspath(name) + if os.path.isdir(name): + shutil.rmtree(path) + else: + break diff --git a/tracegnn/models/trace_vae/types.py b/tracegnn/models/trace_vae/types.py new file mode 100644 index 0000000..38f471e --- /dev/null +++ b/tracegnn/models/trace_vae/types.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass +from typing import * + +import dgl +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * + +__all__ = ['TraceGraphBatch'] + + +@dataclass(init=False) +class TraceGraphBatch(object): + __slots__ = [ + 'id_manager', 'latency_range', + 'trace_graphs', 'dgl_graphs', 'dgl_batch' + ] + + id_manager: Optional[TraceGraphIDManager] + trace_graphs: Optional[List[TraceGraph]] # the original trace graphs + dgl_graphs: Optional[List[dgl.DGLGraph]] # graph components + dgl_batch: Optional[dgl.DGLGraph] # the batched DGL graph + + def __init__(self, + *, + id_manager: Optional[TraceGraphIDManager] = None, + latency_range: Optional[TraceGraphLatencyRangeFile] = None, + trace_graphs: Optional[List[TraceGraph]] = None, + dgl_graphs: Optional[List[dgl.DGLGraph]] = None, + dgl_batch: Optional[dgl.DGLGraph] = None, + ): + if ((trace_graphs is None) or (id_manager is None)) and \ + ((dgl_graphs is None) or (dgl_batch is None)): + raise ValueError('Insufficient arguments.') + self.id_manager = id_manager + self.latency_range = latency_range + 
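+        # the check above guarantees that either (id_manager + trace_graphs) or
+        # (dgl_graphs + dgl_batch) is fully provided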
self.trace_graphs = trace_graphs + self.dgl_graphs = dgl_graphs + self.dgl_batch = dgl_batch + + def build_dgl(self, + add_self_loop: bool = True, + directed: Union[bool, str] = False, + ): + from .dataset import trace_graph_to_dgl + if self.dgl_graphs is None: + with T.no_grad(): + with T.use_device('cpu'): + self.dgl_graphs = [ + trace_graph_to_dgl( + g, + num_node_types=self.id_manager.num_operations, + add_self_loop=add_self_loop, + latency_range=self.latency_range, + directed=directed, + ) + for g in self.trace_graphs + ] + if self.dgl_batch is None: + with T.no_grad(): + self.dgl_batch = dgl.batch(self.dgl_graphs).to(T.current_device()) + + # @property + # def dgl_graphs(self) -> List[dgl.DGLGraph]: + # if self._dgl_graphs is None: + # self.build_dgl() + # return self._dgl_graphs + # + # @property + # def dgl_batch(self) -> dgl.DGLGraph: + # if self._dgl_batch is None: + # self.build_dgl() + # return self._dgl_batch diff --git a/tracegnn/utils/__init__.py b/tracegnn/utils/__init__.py new file mode 100644 index 0000000..6d4c4da --- /dev/null +++ b/tracegnn/utils/__init__.py @@ -0,0 +1,9 @@ +from .analyze_nll import * +from .array_buffer import * +from .data_utils import * +from .fscore_utils import * +from .id_assign import * +from .latency_codec import * +from .latency_range_file import * +from .misc import * +from .summary_callback import * diff --git a/tracegnn/utils/analyze_nll.py b/tracegnn/utils/analyze_nll.py new file mode 100644 index 0000000..63be715 --- /dev/null +++ b/tracegnn/utils/analyze_nll.py @@ -0,0 +1,134 @@ +import math +import os +import sys +import traceback +from functools import wraps +from typing import * + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from sklearn.metrics import f1_score + +from .fscore_utils import * + +__all__ = ['analyze_anomaly_nll'] + + +def analyze_anomaly_nll(nll_list: np.ndarray, + label_list: np.ndarray, + up_sample_normal: int = 1, + threshold: Optional[float] = None, + proba_cdf_file: Optional[str] = None, + auc_curve_file: Optional[str] = None, + method: Optional[str] = None, + dataset: Optional[str] = None, + save_dict: bool = False, + save_filename: str = 'baseline.csv' + ) -> Dict[str, float]: + + def log_error(method, default_value=None): + @wraps(method) + def wrapper(*args, **kwargs): + try: + return method(*args, **kwargs) + except Exception: + print(''.join(traceback.format_exception(*sys.exc_info())), file=sys.stderr) + return default_value + return wrapper + + def call_plot(fn_, *args, output_file, **kwargs): + if output_file == ':show:': + fig = fn_(*args, **kwargs) + plt.show() + plt.close() + else: + fn_(*args, output_file=output_file, **kwargs) + + # up sample normal nll & label if required + if up_sample_normal and up_sample_normal > 1: + normal_nll = nll_list[label_list == 0] + normal_label = label_list[label_list == 0] + nll_list = np.concatenate( + [normal_nll] * (up_sample_normal - 1) + [nll_list], + axis=0 + ) + label_list = np.concatenate( + [normal_label] * (up_sample_normal - 1) + [label_list], + axis=0 + ) + + # prepare for analyze + result_dict = {} + is_anomaly_list = label_list != 0 + + # separated nlls for different labels + result_dict['nll_normal'] = float(np.mean(nll_list[label_list == 0])) + result_dict['nll_drop'] = float(np.mean(nll_list[label_list == 1])) + result_dict['nll_latency'] = float(np.mean(nll_list[label_list == 2])) + + # auc score + result_dict['auc'] = float(auc_score(nll_list, is_anomaly_list)) + + # best f-score + F = log_error(best_fscore, 
default_value=(math.nan, math.nan)) + + def best_fscore_for_label(label): + not_label = 2 if label == 1 else 1 + mask = label_list != not_label + return F(nll_list[mask], label_list[mask] != 0) + + best_fscore_total, _, best_pr_total, best_rc_total = F(nll_list, is_anomaly_list) + best_fscore_drop, _, best_pr_drop, best_rc_drop = best_fscore_for_label(1) + best_fscore_latency, best_threshold_latency, best_pr_latency, best_rc_latency = best_fscore_for_label(2) + result_dict.update({ + 'best_fscore': float(best_fscore_total), + 'best_fscore_drop': float(best_fscore_drop), + 'best_fscore_latency': float(best_fscore_latency), + 'best_pr': float(best_pr_total), + 'best_rc': float(best_rc_total), + 'best_pr_drop': float(best_pr_drop), + 'best_rc_drop': float(best_rc_drop), + 'best_pr_latency': float(best_pr_latency), + 'best_rc_latency': float(best_rc_latency), + 'best_threshold_latency': float(best_threshold_latency) + }) + + # f-score + F = log_error(f1_score, default_value=math.nan) + + def fscore_for_label(label): + not_label = 2 if label == 1 else 1 + mask = label_list != not_label + return F(label_list[mask] != 0, nll_list[mask] > threshold) + + if threshold is not None: + result_dict.update({ + 'fscore': float(F(is_anomaly_list, nll_list > threshold)), + 'fscore_drop': float(fscore_for_label(1)), + 'fscore_latency': float(fscore_for_label(2)), + }) + + # save result + if save_dict and method and dataset: + dataset = dataset.rstrip('/') + + result_to_save = result_dict.copy() + result_to_save['dataset'] = dataset + result_to_save['method'] = method + + if os.path.exists(f'paper-data/{save_filename}'): + df = pd.read_csv(f'paper-data/{save_filename}') + + if not df[(df['dataset']==dataset)&(df['method']==method)].empty: + df.iloc[df[(df['dataset']==dataset)&(df['method']==method)].index[0]] = result_to_save + else: + df = df.append(result_to_save, ignore_index=True) + else: + df = pd.DataFrame() + df = df.append(result_to_save, ignore_index=True) + + os.makedirs('paper-data', exist_ok=True) + df.to_csv(f'paper-data/{save_filename}', index=False) + + return result_dict diff --git a/tracegnn/utils/array_buffer.py b/tracegnn/utils/array_buffer.py new file mode 100644 index 0000000..4488d92 --- /dev/null +++ b/tracegnn/utils/array_buffer.py @@ -0,0 +1,39 @@ +import numpy as np + +__all__ = ['ArrayBuffer'] + + +class ArrayBuffer(object): + + __slots__ = ['length', 'capacity', 'dtype', 'buffer'] + + def __init__(self, capacity: int = 32, dtype=np.float32): + self.length = 0 + self.capacity = capacity + self.dtype = dtype + self.buffer = np.empty([capacity], dtype=dtype) + + def __len__(self): + return self.length + + def __iter__(self): + return iter(self.array) + + @property + def array(self): + return self.buffer[:self.length] + + def extend(self, items): + offset = self.length + new_length = len(items) + req_capacity = new_length + offset + if req_capacity > self.capacity: + self.capacity = capacity = max(self.capacity * 2, req_capacity) + buffer = np.empty([capacity], dtype=self.dtype) + buffer[:offset] = self.buffer[:offset] + self.buffer = buffer + self.buffer[offset: offset + new_length] = items + self.length += new_length + + def clear(self): + self.length = 0 diff --git a/tracegnn/utils/data_utils.py b/tracegnn/utils/data_utils.py new file mode 100644 index 0000000..b858204 --- /dev/null +++ b/tracegnn/utils/data_utils.py @@ -0,0 +1,23 @@ +from typing import * + +import numpy as np + +__all__ = [ + 'compute_cdf', +] + + +def compute_cdf(arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 
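+    """
+    Compute the empirical CDF over the distinct values in `arr`.
+
+    >>> keys, values = compute_cdf(np.array([1, 1, 2, 3]))
+    >>> keys
+    array([1, 2, 3])
+    >>> values
+    array([0.5 , 0.75, 1.  ])
+    """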
+ # calculate bin size cdf + hist = {} + for v in arr: + if v not in hist: + hist[v] = 0 + hist[v] += 1 + + keys = np.array(sorted(hist)) + values = np.array([hist[v] for v in keys], dtype=np.float64) + values /= values.sum() + values = np.cumsum(values) + + return keys, values diff --git a/tracegnn/utils/fscore_utils.py b/tracegnn/utils/fscore_utils.py new file mode 100644 index 0000000..23cb53a --- /dev/null +++ b/tracegnn/utils/fscore_utils.py @@ -0,0 +1,37 @@ +from typing import * + +import numpy as np +from sklearn.metrics import precision_recall_curve, average_precision_score + +__all__ = [ + 'fscore_for_precision_and_recall', + 'best_fscore', + 'auc_score', +] + + +def fscore_for_precision_and_recall(precision: np.ndarray, + recall: np.ndarray) -> np.ndarray: + precision = np.asarray(precision, dtype=np.float64) + recall = np.asarray(recall, dtype=np.float64) + return np.where( + (precision == 0) | (recall == 0), + 0.0, + 2. * np.exp( + np.log(np.maximum(precision, 1e-8)) + + np.log(np.maximum(recall, 1e-8)) - + np.log(np.maximum(precision + recall, 1e-8)) + ) + ) + + +def best_fscore(proba: np.ndarray, + truth: np.ndarray) -> Tuple[float, float]: + precision, recall, threshold = precision_recall_curve(truth, proba) + fscore = fscore_for_precision_and_recall(precision, recall) + idx = np.argmax(fscore[:-1]) + return fscore[idx], threshold[idx], precision[idx], recall[idx] + + +def auc_score(proba: np.ndarray, truth: np.ndarray) -> float: + return float(average_precision_score(truth, proba)) diff --git a/tracegnn/utils/graph_conversion.py b/tracegnn/utils/graph_conversion.py new file mode 100644 index 0000000..4c4a2a8 --- /dev/null +++ b/tracegnn/utils/graph_conversion.py @@ -0,0 +1,35 @@ +import networkx as nx +import numpy as np +from tracegnn.data.trace_graph import TraceGraphIDManager + + +def np_to_nx(DV: np.ndarray, DE: np.ndarray, id_manager: TraceGraphIDManager) -> nx.Graph: + """ + DV: [n x d] + DE: [n x n x 1] or [n x n] + """ + # Reshape DE to [n x n] + if len(DE.shape) == 3: + DE = DE[:,:,0] + + # Choose Nodes + nodes_idx = (1.0-np.sum(DV[:,:len(id_manager.operation_id)], axis=-1)) < np.max(DV[:,:len(id_manager.operation_id)], axis=-1) + DV = DV[nodes_idx] + DE = DE[nodes_idx][:, nodes_idx] + + DE = (DE + DE.T) / 2 + + # Get Node Type + node_type = np.argmax(DV[:,:len(id_manager.operation_id)], axis=-1) + + # Generate nx Graph + g: nx.Graph = nx.from_numpy_matrix(DE, create_using=nx.Graph) + + for i in range(len(g.nodes)): + g.nodes[i]['node_type'] = node_type[i] + g.nodes[i]['operation'] = id_manager.operation_id.reverse_map(node_type[i]) + + # MST + # g = nx.maximum_spanning_tree(g) + + return g diff --git a/tracegnn/utils/id_assign.py b/tracegnn/utils/id_assign.py new file mode 100644 index 0000000..0601b87 --- /dev/null +++ b/tracegnn/utils/id_assign.py @@ -0,0 +1,58 @@ +import os + +import yaml + +__all__ = ['IDAssign'] + + +class IDAssign(object): + + def __init__(self, path: str): + self._path = path + self._mapping = {'': 0} # by default let 0 == '' (a NULL item) + + if os.path.isfile(path): + with open(path, 'r', encoding='utf-8') as f: + self._mapping = yaml.safe_load(f.read()) + + if self._mapping: + self._next_index = max(self._mapping.values()) + 1 + self._rev_mapping = {v: k for k, v in self._mapping.items()} + else: + self._next_index = 0 + self._rev_mapping = {} + + def __len__(self): + return self._next_index + + def __getitem__(self, key): + return self._mapping[key] + + @property + def path(self) -> str: + return self._path + + def dump_to(self, path: 
str): + cnt = yaml.safe_dump(self._mapping) + with open(path, 'w', encoding='utf-8') as f: + f.write(cnt) + + def get_or_assign(self, key: str): + ret = self._mapping.get(key, None) + if ret is None: + self._mapping[key] = ret = self._next_index + self._rev_mapping[ret] = key + self._next_index += 1 + return ret + + def reverse_map(self, index: int): + return self._rev_mapping[index] + + def flush(self): + self.dump_to(self._path) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.flush() diff --git a/tracegnn/utils/latency_codec.py b/tracegnn/utils/latency_codec.py new file mode 100644 index 0000000..21ab9b1 --- /dev/null +++ b/tracegnn/utils/latency_codec.py @@ -0,0 +1,135 @@ +from typing import * + +import numpy as np + +from tracegnn.constants import * + + +if not USE_MULTI_DIM_LATENCY_CODEC: + __all__ = [] + +else: + __all__ = [ + 'encode_multi_latency', + 'decode_multi_latency', + 'encode_latency', + 'decode_latency', + ] + + EPS = 1e-6 + + + def encode_multi_latency(latencies: Sequence[np.ndarray], + max_latency_dims: int + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Encode multiple latencies into (codec, onehot) feature vectors. + + If `max_latency_dims` is sufficient: + + >>> latencies = [np.array([0.0, 9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 0.0, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 3) + >>> codec + array([[-1. , -1. , -1. , -0.74, -0.8 , -1. ], + [ 0.92, -1. , -1. , -0.88, -1. , -1. ], + [-0.94, -0.8 , -1. , -1. , -1. , -1. ], + [ 0.74, 0. , -1. , 0.82, 0.8 , -1. ], + [-0.76, -1. , -0.8 , -1. , -1. , -0.8 ]]) + >>> onehot + array([[ True, False, False, False, True, False], + [ True, False, False, True, False, False], + [False, True, False, True, False, False], + [False, True, False, False, True, False], + [False, False, True, False, False, True]]) + >>> decode_multi_latency(codec, onehot, 3) + [array([ 0. , 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 0. , 99.1, 100. ])] + + If `max_latency_dims` is partially sufficient: + + >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 2) + >>> codec + array([[ 0.92, -1. , -0.74, -0.8 ], + [-0.94, -0.8 , -0.88, -1. ], + [ 0.74, 0. , 0.82, 0.8 ], + [-0.76, 1. , -1. , 1. ]]) + >>> onehot + array([[ True, False, False, True], + [False, True, True, False], + [False, True, False, True], + [False, True, False, True]]) + >>> decode_multi_latency(codec, onehot, 2) + [array([ 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 99.1, 100. ])] + + If `max_latency_dims` is insufficient: + + >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 1) + >>> codec + array([[ 0.92, 1.26], + [ 1.06, -0.88], + [10.74, 18.82], + [19.24, 19. ]]) + >>> onehot + array([[ True, True], + [ True, True], + [ True, True], + [ True, True]]) + >>> decode_multi_latency(codec, onehot, 1) + [array([ 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 99.1, 100. ])] + """ + codec, onehot = [], [] + for residual in latencies: + for i in range(max_latency_dims - 1): + if i == 0: + onehot.append(residual < 10) + else: + onehot.append(np.logical_and(EPS < residual, residual < 10)) + r = residual % 10 + codec.append(r) + residual = (residual - r) / 10 + onehot.append(EPS < residual) + codec.append(residual) + codec, onehot = np.stack(codec, axis=-1), np.stack(onehot, axis=-1) + codec = codec / 5. 
diff --git a/tracegnn/utils/latency_codec.py b/tracegnn/utils/latency_codec.py
new file mode 100644
index 0000000..21ab9b1
--- /dev/null
+++ b/tracegnn/utils/latency_codec.py
@@ -0,0 +1,135 @@
+from typing import *
+
+import numpy as np
+
+from tracegnn.constants import *
+
+
+if not USE_MULTI_DIM_LATENCY_CODEC:
+    __all__ = []
+
+else:
+    __all__ = [
+        'encode_multi_latency',
+        'decode_multi_latency',
+        'encode_latency',
+        'decode_latency',
+    ]
+
+    EPS = 1e-6
+
+
+    def encode_multi_latency(latencies: Sequence[np.ndarray],
+                             max_latency_dims: int
+                             ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Encode multiple latencies into (codec, onehot) feature vectors.
+
+        If `max_latency_dims` is sufficient:
+
+        >>> latencies = [np.array([0.0, 9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 0.0, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 3)
+        >>> codec
+        array([[-1.  , -1.  , -1.  , -0.74, -0.8 , -1.  ],
+               [ 0.92, -1.  , -1.  , -0.88, -1.  , -1.  ],
+               [-0.94, -0.8 , -1.  , -1.  , -1.  , -1.  ],
+               [ 0.74,  0.  , -1.  ,  0.82,  0.8 , -1.  ],
+               [-0.76, -1.  , -0.8 , -1.  , -1.  , -0.8 ]])
+        >>> onehot
+        array([[ True, False, False, False,  True, False],
+               [ True, False, False,  True, False, False],
+               [False,  True, False,  True, False, False],
+               [False,  True, False, False,  True, False],
+               [False, False,  True, False, False,  True]])
+        >>> decode_multi_latency(codec, onehot, 3)
+        [array([  0. ,   9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,   0. ,  99.1, 100. ])]
+
+        If `max_latency_dims` is partially sufficient:
+
+        >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 2)
+        >>> codec
+        array([[ 0.92, -1.  , -0.74, -0.8 ],
+               [-0.94, -0.8 , -0.88, -1.  ],
+               [ 0.74,  0.  ,  0.82,  0.8 ],
+               [-0.76,  1.  , -1.  ,  1.  ]])
+        >>> onehot
+        array([[ True, False, False,  True],
+               [False,  True,  True, False],
+               [False,  True, False,  True],
+               [False,  True, False,  True]])
+        >>> decode_multi_latency(codec, onehot, 2)
+        [array([  9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,  99.1, 100. ])]
+
+        If `max_latency_dims` is insufficient:
+
+        >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 1)
+        >>> codec
+        array([[ 0.92,  1.26],
+               [ 1.06, -0.88],
+               [10.74, 18.82],
+               [19.24, 19.  ]])
+        >>> onehot
+        array([[ True,  True],
+               [ True,  True],
+               [ True,  True],
+               [ True,  True]])
+        >>> decode_multi_latency(codec, onehot, 1)
+        [array([  9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,  99.1, 100. ])]
+        """
+        codec, onehot = [], []
+        for residual in latencies:
+            for i in range(max_latency_dims - 1):
+                if i == 0:
+                    onehot.append(residual < 10)
+                else:
+                    onehot.append(np.logical_and(EPS < residual, residual < 10))
+                r = residual % 10
+                codec.append(r)
+                residual = (residual - r) / 10
+            onehot.append(EPS < residual)
+            codec.append(residual)
+        codec, onehot = np.stack(codec, axis=-1), np.stack(onehot, axis=-1)
+        codec = codec / 5. - 1  # scale to [-1, 1]
+        return codec, onehot
+
+
+    def decode_multi_latency(codec: np.ndarray,
+                             onehot: np.ndarray,
+                             max_latency_dims: int
+                             ) -> List[np.ndarray]:
+        if codec.shape[-1] % max_latency_dims != 0:
+            raise ValueError(
+                f'codec.shape[-1] % max_latency_dims != 0: '
+                f'codec.shape = {codec.shape!r}, where max_latency_dims = {max_latency_dims!r}'
+            )
+
+        ret = []
+        codec = (np.clip(codec, -1, 1) + 1) * 5  # scale back from [-1, 1]
+        for i in range(codec.shape[-1] // max_latency_dims):
+            left = i * max_latency_dims
+            right = left + max_latency_dims - 1
+            m = onehot[..., right]
+            r = codec[..., right] * m.astype(np.float32)
+            while right > left:
+                r = r * 10
+                right -= 1
+                m |= onehot[..., right]
+                r += codec[..., right]
+            ret.append(r)
+
+        return ret
+
+
+    def encode_latency(latency: np.ndarray,
+                       max_latency_dims: int
+                       ) -> Tuple[np.ndarray, np.ndarray]:
+        return encode_multi_latency([latency], max_latency_dims)
+
+
+    def decode_latency(codec: np.ndarray,
+                       onehot: np.ndarray,
+                       max_latency_dims: int
+                       ) -> np.ndarray:
+        return decode_multi_latency(codec, onehot, max_latency_dims)[0]
diff --git a/tracegnn/utils/latency_range_file.py b/tracegnn/utils/latency_range_file.py
new file mode 100644
index 0000000..620a918
--- /dev/null
+++ b/tracegnn/utils/latency_range_file.py
@@ -0,0 +1,80 @@
+import os
+from typing import *
+
+import yaml
+
+__all__ = ['TraceGraphLatencyRangeFile']
+
+LATENCY_RANGE_FILE = 'latency_range.yml'
+
+
+class TraceGraphLatencyRangeFile(object):
+    __slots__ = ['root_dir', 'yaml_path', 'latency_data']
+
+    root_dir: str
+    yaml_path: str
+    latency_data: Dict[int, Dict[str, float]]
+
+    def __init__(self, root_dir: str, require_exists: bool = False):
+        self.root_dir = os.path.abspath(root_dir)
+        self.yaml_path = os.path.join(self.root_dir, LATENCY_RANGE_FILE)
+        self.latency_data = {}
+        if os.path.exists(self.yaml_path):
+            with open(self.yaml_path, 'r', encoding='utf-8') as f:
+                obj = yaml.safe_load(f.read())
+                self.latency_data = {
+                    int(op_id): v
+                    for op_id, v in obj.items()
+                }
+        elif require_exists:
+            raise IOError(f'LatencyRangeFile does not exist: {self.yaml_path}')
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.flush()
+
+    def __contains__(self, item):
+        return int(item) in self.latency_data
+
+    def __getitem__(self, operation_id: int) -> Tuple[float, float]:
+        v = self.latency_data[int(operation_id)]
+        return v['mean'], v['std']
+
+    def __setitem__(self,
+                    operation_id: int,
+                    value: Union[Tuple[float, float], Dict[str, float]]):
+        self.update_item(operation_id, value)
+
+    def get_item(self, operation_id: int):
+        return self.latency_data[int(operation_id)]
+
+    def update_item(self,
+                    operation_id: int,
+                    value: Union[Tuple[float, float], Dict[str, float]]
+                    ):
+        if isinstance(value, (tuple, list)) and len(value) == 2:
+            mean, std = value
+            value = {'mean': mean, 'std': std}
+
+        key = int(operation_id)
+        if key not in self.latency_data:
+            self.latency_data[key] = {}
+        self.latency_data[key].update({k: float(v) for k, v in value.items()})
+
+    def clear(self):
+        self.latency_data.clear()
+
+    def flush(self):
+        self.dump_to(self.root_dir)
+
+    def dump_to(self, output_dir: str):
+        payload = {
+            k: v
+            for k, v in self.latency_data.items()
+        }
+        cnt = yaml.safe_dump(payload)
+        path = os.path.join(output_dir, LATENCY_RANGE_FILE)
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(cnt)
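A minimal round-trip sketch for `TraceGraphLatencyRangeFile`; the directory name is a placeholder and must already exist:
```
from tracegnn.utils.latency_range_file import TraceGraphLatencyRangeFile

with TraceGraphLatencyRangeFile('some_dataset/id_manager') as latency_range:
    latency_range[3] = (12.5, 4.2)  # operation_id 3 -> (mean, std)
    mean, std = latency_range[3]
    assert 3 in latency_range
# on exit, flush() writes latency_range.yml under the directory
```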
diff --git a/tracegnn/utils/misc.py b/tracegnn/utils/misc.py
new file mode 100644
index 0000000..8d9837d
--- /dev/null
+++ b/tracegnn/utils/misc.py
@@ -0,0 +1,54 @@
+import re
+import sys
+from contextlib import contextmanager
+from tempfile import TemporaryDirectory
+from typing import *
+from urllib.request import urlretrieve
+
+import os
+
+__all__ = [
+    'abspath_relative_to_file',
+    'fake_tqdm',
+    'ensure_parent_exists',
+    'as_local_file',
+]
+
+
+def abspath_relative_to_file(path, file_path):
+    return os.path.join(
+        os.path.split(os.path.abspath(file_path))[0],
+        path
+    )
+
+
+def fake_tqdm(data, *args, **kwargs):
+    yield from data
+
+
+def ensure_parent_exists(path):
+    if path is not None:
+        path = os.path.abspath(path)
+        parent_dir = os.path.split(path)[0]
+        if not os.path.isdir(parent_dir):
+            os.makedirs(parent_dir, exist_ok=True)
+    return path
+
+
+@contextmanager
+def as_local_file(uri: str) -> Iterator[str]:
+    if re.match(r'^https?://', uri):
+        m = re.match(r'^(https?://[^/]+)/([a-z0-9]{24})/(.*)?$', uri)
+        if m:
+            uri = f'{m.group(1)}/v1/_getfile/{m.group(2)}'
+            if m.group(3):
+                uri += f'/{m.group(3)}'
+        with TemporaryDirectory() as temp_dir:
+            filename = os.path.join(temp_dir, uri.rstrip('/').rsplit('/', 1)[-1])
+            print(f'Download: {uri}', file=sys.stderr)
+            urlretrieve(uri, filename=filename)
+            yield filename
+    elif uri.startswith('file://'):
+        yield uri[7:]
+    else:
+        yield uri
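`as_local_file` yields a local path for a URI, downloading `http(s)` sources into a temporary directory first. A sketch with a placeholder `file://` URI, which is simply unwrapped without copying:
```
from tracegnn.utils.misc import as_local_file

with as_local_file('file:///tmp/checkpoint.pt') as path:
    print(path)  # -> /tmp/checkpoint.pt

# an https:// URI would instead be fetched via urlretrieve and the
# temporary file path yielded for the duration of the with-block
```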
diff --git a/tracegnn/utils/summary_callback.py b/tracegnn/utils/summary_callback.py
new file mode 100644
index 0000000..bb57b17
--- /dev/null
+++ b/tracegnn/utils/summary_callback.py
@@ -0,0 +1,75 @@
+from typing import *
+
+import numpy as np
+from mltk.callbacks import Callback, CallbackData, Stage
+from torch.utils.tensorboard import SummaryWriter
+
+try:
+    # problem: https://github.com/pytorch/pytorch/issues/30966
+    import tensorflow as tf
+    import tensorboard as tb
+
+    tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
+except ImportError:
+    pass
+
+__all__ = ['SummaryCallback']
+
+
+class SummaryCallback(Callback):
+    """Callback class that writes metrics to TensorBoard."""
+
+    writer: SummaryWriter
+    stage: Optional[Stage]
+    stage_stack: List[Stage]
+    global_step: int
+
+    def __init__(self, *, summary_dir=None, summary_writer=None, global_step: int = 0):
+        if (summary_dir is None) == (summary_writer is None):
+            raise ValueError('Exactly one of `summary_dir` and `summary_writer` '
+                             'should be specified.')
+
+        if summary_dir is not None:
+            summary_writer = SummaryWriter(summary_dir)
+        self.writer = summary_writer
+        self.stage = None
+        self.stage_stack = []
+        self.global_step = global_step
+
+    def add_embedding(self, *args, **kwargs):
+        kwargs.setdefault('global_step', self.global_step)
+        return self.writer.add_embedding(*args, **kwargs)
+
+    def update_metrics(self, metrics):
+        if metrics:
+            for key, val in metrics.items():
+                key = self.stage_stack[-1].type.add_metric_prefix(key)
+                if np.shape(val) != ():
+                    val = np.mean(val)
+                self.writer.add_scalar(key, val, self.global_step)
+
+    def set_global_step(self, step: int):
+        self.global_step = step
+
+    def on_stage_begin(self, data: CallbackData):
+        self.stage_stack.append(data.stage)
+
+    def on_stage_end(self, data: CallbackData):
+        self.stage_stack.pop()
+
+    def on_test_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
+
+    def on_validation_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
+
+    def on_batch_begin(self, data: CallbackData):
+        if len(self.stage_stack) == 1:
+            self.global_step += 1
+
+    def on_batch_end(self, data: CallbackData):
+        if len(self.stage_stack) == 1:
+            self.update_metrics(data.metrics)
+
+    def on_epoch_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
diff --git a/train.sh b/train.sh
new file mode 100644
index 0000000..8da1d10
--- /dev/null
+++ b/train.sh
@@ -0,0 +1,4 @@
+echo "Usage: bash train.sh [dataset_path]"
+echo "DATASET: $1"
+rm -rf results
+python3 -m tracegnn.models.trace_vae.train --device=cpu --dataset.root_dir="$1" --seed=1234 --model.struct.z_dim=10 --model.struct.decoder.use_prior_flow=true --train.z_unit_ball_reg=1 --model.latency.z2_dim=10 --model.latency.decoder.condition_on_z=true
\ No newline at end of file
-- 
GitLab