From 07a0fc6960cb04d237ebb0c2670538197ff48379 Mon Sep 17 00:00:00 2001
From: openaiops
Date: Tue, 27 Feb 2024 22:50:24 +0800
Subject: [PATCH] Initial commit

---
 README.md | 18 +
 requirements.txt | 35 +
 sample_dataset/id_manager/latency_range.yml | 3316 +++++++++++++++++
 sample_dataset/id_manager/operation_id.yml | 1489 ++++++++
 sample_dataset/id_manager/service_id.yml | 316 ++
 sample_dataset/id_manager/status_id.yml | 21 +
 sample_dataset/test.csv | 51 +
 sample_dataset/train.csv | 22 +
 sample_dataset/val.csv | 22 +
 test.sh | 4 +
 tracegnn/__init__.py | 0
 tracegnn/cli/__init__.py | 0
 tracegnn/cli/data_process.py | 108 +
 tracegnn/constants.py | 13 +
 tracegnn/data/__init__.py | 3 +
 tracegnn/data/bytes_db.py | 242 ++
 tracegnn/data/trace_graph.py | 617 +++
 tracegnn/data/trace_graph_db.py | 108 +
 tracegnn/models/__init__.py | 0
 tracegnn/models/trace_vae/__init__.py | 0
 tracegnn/models/trace_vae/constants.py | 3 +
 tracegnn/models/trace_vae/dataset.py | 134 +
 tracegnn/models/trace_vae/distributions.py | 356 ++
 tracegnn/models/trace_vae/evaluation.py | 570 +++
 tracegnn/models/trace_vae/graph_utils.py | 424 +++
 tracegnn/models/trace_vae/model/__init__.py | 1 +
 tracegnn/models/trace_vae/model/gnn_layers.py | 190 +
 .../models/trace_vae/model/latency_vae.py | 444 +++
 .../models/trace_vae/model/model_utils.py | 35 +
 .../trace_vae/model/operation_embedding.py | 25 +
 tracegnn/models/trace_vae/model/pooling.py | 75 +
 .../models/trace_vae/model/realnvp_flow.py | 49 +
 tracegnn/models/trace_vae/model/struct_vae.py | 414 ++
 tracegnn/models/trace_vae/model/trace_vae.py | 161 +
 tracegnn/models/trace_vae/tensor_utils.py | 225 ++
 tracegnn/models/trace_vae/test.py | 275 ++
 tracegnn/models/trace_vae/test_utils.py | 106 +
 tracegnn/models/trace_vae/train.py | 531 +++
 tracegnn/models/trace_vae/types.py | 74 +
 tracegnn/utils/__init__.py | 9 +
 tracegnn/utils/analyze_nll.py | 134 +
 tracegnn/utils/array_buffer.py | 39 +
 tracegnn/utils/data_utils.py | 23 +
 tracegnn/utils/fscore_utils.py | 37 +
 tracegnn/utils/graph_conversion.py | 35 +
 tracegnn/utils/id_assign.py | 58 +
 tracegnn/utils/latency_codec.py | 135 +
 tracegnn/utils/latency_range_file.py | 80 +
 tracegnn/utils/misc.py | 54 +
 tracegnn/utils/summary_callback.py | 75 +
 train.sh | 4 +
 51 files changed, 11160 insertions(+)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 sample_dataset/id_manager/latency_range.yml
 create mode 100644 sample_dataset/id_manager/operation_id.yml
 create mode 100644 sample_dataset/id_manager/service_id.yml
 create mode 100644 sample_dataset/id_manager/status_id.yml
 create mode 100644 sample_dataset/test.csv
 create mode 100644 sample_dataset/train.csv
 create mode 100644 sample_dataset/val.csv
 create mode 100644 test.sh
 create mode 100644 tracegnn/__init__.py
 create mode 100644 tracegnn/cli/__init__.py
 create mode 100644 tracegnn/cli/data_process.py
 create mode 100644 tracegnn/constants.py
 create mode 100644 tracegnn/data/__init__.py
 create mode 100644 tracegnn/data/bytes_db.py
 create mode 100644 tracegnn/data/trace_graph.py
 create mode 100644 tracegnn/data/trace_graph_db.py
 create mode 100644 tracegnn/models/__init__.py
 create mode 100644 tracegnn/models/trace_vae/__init__.py
 create mode 100644 tracegnn/models/trace_vae/constants.py
 create mode 100644 tracegnn/models/trace_vae/dataset.py
 create mode 100644 tracegnn/models/trace_vae/distributions.py
 create mode 100644 tracegnn/models/trace_vae/evaluation.py
 create mode 100644 tracegnn/models/trace_vae/graph_utils.py
 create mode 100644 tracegnn/models/trace_vae/model/__init__.py
 create mode 100644 tracegnn/models/trace_vae/model/gnn_layers.py
 create mode 100644 tracegnn/models/trace_vae/model/latency_vae.py
 create mode 100644 tracegnn/models/trace_vae/model/model_utils.py
 create mode 100644 tracegnn/models/trace_vae/model/operation_embedding.py
 create mode 100644 tracegnn/models/trace_vae/model/pooling.py
 create mode 100644 tracegnn/models/trace_vae/model/realnvp_flow.py
 create mode 100644 tracegnn/models/trace_vae/model/struct_vae.py
 create mode 100644 tracegnn/models/trace_vae/model/trace_vae.py
 create mode 100644 tracegnn/models/trace_vae/tensor_utils.py
 create mode 100644 tracegnn/models/trace_vae/test.py
 create mode 100644 tracegnn/models/trace_vae/test_utils.py
 create mode 100644 tracegnn/models/trace_vae/train.py
 create mode 100644 tracegnn/models/trace_vae/types.py
 create mode 100644 tracegnn/utils/__init__.py
 create mode 100644 tracegnn/utils/analyze_nll.py
 create mode 100644 tracegnn/utils/array_buffer.py
 create mode 100644 tracegnn/utils/data_utils.py
 create mode 100644 tracegnn/utils/fscore_utils.py
 create mode 100644 tracegnn/utils/graph_conversion.py
 create mode 100644 tracegnn/utils/id_assign.py
 create mode 100644 tracegnn/utils/latency_codec.py
 create mode 100644 tracegnn/utils/latency_range_file.py
 create mode 100644 tracegnn/utils/misc.py
 create mode 100644 tracegnn/utils/summary_callback.py
 create mode 100644 train.sh

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..184db47
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+# TraceVAE
+This is the source code for "Unsupervised Anomaly Detection on Microservice Traces through Graph VAE".
+
+## Usage
+1. `pip3 install -r requirements.txt`.
+2. Convert the dataset with `python3 -m tracegnn.cli.data_process preprocess -i [input_path] -o [dataset_path]`. The sample dataset is under `sample_dataset`. (Note: this sample dataset only demonstrates the data format and usage; it cannot be used to evaluate model performance. Please replace it with your own dataset.)
+Sample:
+```
+python3 -m tracegnn.cli.data_process preprocess -i sample_dataset -o sample_dataset
+```
+3. Train the model with `bash train.sh [dataset_path]`:
+```
+bash train.sh sample_dataset
+```
+4. Evaluate the model with `bash test.sh [model_path] [dataset_path]`. The default model path is `results/train/models/final.pt`:
+```
+bash test.sh results/train/models/final.pt sample_dataset
+```
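For orientation, the `sample_dataset` directory referenced in the README (and added later in this patch) has the following layout, reconstructed from the diffstat above. The per-file annotations are inferences from the YAML excerpts shown below in this patch; `status_id.yml`'s content does not appear in this excerpt, so its description is only an assumption by analogy:

```
sample_dataset/
├── train.csv
├── val.csv
├── test.csv
└── id_manager/
    ├── service_id.yml      # service name -> integer service id
    ├── operation_id.yml    # "<service id>/<operation id>" -> integer operation id
    ├── status_id.yml       # presumably: status value -> integer status id (not shown here)
    └── latency_range.yml   # operation id -> {mean, p99, std} latency statistics
```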
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7e40a0b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,35 @@
+git+https://gitee.com/haowen-xu/ml-essentials
+git+https://gitee.com/haowen-xu/tensorkit
+git+https://gitee.com/mirrors/ZhuSuan.git
+click
+jinja2
+networkx
+numpy
+pandas
+PyYAML
+python-snappy
+scikit-learn
+seaborn
+semver
+natsort
+imageio
+fs
+lazy_object_proxy
+kafka-python
+tqdm
+loguru
+tensorboard==1.15
+numba
+
+-f https://download.pytorch.org/whl/cu116/torch_stable.html
+torch==1.12.0+cu116
+
+-f https://data.dgl.ai/wheels/repo.html
+dgl-cu116
+
+-f https://data.pyg.org/whl/torch-1.12.0+cu116.html
+torch-scatter
+torch-sparse
+torch-cluster
+torch-spline-conv
+torch-geometric
diff --git a/sample_dataset/id_manager/latency_range.yml b/sample_dataset/id_manager/latency_range.yml
new file mode 100644
index 0000000..1aa1da0
--- /dev/null
+++ b/sample_dataset/id_manager/latency_range.yml
@@ -0,0 +1,3316 @@
+1: + mean: 1.0 + p99: 3.0 + std: 1.0 +3: + mean: 1.0 + p99: 3.0 + std: 1.0 +4: + mean: 1.0 + p99: 3.0 + std: 1.0 +5: + mean: 1.0 + p99: 3.0 + std: 1.0 +6: + mean: 1.0 + p99: 3.0 + std: 1.0 +7: + mean: 1.0 + p99: 3.0 + std: 1.0 +8: + mean: 1.0 + p99: 3.0 + std: 1.0 +9: + mean: 1.0 + p99: 3.0 + std: 1.0 +10: + mean: 1.0 + p99: 3.0 + std: 1.0 +11: + mean: 1.0 + p99: 3.0 + std: 1.0 +12: + mean: 1.0 + p99: 3.0 + std: 1.0 +13: + mean: 1.0 + p99: 3.0 + std: 1.0 +15: + mean: 1.0 + p99: 3.0 + std: 1.0 +16: + mean: 1.0 + p99: 3.0 + std: 1.0 +17: + mean: 1.0 + p99: 3.0 + std: 1.0 +19: + mean: 1.0 + p99: 3.0 + std: 1.0 +20: + mean: 1.0 + p99: 3.0 + std: 1.0 +21: + mean: 1.0 + p99: 3.0 + std: 1.0 +23: + mean: 1.0 + p99: 3.0 + std: 1.0 +26: + mean: 1.0 + p99: 3.0 + std: 1.0 +27: + mean: 1.0 + p99: 3.0 + std: 1.0 +28: + mean: 1.0 + p99: 3.0 + std: 1.0 +29: + mean: 1.0 + p99: 3.0 + std: 1.0 +30: + mean: 1.0 + p99: 3.0 + std: 1.0 +47: + mean: 1.0 + p99: 3.0 + std: 1.0 +48: + mean: 1.0 + p99: 3.0 + std: 1.0 +62: + mean: 1.0 + p99: 3.0 + std: 1.0 +63: + mean: 1.0 + p99: 3.0 + std: 1.0 +64: + mean: 1.0 + p99: 3.0 + std: 1.0 +65: + mean: 1.0 + p99: 3.0 + std: 1.0 +67: + mean: 1.0 + p99: 3.0 + std: 1.0 +68: + mean: 1.0 + p99: 3.0 + std: 1.0 +69: + mean: 1.0 + p99: 3.0 + std: 1.0 +70: + mean: 1.0 + p99: 3.0 + std: 1.0 +71: + mean: 1.0 + p99: 3.0 + std: 1.0 +72: + mean: 1.0 + p99: 3.0 + std: 1.0 +73: + mean: 1.0 + p99: 3.0 + std: 1.0 +74: + mean: 1.0 + p99: 3.0 + std: 1.0 +75: + mean: 1.0 + p99: 3.0 + std: 1.0 +76: + mean: 1.0 + p99: 3.0 + std: 1.0 +77: + mean: 1.0 + p99: 3.0 + std: 1.0 +78: + mean: 1.0 + p99: 3.0 + std: 1.0 +79: + mean: 1.0 + p99: 3.0 + std: 1.0 +80: + mean: 1.0 + p99: 3.0 + std: 1.0 +83: + mean: 1.0 + p99: 3.0 + std: 1.0 +84: + mean: 1.0 + p99: 3.0 + std: 1.0 +94: + mean: 1.0 + p99: 3.0 + std: 1.0 +96: + mean: 1.0 + p99: 3.0 + std: 1.0 +98: + mean: 1.0 + p99: 3.0 + std: 1.0 +99: + mean: 1.0 + p99: 3.0 + std: 1.0 +102: + mean: 1.0 + p99: 3.0 + std: 1.0 +103: + mean: 1.0 + p99: 3.0 + std: 1.0 +105: + mean: 1.0 + p99: 3.0 + std: 1.0 +106: + mean: 1.0 + p99: 3.0 + std: 1.0 +107: + mean: 1.0 + p99: 3.0 + std: 1.0 +108: + mean: 1.0 + p99: 3.0 + std: 1.0 +110: + mean: 1.0 + p99: 3.0 + std: 1.0 +112: + mean: 1.0 + p99: 3.0 + std: 1.0 +113: + mean: 1.0 + p99: 3.0 + std: 1.0 +115: + mean: 1.0 + p99: 3.0 + std: 1.0 +116: + mean: 1.0 + p99: 3.0 + std: 1.0 +117: + mean: 1.0 + p99: 3.0 + std: 1.0 +118: + mean: 1.0 + p99:
3.0 + std: 1.0 +119: + mean: 1.0 + p99: 3.0 + std: 1.0 +121: + mean: 1.0 + p99: 3.0 + std: 1.0 +122: + mean: 1.0 + p99: 3.0 + std: 1.0 +123: + mean: 1.0 + p99: 3.0 + std: 1.0 +124: + mean: 1.0 + p99: 3.0 + std: 1.0 +139: + mean: 1.0 + p99: 3.0 + std: 1.0 +140: + mean: 1.0 + p99: 3.0 + std: 1.0 +141: + mean: 1.0 + p99: 3.0 + std: 1.0 +142: + mean: 1.0 + p99: 3.0 + std: 1.0 +143: + mean: 1.0 + p99: 3.0 + std: 1.0 +144: + mean: 1.0 + p99: 3.0 + std: 1.0 +145: + mean: 1.0 + p99: 3.0 + std: 1.0 +146: + mean: 1.0 + p99: 3.0 + std: 1.0 +147: + mean: 1.0 + p99: 3.0 + std: 1.0 +148: + mean: 1.0 + p99: 3.0 + std: 1.0 +149: + mean: 1.0 + p99: 3.0 + std: 1.0 +150: + mean: 1.0 + p99: 3.0 + std: 1.0 +151: + mean: 1.0 + p99: 3.0 + std: 1.0 +152: + mean: 1.0 + p99: 3.0 + std: 1.0 +153: + mean: 1.0 + p99: 3.0 + std: 1.0 +154: + mean: 1.0 + p99: 3.0 + std: 1.0 +155: + mean: 1.0 + p99: 3.0 + std: 1.0 +156: + mean: 1.0 + p99: 3.0 + std: 1.0 +157: + mean: 1.0 + p99: 3.0 + std: 1.0 +158: + mean: 1.0 + p99: 3.0 + std: 1.0 +159: + mean: 1.0 + p99: 3.0 + std: 1.0 +160: + mean: 1.0 + p99: 3.0 + std: 1.0 +161: + mean: 1.0 + p99: 3.0 + std: 1.0 +181: + mean: 1.0 + p99: 3.0 + std: 1.0 +184: + mean: 1.0 + p99: 3.0 + std: 1.0 +186: + mean: 1.0 + p99: 3.0 + std: 1.0 +187: + mean: 1.0 + p99: 3.0 + std: 1.0 +188: + mean: 1.0 + p99: 3.0 + std: 1.0 +189: + mean: 1.0 + p99: 3.0 + std: 1.0 +190: + mean: 1.0 + p99: 3.0 + std: 1.0 +191: + mean: 1.0 + p99: 3.0 + std: 1.0 +192: + mean: 1.0 + p99: 3.0 + std: 1.0 +193: + mean: 1.0 + p99: 3.0 + std: 1.0 +194: + mean: 1.0 + p99: 3.0 + std: 1.0 +195: + mean: 1.0 + p99: 3.0 + std: 1.0 +196: + mean: 1.0 + p99: 3.0 + std: 1.0 +197: + mean: 1.0 + p99: 3.0 + std: 1.0 +198: + mean: 1.0 + p99: 3.0 + std: 1.0 +199: + mean: 1.0 + p99: 3.0 + std: 1.0 +200: + mean: 1.0 + p99: 3.0 + std: 1.0 +201: + mean: 1.0 + p99: 3.0 + std: 1.0 +202: + mean: 1.0 + p99: 3.0 + std: 1.0 +203: + mean: 1.0 + p99: 3.0 + std: 1.0 +204: + mean: 1.0 + p99: 3.0 + std: 1.0 +206: + mean: 1.0 + p99: 3.0 + std: 1.0 +207: + mean: 1.0 + p99: 3.0 + std: 1.0 +208: + mean: 1.0 + p99: 3.0 + std: 1.0 +209: + mean: 1.0 + p99: 3.0 + std: 1.0 +210: + mean: 1.0 + p99: 3.0 + std: 1.0 +211: + mean: 1.0 + p99: 3.0 + std: 1.0 +212: + mean: 1.0 + p99: 3.0 + std: 1.0 +213: + mean: 1.0 + p99: 3.0 + std: 1.0 +214: + mean: 1.0 + p99: 3.0 + std: 1.0 +215: + mean: 1.0 + p99: 3.0 + std: 1.0 +216: + mean: 1.0 + p99: 3.0 + std: 1.0 +217: + mean: 1.0 + p99: 3.0 + std: 1.0 +218: + mean: 1.0 + p99: 3.0 + std: 1.0 +219: + mean: 1.0 + p99: 3.0 + std: 1.0 +220: + mean: 1.0 + p99: 3.0 + std: 1.0 +221: + mean: 1.0 + p99: 3.0 + std: 1.0 +222: + mean: 1.0 + p99: 3.0 + std: 1.0 +224: + mean: 1.0 + p99: 3.0 + std: 1.0 +225: + mean: 1.0 + p99: 3.0 + std: 1.0 +226: + mean: 1.0 + p99: 3.0 + std: 1.0 +227: + mean: 1.0 + p99: 3.0 + std: 1.0 +228: + mean: 1.0 + p99: 3.0 + std: 1.0 +229: + mean: 1.0 + p99: 3.0 + std: 1.0 +230: + mean: 1.0 + p99: 3.0 + std: 1.0 +231: + mean: 1.0 + p99: 3.0 + std: 1.0 +235: + mean: 1.0 + p99: 3.0 + std: 1.0 +236: + mean: 1.0 + p99: 3.0 + std: 1.0 +238: + mean: 1.0 + p99: 3.0 + std: 1.0 +239: + mean: 1.0 + p99: 3.0 + std: 1.0 +240: + mean: 1.0 + p99: 3.0 + std: 1.0 +242: + mean: 1.0 + p99: 3.0 + std: 1.0 +243: + mean: 1.0 + p99: 3.0 + std: 1.0 +244: + mean: 1.0 + p99: 3.0 + std: 1.0 +245: + mean: 1.0 + p99: 3.0 + std: 1.0 +248: + mean: 1.0 + p99: 3.0 + std: 1.0 +249: + mean: 1.0 + p99: 3.0 + std: 1.0 +251: + mean: 1.0 + p99: 3.0 + std: 1.0 +252: + mean: 1.0 + p99: 3.0 + std: 1.0 +253: + mean: 1.0 + p99: 3.0 + std: 1.0 +254: + mean: 1.0 + 
p99: 3.0 + std: 1.0 +255: + mean: 1.0 + p99: 3.0 + std: 1.0 +256: + mean: 1.0 + p99: 3.0 + std: 1.0 +261: + mean: 1.0 + p99: 3.0 + std: 1.0 +263: + mean: 1.0 + p99: 3.0 + std: 1.0 +264: + mean: 1.0 + p99: 3.0 + std: 1.0 +266: + mean: 1.0 + p99: 3.0 + std: 1.0 +267: + mean: 1.0 + p99: 3.0 + std: 1.0 +268: + mean: 1.0 + p99: 3.0 + std: 1.0 +269: + mean: 1.0 + p99: 3.0 + std: 1.0 +270: + mean: 1.0 + p99: 3.0 + std: 1.0 +271: + mean: 1.0 + p99: 3.0 + std: 1.0 +272: + mean: 1.0 + p99: 3.0 + std: 1.0 +273: + mean: 1.0 + p99: 3.0 + std: 1.0 +274: + mean: 1.0 + p99: 3.0 + std: 1.0 +275: + mean: 1.0 + p99: 3.0 + std: 1.0 +276: + mean: 1.0 + p99: 3.0 + std: 1.0 +278: + mean: 1.0 + p99: 3.0 + std: 1.0 +281: + mean: 1.0 + p99: 3.0 + std: 1.0 +282: + mean: 1.0 + p99: 3.0 + std: 1.0 +283: + mean: 1.0 + p99: 3.0 + std: 1.0 +285: + mean: 1.0 + p99: 3.0 + std: 1.0 +291: + mean: 1.0 + p99: 3.0 + std: 1.0 +293: + mean: 1.0 + p99: 3.0 + std: 1.0 +299: + mean: 1.0 + p99: 3.0 + std: 1.0 +300: + mean: 1.0 + p99: 3.0 + std: 1.0 +301: + mean: 1.0 + p99: 3.0 + std: 1.0 +303: + mean: 1.0 + p99: 3.0 + std: 1.0 +305: + mean: 1.0 + p99: 3.0 + std: 1.0 +306: + mean: 1.0 + p99: 3.0 + std: 1.0 +308: + mean: 1.0 + p99: 3.0 + std: 1.0 +310: + mean: 1.0 + p99: 3.0 + std: 1.0 +314: + mean: 1.0 + p99: 3.0 + std: 1.0 +315: + mean: 1.0 + p99: 3.0 + std: 1.0 +316: + mean: 1.0 + p99: 3.0 + std: 1.0 +317: + mean: 1.0 + p99: 3.0 + std: 1.0 +318: + mean: 1.0 + p99: 3.0 + std: 1.0 +319: + mean: 1.0 + p99: 3.0 + std: 1.0 +320: + mean: 1.0 + p99: 3.0 + std: 1.0 +322: + mean: 1.0 + p99: 3.0 + std: 1.0 +323: + mean: 1.0 + p99: 3.0 + std: 1.0 +326: + mean: 1.0 + p99: 3.0 + std: 1.0 +327: + mean: 1.0 + p99: 3.0 + std: 1.0 +328: + mean: 1.0 + p99: 3.0 + std: 1.0 +329: + mean: 1.0 + p99: 3.0 + std: 1.0 +330: + mean: 1.0 + p99: 3.0 + std: 1.0 +331: + mean: 1.0 + p99: 3.0 + std: 1.0 +332: + mean: 1.0 + p99: 3.0 + std: 1.0 +333: + mean: 1.0 + p99: 3.0 + std: 1.0 +335: + mean: 1.0 + p99: 3.0 + std: 1.0 +336: + mean: 1.0 + p99: 3.0 + std: 1.0 +337: + mean: 1.0 + p99: 3.0 + std: 1.0 +339: + mean: 1.0 + p99: 3.0 + std: 1.0 +340: + mean: 1.0 + p99: 3.0 + std: 1.0 +341: + mean: 1.0 + p99: 3.0 + std: 1.0 +342: + mean: 1.0 + p99: 3.0 + std: 1.0 +344: + mean: 1.0 + p99: 3.0 + std: 1.0 +345: + mean: 1.0 + p99: 3.0 + std: 1.0 +346: + mean: 1.0 + p99: 3.0 + std: 1.0 +347: + mean: 1.0 + p99: 3.0 + std: 1.0 +349: + mean: 1.0 + p99: 3.0 + std: 1.0 +350: + mean: 1.0 + p99: 3.0 + std: 1.0 +351: + mean: 1.0 + p99: 3.0 + std: 1.0 +352: + mean: 1.0 + p99: 3.0 + std: 1.0 +353: + mean: 1.0 + p99: 3.0 + std: 1.0 +354: + mean: 1.0 + p99: 3.0 + std: 1.0 +355: + mean: 1.0 + p99: 3.0 + std: 1.0 +356: + mean: 1.0 + p99: 3.0 + std: 1.0 +358: + mean: 1.0 + p99: 3.0 + std: 1.0 +402: + mean: 1.0 + p99: 3.0 + std: 1.0 +403: + mean: 1.0 + p99: 3.0 + std: 1.0 +404: + mean: 1.0 + p99: 3.0 + std: 1.0 +406: + mean: 1.0 + p99: 3.0 + std: 1.0 +407: + mean: 1.0 + p99: 3.0 + std: 1.0 +411: + mean: 1.0 + p99: 3.0 + std: 1.0 +412: + mean: 1.0 + p99: 3.0 + std: 1.0 +413: + mean: 1.0 + p99: 3.0 + std: 1.0 +414: + mean: 1.0 + p99: 3.0 + std: 1.0 +417: + mean: 1.0 + p99: 3.0 + std: 1.0 +418: + mean: 1.0 + p99: 3.0 + std: 1.0 +421: + mean: 1.0 + p99: 3.0 + std: 1.0 +422: + mean: 1.0 + p99: 3.0 + std: 1.0 +424: + mean: 1.0 + p99: 3.0 + std: 1.0 +425: + mean: 1.0 + p99: 3.0 + std: 1.0 +427: + mean: 1.0 + p99: 3.0 + std: 1.0 +430: + mean: 1.0 + p99: 3.0 + std: 1.0 +431: + mean: 1.0 + p99: 3.0 + std: 1.0 +432: + mean: 1.0 + p99: 3.0 + std: 1.0 +433: + mean: 1.0 + p99: 3.0 + std: 1.0 +434: + mean: 
1.0 + p99: 3.0 + std: 1.0 +435: + mean: 1.0 + p99: 3.0 + std: 1.0 +436: + mean: 1.0 + p99: 3.0 + std: 1.0 +437: + mean: 1.0 + p99: 3.0 + std: 1.0 +438: + mean: 1.0 + p99: 3.0 + std: 1.0 +439: + mean: 1.0 + p99: 3.0 + std: 1.0 +441: + mean: 1.0 + p99: 3.0 + std: 1.0 +442: + mean: 1.0 + p99: 3.0 + std: 1.0 +444: + mean: 1.0 + p99: 3.0 + std: 1.0 +445: + mean: 1.0 + p99: 3.0 + std: 1.0 +446: + mean: 1.0 + p99: 3.0 + std: 1.0 +447: + mean: 1.0 + p99: 3.0 + std: 1.0 +448: + mean: 1.0 + p99: 3.0 + std: 1.0 +454: + mean: 1.0 + p99: 3.0 + std: 1.0 +455: + mean: 1.0 + p99: 3.0 + std: 1.0 +456: + mean: 1.0 + p99: 3.0 + std: 1.0 +457: + mean: 1.0 + p99: 3.0 + std: 1.0 +458: + mean: 1.0 + p99: 3.0 + std: 1.0 +459: + mean: 1.0 + p99: 3.0 + std: 1.0 +470: + mean: 1.0 + p99: 3.0 + std: 1.0 +472: + mean: 1.0 + p99: 3.0 + std: 1.0 +473: + mean: 1.0 + p99: 3.0 + std: 1.0 +478: + mean: 1.0 + p99: 3.0 + std: 1.0 +479: + mean: 1.0 + p99: 3.0 + std: 1.0 +481: + mean: 1.0 + p99: 3.0 + std: 1.0 +493: + mean: 1.0 + p99: 3.0 + std: 1.0 +494: + mean: 1.0 + p99: 3.0 + std: 1.0 +495: + mean: 1.0 + p99: 3.0 + std: 1.0 +496: + mean: 1.0 + p99: 3.0 + std: 1.0 +497: + mean: 1.0 + p99: 3.0 + std: 1.0 +498: + mean: 1.0 + p99: 3.0 + std: 1.0 +499: + mean: 1.0 + p99: 3.0 + std: 1.0 +500: + mean: 1.0 + p99: 3.0 + std: 1.0 +501: + mean: 1.0 + p99: 3.0 + std: 1.0 +502: + mean: 1.0 + p99: 3.0 + std: 1.0 +503: + mean: 1.0 + p99: 3.0 + std: 1.0 +504: + mean: 1.0 + p99: 3.0 + std: 1.0 +505: + mean: 1.0 + p99: 3.0 + std: 1.0 +507: + mean: 1.0 + p99: 3.0 + std: 1.0 +509: + mean: 1.0 + p99: 3.0 + std: 1.0 +510: + mean: 1.0 + p99: 3.0 + std: 1.0 +511: + mean: 1.0 + p99: 3.0 + std: 1.0 +512: + mean: 1.0 + p99: 3.0 + std: 1.0 +514: + mean: 1.0 + p99: 3.0 + std: 1.0 +515: + mean: 1.0 + p99: 3.0 + std: 1.0 +526: + mean: 1.0 + p99: 3.0 + std: 1.0 +527: + mean: 1.0 + p99: 3.0 + std: 1.0 +544: + mean: 1.0 + p99: 3.0 + std: 1.0 +545: + mean: 1.0 + p99: 3.0 + std: 1.0 +546: + mean: 1.0 + p99: 3.0 + std: 1.0 +548: + mean: 1.0 + p99: 3.0 + std: 1.0 +549: + mean: 1.0 + p99: 3.0 + std: 1.0 +551: + mean: 1.0 + p99: 3.0 + std: 1.0 +552: + mean: 1.0 + p99: 3.0 + std: 1.0 +553: + mean: 1.0 + p99: 3.0 + std: 1.0 +557: + mean: 1.0 + p99: 3.0 + std: 1.0 +558: + mean: 1.0 + p99: 3.0 + std: 1.0 +559: + mean: 1.0 + p99: 3.0 + std: 1.0 +560: + mean: 1.0 + p99: 3.0 + std: 1.0 +561: + mean: 1.0 + p99: 3.0 + std: 1.0 +562: + mean: 1.0 + p99: 3.0 + std: 1.0 +563: + mean: 1.0 + p99: 3.0 + std: 1.0 +564: + mean: 1.0 + p99: 3.0 + std: 1.0 +565: + mean: 1.0 + p99: 3.0 + std: 1.0 +566: + mean: 1.0 + p99: 3.0 + std: 1.0 +567: + mean: 1.0 + p99: 3.0 + std: 1.0 +568: + mean: 1.0 + p99: 3.0 + std: 1.0 +570: + mean: 1.0 + p99: 3.0 + std: 1.0 +571: + mean: 1.0 + p99: 3.0 + std: 1.0 +572: + mean: 1.0 + p99: 3.0 + std: 1.0 +573: + mean: 1.0 + p99: 3.0 + std: 1.0 +574: + mean: 1.0 + p99: 3.0 + std: 1.0 +575: + mean: 1.0 + p99: 3.0 + std: 1.0 +576: + mean: 1.0 + p99: 3.0 + std: 1.0 +577: + mean: 1.0 + p99: 3.0 + std: 1.0 +578: + mean: 1.0 + p99: 3.0 + std: 1.0 +580: + mean: 1.0 + p99: 3.0 + std: 1.0 +582: + mean: 1.0 + p99: 3.0 + std: 1.0 +583: + mean: 1.0 + p99: 3.0 + std: 1.0 +584: + mean: 1.0 + p99: 3.0 + std: 1.0 +587: + mean: 1.0 + p99: 3.0 + std: 1.0 +588: + mean: 1.0 + p99: 3.0 + std: 1.0 +589: + mean: 1.0 + p99: 3.0 + std: 1.0 +590: + mean: 1.0 + p99: 3.0 + std: 1.0 +592: + mean: 1.0 + p99: 3.0 + std: 1.0 +593: + mean: 1.0 + p99: 3.0 + std: 1.0 +596: + mean: 1.0 + p99: 3.0 + std: 1.0 +598: + mean: 1.0 + p99: 3.0 + std: 1.0 +599: + mean: 1.0 + p99: 3.0 + std: 1.0 +600: + 
mean: 1.0 + p99: 3.0 + std: 1.0 +602: + mean: 1.0 + p99: 3.0 + std: 1.0 +628: + mean: 1.0 + p99: 3.0 + std: 1.0 +631: + mean: 1.0 + p99: 3.0 + std: 1.0 +632: + mean: 1.0 + p99: 3.0 + std: 1.0 +634: + mean: 1.0 + p99: 3.0 + std: 1.0 +635: + mean: 1.0 + p99: 3.0 + std: 1.0 +636: + mean: 1.0 + p99: 3.0 + std: 1.0 +638: + mean: 1.0 + p99: 3.0 + std: 1.0 +639: + mean: 1.0 + p99: 3.0 + std: 1.0 +640: + mean: 1.0 + p99: 3.0 + std: 1.0 +641: + mean: 1.0 + p99: 3.0 + std: 1.0 +642: + mean: 1.0 + p99: 3.0 + std: 1.0 +643: + mean: 1.0 + p99: 3.0 + std: 1.0 +644: + mean: 1.0 + p99: 3.0 + std: 1.0 +646: + mean: 1.0 + p99: 3.0 + std: 1.0 +647: + mean: 1.0 + p99: 3.0 + std: 1.0 +649: + mean: 1.0 + p99: 3.0 + std: 1.0 +650: + mean: 1.0 + p99: 3.0 + std: 1.0 +653: + mean: 1.0 + p99: 3.0 + std: 1.0 +655: + mean: 1.0 + p99: 3.0 + std: 1.0 +656: + mean: 1.0 + p99: 3.0 + std: 1.0 +657: + mean: 1.0 + p99: 3.0 + std: 1.0 +658: + mean: 1.0 + p99: 3.0 + std: 1.0 +659: + mean: 1.0 + p99: 3.0 + std: 1.0 +660: + mean: 1.0 + p99: 3.0 + std: 1.0 +661: + mean: 1.0 + p99: 3.0 + std: 1.0 +662: + mean: 1.0 + p99: 3.0 + std: 1.0 +663: + mean: 1.0 + p99: 3.0 + std: 1.0 +664: + mean: 1.0 + p99: 3.0 + std: 1.0 +665: + mean: 1.0 + p99: 3.0 + std: 1.0 +666: + mean: 1.0 + p99: 3.0 + std: 1.0 +667: + mean: 1.0 + p99: 3.0 + std: 1.0 +668: + mean: 1.0 + p99: 3.0 + std: 1.0 +669: + mean: 1.0 + p99: 3.0 + std: 1.0 +670: + mean: 1.0 + p99: 3.0 + std: 1.0 +671: + mean: 1.0 + p99: 3.0 + std: 1.0 +672: + mean: 1.0 + p99: 3.0 + std: 1.0 +673: + mean: 1.0 + p99: 3.0 + std: 1.0 +674: + mean: 1.0 + p99: 3.0 + std: 1.0 +675: + mean: 1.0 + p99: 3.0 + std: 1.0 +676: + mean: 1.0 + p99: 3.0 + std: 1.0 +677: + mean: 1.0 + p99: 3.0 + std: 1.0 +680: + mean: 1.0 + p99: 3.0 + std: 1.0 +683: + mean: 1.0 + p99: 3.0 + std: 1.0 +684: + mean: 1.0 + p99: 3.0 + std: 1.0 +685: + mean: 1.0 + p99: 3.0 + std: 1.0 +686: + mean: 1.0 + p99: 3.0 + std: 1.0 +687: + mean: 1.0 + p99: 3.0 + std: 1.0 +688: + mean: 1.0 + p99: 3.0 + std: 1.0 +689: + mean: 1.0 + p99: 3.0 + std: 1.0 +690: + mean: 1.0 + p99: 3.0 + std: 1.0 +692: + mean: 1.0 + p99: 3.0 + std: 1.0 +693: + mean: 1.0 + p99: 3.0 + std: 1.0 +696: + mean: 1.0 + p99: 3.0 + std: 1.0 +697: + mean: 1.0 + p99: 3.0 + std: 1.0 +698: + mean: 1.0 + p99: 3.0 + std: 1.0 +699: + mean: 1.0 + p99: 3.0 + std: 1.0 +700: + mean: 1.0 + p99: 3.0 + std: 1.0 +701: + mean: 1.0 + p99: 3.0 + std: 1.0 +702: + mean: 1.0 + p99: 3.0 + std: 1.0 +703: + mean: 1.0 + p99: 3.0 + std: 1.0 +704: + mean: 1.0 + p99: 3.0 + std: 1.0 +705: + mean: 1.0 + p99: 3.0 + std: 1.0 +706: + mean: 1.0 + p99: 3.0 + std: 1.0 +707: + mean: 1.0 + p99: 3.0 + std: 1.0 +708: + mean: 1.0 + p99: 3.0 + std: 1.0 +709: + mean: 1.0 + p99: 3.0 + std: 1.0 +710: + mean: 1.0 + p99: 3.0 + std: 1.0 +711: + mean: 1.0 + p99: 3.0 + std: 1.0 +712: + mean: 1.0 + p99: 3.0 + std: 1.0 +713: + mean: 1.0 + p99: 3.0 + std: 1.0 +714: + mean: 1.0 + p99: 3.0 + std: 1.0 +715: + mean: 1.0 + p99: 3.0 + std: 1.0 +716: + mean: 1.0 + p99: 3.0 + std: 1.0 +717: + mean: 1.0 + p99: 3.0 + std: 1.0 +718: + mean: 1.0 + p99: 3.0 + std: 1.0 +719: + mean: 1.0 + p99: 3.0 + std: 1.0 +720: + mean: 1.0 + p99: 3.0 + std: 1.0 +722: + mean: 1.0 + p99: 3.0 + std: 1.0 +723: + mean: 1.0 + p99: 3.0 + std: 1.0 +724: + mean: 1.0 + p99: 3.0 + std: 1.0 +725: + mean: 1.0 + p99: 3.0 + std: 1.0 +726: + mean: 1.0 + p99: 3.0 + std: 1.0 +729: + mean: 1.0 + p99: 3.0 + std: 1.0 +732: + mean: 1.0 + p99: 3.0 + std: 1.0 +733: + mean: 1.0 + p99: 3.0 + std: 1.0 +734: + mean: 1.0 + p99: 3.0 + std: 1.0 +735: + mean: 1.0 + p99: 3.0 + std: 1.0 
+736: + mean: 1.0 + p99: 3.0 + std: 1.0 +738: + mean: 1.0 + p99: 3.0 + std: 1.0 +741: + mean: 1.0 + p99: 3.0 + std: 1.0 +743: + mean: 1.0 + p99: 3.0 + std: 1.0 +745: + mean: 1.0 + p99: 3.0 + std: 1.0 +746: + mean: 1.0 + p99: 3.0 + std: 1.0 +749: + mean: 1.0 + p99: 3.0 + std: 1.0 +751: + mean: 1.0 + p99: 3.0 + std: 1.0 +752: + mean: 1.0 + p99: 3.0 + std: 1.0 +753: + mean: 1.0 + p99: 3.0 + std: 1.0 +755: + mean: 1.0 + p99: 3.0 + std: 1.0 +756: + mean: 1.0 + p99: 3.0 + std: 1.0 +757: + mean: 1.0 + p99: 3.0 + std: 1.0 +758: + mean: 1.0 + p99: 3.0 + std: 1.0 +759: + mean: 1.0 + p99: 3.0 + std: 1.0 +761: + mean: 1.0 + p99: 3.0 + std: 1.0 +762: + mean: 1.0 + p99: 3.0 + std: 1.0 +768: + mean: 1.0 + p99: 3.0 + std: 1.0 +769: + mean: 1.0 + p99: 3.0 + std: 1.0 +776: + mean: 1.0 + p99: 3.0 + std: 1.0 +777: + mean: 1.0 + p99: 3.0 + std: 1.0 +778: + mean: 1.0 + p99: 3.0 + std: 1.0 +779: + mean: 1.0 + p99: 3.0 + std: 1.0 +781: + mean: 1.0 + p99: 3.0 + std: 1.0 +782: + mean: 1.0 + p99: 3.0 + std: 1.0 +785: + mean: 1.0 + p99: 3.0 + std: 1.0 +788: + mean: 1.0 + p99: 3.0 + std: 1.0 +790: + mean: 1.0 + p99: 3.0 + std: 1.0 +791: + mean: 1.0 + p99: 3.0 + std: 1.0 +792: + mean: 1.0 + p99: 3.0 + std: 1.0 +794: + mean: 1.0 + p99: 3.0 + std: 1.0 +796: + mean: 1.0 + p99: 3.0 + std: 1.0 +801: + mean: 1.0 + p99: 3.0 + std: 1.0 +802: + mean: 1.0 + p99: 3.0 + std: 1.0 +803: + mean: 1.0 + p99: 3.0 + std: 1.0 +804: + mean: 1.0 + p99: 3.0 + std: 1.0 +811: + mean: 1.0 + p99: 3.0 + std: 1.0 +812: + mean: 1.0 + p99: 3.0 + std: 1.0 +813: + mean: 1.0 + p99: 3.0 + std: 1.0 +814: + mean: 1.0 + p99: 3.0 + std: 1.0 +837: + mean: 1.0 + p99: 3.0 + std: 1.0 +843: + mean: 1.0 + p99: 3.0 + std: 1.0 +844: + mean: 1.0 + p99: 3.0 + std: 1.0 +845: + mean: 1.0 + p99: 3.0 + std: 1.0 +846: + mean: 1.0 + p99: 3.0 + std: 1.0 +847: + mean: 1.0 + p99: 3.0 + std: 1.0 +852: + mean: 1.0 + p99: 3.0 + std: 1.0 +870: + mean: 1.0 + p99: 3.0 + std: 1.0 +874: + mean: 1.0 + p99: 3.0 + std: 1.0 +876: + mean: 1.0 + p99: 3.0 + std: 1.0 +877: + mean: 1.0 + p99: 3.0 + std: 1.0 +878: + mean: 1.0 + p99: 3.0 + std: 1.0 +879: + mean: 1.0 + p99: 3.0 + std: 1.0 +880: + mean: 1.0 + p99: 3.0 + std: 1.0 +938: + mean: 1.0 + p99: 3.0 + std: 1.0 +939: + mean: 1.0 + p99: 3.0 + std: 1.0 +940: + mean: 1.0 + p99: 3.0 + std: 1.0 +941: + mean: 1.0 + p99: 3.0 + std: 1.0 +942: + mean: 1.0 + p99: 3.0 + std: 1.0 +943: + mean: 1.0 + p99: 3.0 + std: 1.0 +945: + mean: 1.0 + p99: 3.0 + std: 1.0 +946: + mean: 1.0 + p99: 3.0 + std: 1.0 +947: + mean: 1.0 + p99: 3.0 + std: 1.0 +948: + mean: 1.0 + p99: 3.0 + std: 1.0 +949: + mean: 1.0 + p99: 3.0 + std: 1.0 +950: + mean: 1.0 + p99: 3.0 + std: 1.0 +952: + mean: 1.0 + p99: 3.0 + std: 1.0 +953: + mean: 1.0 + p99: 3.0 + std: 1.0 +954: + mean: 1.0 + p99: 3.0 + std: 1.0 +955: + mean: 1.0 + p99: 3.0 + std: 1.0 +956: + mean: 1.0 + p99: 3.0 + std: 1.0 +957: + mean: 1.0 + p99: 3.0 + std: 1.0 +958: + mean: 1.0 + p99: 3.0 + std: 1.0 +959: + mean: 1.0 + p99: 3.0 + std: 1.0 +961: + mean: 1.0 + p99: 3.0 + std: 1.0 +962: + mean: 1.0 + p99: 3.0 + std: 1.0 +963: + mean: 1.0 + p99: 3.0 + std: 1.0 +964: + mean: 1.0 + p99: 3.0 + std: 1.0 +965: + mean: 1.0 + p99: 3.0 + std: 1.0 +966: + mean: 1.0 + p99: 3.0 + std: 1.0 +967: + mean: 1.0 + p99: 3.0 + std: 1.0 +968: + mean: 1.0 + p99: 3.0 + std: 1.0 +971: + mean: 1.0 + p99: 3.0 + std: 1.0 +972: + mean: 1.0 + p99: 3.0 + std: 1.0 +973: + mean: 1.0 + p99: 3.0 + std: 1.0 +975: + mean: 1.0 + p99: 3.0 + std: 1.0 +976: + mean: 1.0 + p99: 3.0 + std: 1.0 +977: + mean: 1.0 + p99: 3.0 + std: 1.0 +978: + mean: 1.0 + p99: 3.0 + 
std: 1.0 +989: + mean: 1.0 + p99: 3.0 + std: 1.0 +990: + mean: 1.0 + p99: 3.0 + std: 1.0 +992: + mean: 1.0 + p99: 3.0 + std: 1.0 +995: + mean: 1.0 + p99: 3.0 + std: 1.0 +996: + mean: 1.0 + p99: 3.0 + std: 1.0 +997: + mean: 1.0 + p99: 3.0 + std: 1.0 +999: + mean: 1.0 + p99: 3.0 + std: 1.0 +1000: + mean: 1.0 + p99: 3.0 + std: 1.0 +1001: + mean: 1.0 + p99: 3.0 + std: 1.0 +1002: + mean: 1.0 + p99: 3.0 + std: 1.0 +1013: + mean: 1.0 + p99: 3.0 + std: 1.0 +1014: + mean: 1.0 + p99: 3.0 + std: 1.0 +1020: + mean: 1.0 + p99: 3.0 + std: 1.0 +1021: + mean: 1.0 + p99: 3.0 + std: 1.0 +1023: + mean: 1.0 + p99: 3.0 + std: 1.0 +1024: + mean: 1.0 + p99: 3.0 + std: 1.0 +1025: + mean: 1.0 + p99: 3.0 + std: 1.0 +1027: + mean: 1.0 + p99: 3.0 + std: 1.0 +1029: + mean: 1.0 + p99: 3.0 + std: 1.0 +1030: + mean: 1.0 + p99: 3.0 + std: 1.0 +1031: + mean: 1.0 + p99: 3.0 + std: 1.0 +1032: + mean: 1.0 + p99: 3.0 + std: 1.0 +1033: + mean: 1.0 + p99: 3.0 + std: 1.0 +1034: + mean: 1.0 + p99: 3.0 + std: 1.0 +1035: + mean: 1.0 + p99: 3.0 + std: 1.0 +1036: + mean: 1.0 + p99: 3.0 + std: 1.0 +1045: + mean: 1.0 + p99: 3.0 + std: 1.0 +1046: + mean: 1.0 + p99: 3.0 + std: 1.0 +1047: + mean: 1.0 + p99: 3.0 + std: 1.0 +1048: + mean: 1.0 + p99: 3.0 + std: 1.0 +1049: + mean: 1.0 + p99: 3.0 + std: 1.0 +1050: + mean: 1.0 + p99: 3.0 + std: 1.0 +1051: + mean: 1.0 + p99: 3.0 + std: 1.0 +1059: + mean: 1.0 + p99: 3.0 + std: 1.0 +1061: + mean: 1.0 + p99: 3.0 + std: 1.0 +1063: + mean: 1.0 + p99: 3.0 + std: 1.0 +1065: + mean: 1.0 + p99: 3.0 + std: 1.0 +1067: + mean: 1.0 + p99: 3.0 + std: 1.0 +1069: + mean: 1.0 + p99: 3.0 + std: 1.0 +1072: + mean: 1.0 + p99: 3.0 + std: 1.0 +1073: + mean: 1.0 + p99: 3.0 + std: 1.0 +1075: + mean: 1.0 + p99: 3.0 + std: 1.0 +1076: + mean: 1.0 + p99: 3.0 + std: 1.0 +1078: + mean: 1.0 + p99: 3.0 + std: 1.0 +1082: + mean: 1.0 + p99: 3.0 + std: 1.0 +1083: + mean: 1.0 + p99: 3.0 + std: 1.0 +1084: + mean: 1.0 + p99: 3.0 + std: 1.0 +1090: + mean: 1.0 + p99: 3.0 + std: 1.0 +1091: + mean: 1.0 + p99: 3.0 + std: 1.0 +1092: + mean: 1.0 + p99: 3.0 + std: 1.0 +1096: + mean: 1.0 + p99: 3.0 + std: 1.0 +1099: + mean: 1.0 + p99: 3.0 + std: 1.0 +1101: + mean: 1.0 + p99: 3.0 + std: 1.0 +1102: + mean: 1.0 + p99: 3.0 + std: 1.0 +1103: + mean: 1.0 + p99: 3.0 + std: 1.0 +1104: + mean: 1.0 + p99: 3.0 + std: 1.0 +1105: + mean: 1.0 + p99: 3.0 + std: 1.0 +1106: + mean: 1.0 + p99: 3.0 + std: 1.0 +1107: + mean: 1.0 + p99: 3.0 + std: 1.0 +1108: + mean: 1.0 + p99: 3.0 + std: 1.0 +1113: + mean: 1.0 + p99: 3.0 + std: 1.0 +1114: + mean: 1.0 + p99: 3.0 + std: 1.0 +1115: + mean: 1.0 + p99: 3.0 + std: 1.0 +1116: + mean: 1.0 + p99: 3.0 + std: 1.0 +1117: + mean: 1.0 + p99: 3.0 + std: 1.0 +1120: + mean: 1.0 + p99: 3.0 + std: 1.0 +1122: + mean: 1.0 + p99: 3.0 + std: 1.0 +1123: + mean: 1.0 + p99: 3.0 + std: 1.0 +1128: + mean: 1.0 + p99: 3.0 + std: 1.0 +1129: + mean: 1.0 + p99: 3.0 + std: 1.0 +1130: + mean: 1.0 + p99: 3.0 + std: 1.0 +1131: + mean: 1.0 + p99: 3.0 + std: 1.0 +1132: + mean: 1.0 + p99: 3.0 + std: 1.0 +1133: + mean: 1.0 + p99: 3.0 + std: 1.0 +1134: + mean: 1.0 + p99: 3.0 + std: 1.0 +1136: + mean: 1.0 + p99: 3.0 + std: 1.0 +1138: + mean: 1.0 + p99: 3.0 + std: 1.0 +1141: + mean: 1.0 + p99: 3.0 + std: 1.0 +1142: + mean: 1.0 + p99: 3.0 + std: 1.0 +1144: + mean: 1.0 + p99: 3.0 + std: 1.0 +1145: + mean: 1.0 + p99: 3.0 + std: 1.0 +1146: + mean: 1.0 + p99: 3.0 + std: 1.0 +1148: + mean: 1.0 + p99: 3.0 + std: 1.0 +1149: + mean: 1.0 + p99: 3.0 + std: 1.0 +1150: + mean: 1.0 + p99: 3.0 + std: 1.0 +1151: + mean: 1.0 + p99: 3.0 + std: 1.0 +1152: + mean: 1.0 + p99: 
3.0 + std: 1.0 +1153: + mean: 1.0 + p99: 3.0 + std: 1.0 +1154: + mean: 1.0 + p99: 3.0 + std: 1.0 +1155: + mean: 1.0 + p99: 3.0 + std: 1.0 +1156: + mean: 1.0 + p99: 3.0 + std: 1.0 +1157: + mean: 1.0 + p99: 3.0 + std: 1.0 +1158: + mean: 1.0 + p99: 3.0 + std: 1.0 +1159: + mean: 1.0 + p99: 3.0 + std: 1.0 +1160: + mean: 1.0 + p99: 3.0 + std: 1.0 +1161: + mean: 1.0 + p99: 3.0 + std: 1.0 +1162: + mean: 1.0 + p99: 3.0 + std: 1.0 +1163: + mean: 1.0 + p99: 3.0 + std: 1.0 +1165: + mean: 1.0 + p99: 3.0 + std: 1.0 +1168: + mean: 1.0 + p99: 3.0 + std: 1.0 +1171: + mean: 1.0 + p99: 3.0 + std: 1.0 +1172: + mean: 1.0 + p99: 3.0 + std: 1.0 +1181: + mean: 1.0 + p99: 3.0 + std: 1.0 +1182: + mean: 1.0 + p99: 3.0 + std: 1.0 +1183: + mean: 1.0 + p99: 3.0 + std: 1.0 +1184: + mean: 1.0 + p99: 3.0 + std: 1.0 +1185: + mean: 1.0 + p99: 3.0 + std: 1.0 +1186: + mean: 1.0 + p99: 3.0 + std: 1.0 +1187: + mean: 1.0 + p99: 3.0 + std: 1.0 +1189: + mean: 1.0 + p99: 3.0 + std: 1.0 +1190: + mean: 1.0 + p99: 3.0 + std: 1.0 +1191: + mean: 1.0 + p99: 3.0 + std: 1.0 +1193: + mean: 1.0 + p99: 3.0 + std: 1.0 +1194: + mean: 1.0 + p99: 3.0 + std: 1.0 +1195: + mean: 1.0 + p99: 3.0 + std: 1.0 +1196: + mean: 1.0 + p99: 3.0 + std: 1.0 +1202: + mean: 1.0 + p99: 3.0 + std: 1.0 +1205: + mean: 1.0 + p99: 3.0 + std: 1.0 +1207: + mean: 1.0 + p99: 3.0 + std: 1.0 +1208: + mean: 1.0 + p99: 3.0 + std: 1.0 +1209: + mean: 1.0 + p99: 3.0 + std: 1.0 +1211: + mean: 1.0 + p99: 3.0 + std: 1.0 +1212: + mean: 1.0 + p99: 3.0 + std: 1.0 +1213: + mean: 1.0 + p99: 3.0 + std: 1.0 +1214: + mean: 1.0 + p99: 3.0 + std: 1.0 +1215: + mean: 1.0 + p99: 3.0 + std: 1.0 +1216: + mean: 1.0 + p99: 3.0 + std: 1.0 +1217: + mean: 1.0 + p99: 3.0 + std: 1.0 +1218: + mean: 1.0 + p99: 3.0 + std: 1.0 +1219: + mean: 1.0 + p99: 3.0 + std: 1.0 +1220: + mean: 1.0 + p99: 3.0 + std: 1.0 +1221: + mean: 1.0 + p99: 3.0 + std: 1.0 +1222: + mean: 1.0 + p99: 3.0 + std: 1.0 +1224: + mean: 1.0 + p99: 3.0 + std: 1.0 +1226: + mean: 1.0 + p99: 3.0 + std: 1.0 +1230: + mean: 1.0 + p99: 3.0 + std: 1.0 +1231: + mean: 1.0 + p99: 3.0 + std: 1.0 +1232: + mean: 1.0 + p99: 3.0 + std: 1.0 +1233: + mean: 1.0 + p99: 3.0 + std: 1.0 +1234: + mean: 1.0 + p99: 3.0 + std: 1.0 +1235: + mean: 1.0 + p99: 3.0 + std: 1.0 +1236: + mean: 1.0 + p99: 3.0 + std: 1.0 +1237: + mean: 1.0 + p99: 3.0 + std: 1.0 +1238: + mean: 1.0 + p99: 3.0 + std: 1.0 +1239: + mean: 1.0 + p99: 3.0 + std: 1.0 +1242: + mean: 1.0 + p99: 3.0 + std: 1.0 +1243: + mean: 1.0 + p99: 3.0 + std: 1.0 +1244: + mean: 1.0 + p99: 3.0 + std: 1.0 +1246: + mean: 1.0 + p99: 3.0 + std: 1.0 +1247: + mean: 1.0 + p99: 3.0 + std: 1.0 +1248: + mean: 1.0 + p99: 3.0 + std: 1.0 +1249: + mean: 1.0 + p99: 3.0 + std: 1.0 +1250: + mean: 1.0 + p99: 3.0 + std: 1.0 +1251: + mean: 1.0 + p99: 3.0 + std: 1.0 +1252: + mean: 1.0 + p99: 3.0 + std: 1.0 +1253: + mean: 1.0 + p99: 3.0 + std: 1.0 +1255: + mean: 1.0 + p99: 3.0 + std: 1.0 +1256: + mean: 1.0 + p99: 3.0 + std: 1.0 +1258: + mean: 1.0 + p99: 3.0 + std: 1.0 +1259: + mean: 1.0 + p99: 3.0 + std: 1.0 +1260: + mean: 1.0 + p99: 3.0 + std: 1.0 +1261: + mean: 1.0 + p99: 3.0 + std: 1.0 +1262: + mean: 1.0 + p99: 3.0 + std: 1.0 +1263: + mean: 1.0 + p99: 3.0 + std: 1.0 +1264: + mean: 1.0 + p99: 3.0 + std: 1.0 +1265: + mean: 1.0 + p99: 3.0 + std: 1.0 +1266: + mean: 1.0 + p99: 3.0 + std: 1.0 +1267: + mean: 1.0 + p99: 3.0 + std: 1.0 +1270: + mean: 1.0 + p99: 3.0 + std: 1.0 +1271: + mean: 1.0 + p99: 3.0 + std: 1.0 +1272: + mean: 1.0 + p99: 3.0 + std: 1.0 +1273: + mean: 1.0 + p99: 3.0 + std: 1.0 +1274: + mean: 1.0 + p99: 3.0 + std: 1.0 +1276: + 
mean: 1.0 + p99: 3.0 + std: 1.0 +1277: + mean: 1.0 + p99: 3.0 + std: 1.0 +1280: + mean: 1.0 + p99: 3.0 + std: 1.0 +1281: + mean: 1.0 + p99: 3.0 + std: 1.0 +1282: + mean: 1.0 + p99: 3.0 + std: 1.0 +1284: + mean: 1.0 + p99: 3.0 + std: 1.0 +1285: + mean: 1.0 + p99: 3.0 + std: 1.0 +1286: + mean: 1.0 + p99: 3.0 + std: 1.0 +1287: + mean: 1.0 + p99: 3.0 + std: 1.0 +1288: + mean: 1.0 + p99: 3.0 + std: 1.0 +1291: + mean: 1.0 + p99: 3.0 + std: 1.0 +1292: + mean: 1.0 + p99: 3.0 + std: 1.0 +1294: + mean: 1.0 + p99: 3.0 + std: 1.0 +1295: + mean: 1.0 + p99: 3.0 + std: 1.0 +1296: + mean: 1.0 + p99: 3.0 + std: 1.0 +1298: + mean: 1.0 + p99: 3.0 + std: 1.0 +1300: + mean: 1.0 + p99: 3.0 + std: 1.0 +1301: + mean: 1.0 + p99: 3.0 + std: 1.0 +1302: + mean: 1.0 + p99: 3.0 + std: 1.0 +1303: + mean: 1.0 + p99: 3.0 + std: 1.0 +1304: + mean: 1.0 + p99: 3.0 + std: 1.0 +1305: + mean: 1.0 + p99: 3.0 + std: 1.0 +1306: + mean: 1.0 + p99: 3.0 + std: 1.0 +1307: + mean: 1.0 + p99: 3.0 + std: 1.0 +1308: + mean: 1.0 + p99: 3.0 + std: 1.0 +1310: + mean: 1.0 + p99: 3.0 + std: 1.0 +1311: + mean: 1.0 + p99: 3.0 + std: 1.0 +1312: + mean: 1.0 + p99: 3.0 + std: 1.0 +1313: + mean: 1.0 + p99: 3.0 + std: 1.0 +1314: + mean: 1.0 + p99: 3.0 + std: 1.0 +1315: + mean: 1.0 + p99: 3.0 + std: 1.0 +1316: + mean: 1.0 + p99: 3.0 + std: 1.0 +1317: + mean: 1.0 + p99: 3.0 + std: 1.0 +1318: + mean: 1.0 + p99: 3.0 + std: 1.0 +1319: + mean: 1.0 + p99: 3.0 + std: 1.0 +1320: + mean: 1.0 + p99: 3.0 + std: 1.0 +1321: + mean: 1.0 + p99: 3.0 + std: 1.0 +1322: + mean: 1.0 + p99: 3.0 + std: 1.0 +1323: + mean: 1.0 + p99: 3.0 + std: 1.0 +1324: + mean: 1.0 + p99: 3.0 + std: 1.0 +1325: + mean: 1.0 + p99: 3.0 + std: 1.0 +1326: + mean: 1.0 + p99: 3.0 + std: 1.0 +1327: + mean: 1.0 + p99: 3.0 + std: 1.0 +1329: + mean: 1.0 + p99: 3.0 + std: 1.0 +1330: + mean: 1.0 + p99: 3.0 + std: 1.0 +1331: + mean: 1.0 + p99: 3.0 + std: 1.0 +1333: + mean: 1.0 + p99: 3.0 + std: 1.0 +1334: + mean: 1.0 + p99: 3.0 + std: 1.0 +1336: + mean: 1.0 + p99: 3.0 + std: 1.0 +1337: + mean: 1.0 + p99: 3.0 + std: 1.0 +1338: + mean: 1.0 + p99: 3.0 + std: 1.0 +1340: + mean: 1.0 + p99: 3.0 + std: 1.0 +1342: + mean: 1.0 + p99: 3.0 + std: 1.0 +1344: + mean: 1.0 + p99: 3.0 + std: 1.0 +1346: + mean: 1.0 + p99: 3.0 + std: 1.0 +1349: + mean: 1.0 + p99: 3.0 + std: 1.0 +1352: + mean: 1.0 + p99: 3.0 + std: 1.0 +1355: + mean: 1.0 + p99: 3.0 + std: 1.0 +1356: + mean: 1.0 + p99: 3.0 + std: 1.0 +1358: + mean: 1.0 + p99: 3.0 + std: 1.0 +1360: + mean: 1.0 + p99: 3.0 + std: 1.0 +1364: + mean: 1.0 + p99: 3.0 + std: 1.0 +1365: + mean: 1.0 + p99: 3.0 + std: 1.0 +1366: + mean: 1.0 + p99: 3.0 + std: 1.0 +1367: + mean: 1.0 + p99: 3.0 + std: 1.0 +1369: + mean: 1.0 + p99: 3.0 + std: 1.0 +1370: + mean: 1.0 + p99: 3.0 + std: 1.0 +1371: + mean: 1.0 + p99: 3.0 + std: 1.0 +1372: + mean: 1.0 + p99: 3.0 + std: 1.0 +1373: + mean: 1.0 + p99: 3.0 + std: 1.0 +1374: + mean: 1.0 + p99: 3.0 + std: 1.0 +1375: + mean: 1.0 + p99: 3.0 + std: 1.0 +1376: + mean: 1.0 + p99: 3.0 + std: 1.0 +1377: + mean: 1.0 + p99: 3.0 + std: 1.0 +1378: + mean: 1.0 + p99: 3.0 + std: 1.0 +1380: + mean: 1.0 + p99: 3.0 + std: 1.0 +1381: + mean: 1.0 + p99: 3.0 + std: 1.0 +1382: + mean: 1.0 + p99: 3.0 + std: 1.0 +1383: + mean: 1.0 + p99: 3.0 + std: 1.0 +1384: + mean: 1.0 + p99: 3.0 + std: 1.0 +1385: + mean: 1.0 + p99: 3.0 + std: 1.0 +1386: + mean: 1.0 + p99: 3.0 + std: 1.0 +1387: + mean: 1.0 + p99: 3.0 + std: 1.0 +1388: + mean: 1.0 + p99: 3.0 + std: 1.0 +1389: + mean: 1.0 + p99: 3.0 + std: 1.0 +1390: + mean: 1.0 + p99: 3.0 + std: 1.0 +1391: + mean: 1.0 + p99: 3.0 + std: 
1.0 +1393: + mean: 1.0 + p99: 3.0 + std: 1.0 +1394: + mean: 1.0 + p99: 3.0 + std: 1.0 +1395: + mean: 1.0 + p99: 3.0 + std: 1.0 +1396: + mean: 1.0 + p99: 3.0 + std: 1.0 +1397: + mean: 1.0 + p99: 3.0 + std: 1.0 +1398: + mean: 1.0 + p99: 3.0 + std: 1.0 +1399: + mean: 1.0 + p99: 3.0 + std: 1.0 +1400: + mean: 1.0 + p99: 3.0 + std: 1.0 +1402: + mean: 1.0 + p99: 3.0 + std: 1.0 +1403: + mean: 1.0 + p99: 3.0 + std: 1.0 +1404: + mean: 1.0 + p99: 3.0 + std: 1.0 +1406: + mean: 1.0 + p99: 3.0 + std: 1.0 +1407: + mean: 1.0 + p99: 3.0 + std: 1.0 +1408: + mean: 1.0 + p99: 3.0 + std: 1.0 +1409: + mean: 1.0 + p99: 3.0 + std: 1.0 +1410: + mean: 1.0 + p99: 3.0 + std: 1.0 +1411: + mean: 1.0 + p99: 3.0 + std: 1.0 +1412: + mean: 1.0 + p99: 3.0 + std: 1.0 +1413: + mean: 1.0 + p99: 3.0 + std: 1.0 +1414: + mean: 1.0 + p99: 3.0 + std: 1.0 +1415: + mean: 1.0 + p99: 3.0 + std: 1.0 +1417: + mean: 1.0 + p99: 3.0 + std: 1.0 +1420: + mean: 1.0 + p99: 3.0 + std: 1.0 +1421: + mean: 1.0 + p99: 3.0 + std: 1.0 +1422: + mean: 1.0 + p99: 3.0 + std: 1.0 +1423: + mean: 1.0 + p99: 3.0 + std: 1.0 +1424: + mean: 1.0 + p99: 3.0 + std: 1.0 +1426: + mean: 1.0 + p99: 3.0 + std: 1.0 +1427: + mean: 1.0 + p99: 3.0 + std: 1.0 +1428: + mean: 1.0 + p99: 3.0 + std: 1.0 +1429: + mean: 1.0 + p99: 3.0 + std: 1.0 +1430: + mean: 1.0 + p99: 3.0 + std: 1.0 +1432: + mean: 1.0 + p99: 3.0 + std: 1.0 +1433: + mean: 1.0 + p99: 3.0 + std: 1.0 +1434: + mean: 1.0 + p99: 3.0 + std: 1.0 +1435: + mean: 1.0 + p99: 3.0 + std: 1.0 +1436: + mean: 1.0 + p99: 3.0 + std: 1.0 +1438: + mean: 1.0 + p99: 3.0 + std: 1.0 +1442: + mean: 1.0 + p99: 3.0 + std: 1.0 +1443: + mean: 1.0 + p99: 3.0 + std: 1.0 +1444: + mean: 1.0 + p99: 3.0 + std: 1.0 +1445: + mean: 1.0 + p99: 3.0 + std: 1.0 +1450: + mean: 1.0 + p99: 3.0 + std: 1.0 +1452: + mean: 1.0 + p99: 3.0 + std: 1.0 +1453: + mean: 1.0 + p99: 3.0 + std: 1.0 +1454: + mean: 1.0 + p99: 3.0 + std: 1.0 +1455: + mean: 1.0 + p99: 3.0 + std: 1.0 +1456: + mean: 1.0 + p99: 3.0 + std: 1.0 +1458: + mean: 1.0 + p99: 3.0 + std: 1.0 +1459: + mean: 1.0 + p99: 3.0 + std: 1.0 +1462: + mean: 1.0 + p99: 3.0 + std: 1.0 +1463: + mean: 1.0 + p99: 3.0 + std: 1.0 +1464: + mean: 1.0 + p99: 3.0 + std: 1.0 +1466: + mean: 1.0 + p99: 3.0 + std: 1.0 +1469: + mean: 1.0 + p99: 3.0 + std: 1.0 +1471: + mean: 1.0 + p99: 3.0 + std: 1.0 +1473: + mean: 1.0 + p99: 3.0 + std: 1.0 +1476: + mean: 1.0 + p99: 3.0 + std: 1.0 +1479: + mean: 1.0 + p99: 3.0 + std: 1.0 +1481: + mean: 1.0 + p99: 3.0 + std: 1.0 +1484: + mean: 1.0 + p99: 3.0 + std: 1.0 +1487: + mean: 1.0 + p99: 3.0 + std: 1.0 diff --git a/sample_dataset/id_manager/operation_id.yml b/sample_dataset/id_manager/operation_id.yml new file mode 100644 index 0000000..eed078d --- /dev/null +++ b/sample_dataset/id_manager/operation_id.yml @@ -0,0 +1,1489 @@ +? 
'' +: 0 +1/1: 1 +10/50: 50 +10/51: 51 +10/52: 52 +10/53: 53 +100/444: 444 +100/445: 445 +100/446: 446 +100/447: 447 +100/448: 448 +101/449: 449 +101/450: 450 +101/451: 451 +101/452: 452 +101/453: 453 +102/454: 454 +102/455: 455 +102/456: 456 +102/457: 457 +102/458: 458 +102/459: 459 +103/460: 460 +103/461: 461 +103/462: 462 +103/463: 463 +103/464: 464 +103/465: 465 +103/466: 466 +103/467: 467 +103/468: 468 +103/469: 469 +104/470: 470 +105/471: 471 +106/472: 472 +107/473: 473 +108/474: 474 +109/475: 475 +109/476: 476 +109/477: 477 +11/54: 54 +11/55: 55 +11/56: 56 +11/57: 57 +11/58: 58 +11/59: 59 +11/60: 60 +11/61: 61 +110/478: 478 +111/479: 479 +112/480: 480 +112/481: 481 +112/482: 482 +113/483: 483 +113/484: 484 +113/485: 485 +113/486: 486 +113/487: 487 +114/488: 488 +114/489: 489 +114/490: 490 +114/491: 491 +115/492: 492 +115/493: 493 +115/494: 494 +115/495: 495 +115/496: 496 +115/497: 497 +115/498: 498 +115/499: 499 +115/500: 500 +115/501: 501 +115/502: 502 +115/503: 503 +116/504: 504 +116/505: 505 +116/506: 506 +116/507: 507 +116/508: 508 +116/509: 509 +116/510: 510 +117/511: 511 +117/512: 512 +118/513: 513 +119/514: 514 +12/62: 62 +12/63: 63 +120/515: 515 +121/516: 516 +121/517: 517 +121/518: 518 +122/519: 519 +123/520: 520 +124/521: 521 +124/522: 522 +125/523: 523 +126/524: 524 +127/525: 525 +127/526: 526 +127/527: 527 +128/528: 528 +128/529: 529 +128/530: 530 +128/531: 531 +128/532: 532 +128/533: 533 +128/534: 534 +128/535: 535 +128/536: 536 +128/537: 537 +128/538: 538 +128/539: 539 +128/540: 540 +128/541: 541 +128/542: 542 +129/543: 543 +129/544: 544 +129/545: 545 +129/546: 546 +129/547: 547 +129/548: 548 +129/549: 549 +129/550: 550 +129/551: 551 +13/64: 64 +13/65: 65 +13/66: 66 +13/67: 67 +13/68: 68 +13/69: 69 +13/70: 70 +13/71: 71 +13/72: 72 +13/73: 73 +130/552: 552 +131/553: 553 +132/554: 554 +132/555: 555 +133/556: 556 +134/557: 557 +135/558: 558 +135/559: 559 +135/560: 560 +135/561: 561 +135/562: 562 +136/563: 563 +136/564: 564 +136/565: 565 +137/566: 566 +138/567: 567 +139/568: 568 +14/74: 74 +14/75: 75 +14/76: 76 +140/569: 569 +141/570: 570 +142/571: 571 +142/572: 572 +142/573: 573 +142/574: 574 +143/575: 575 +144/576: 576 +144/577: 577 +144/578: 578 +145/579: 579 +146/580: 580 +146/581: 581 +146/582: 582 +146/583: 583 +146/584: 584 +146/585: 585 +146/586: 586 +146/587: 587 +146/588: 588 +146/589: 589 +146/590: 590 +146/591: 591 +146/592: 592 +146/593: 593 +146/594: 594 +146/595: 595 +147/596: 596 +148/597: 597 +148/598: 598 +148/599: 599 +148/600: 600 +148/601: 601 +148/602: 602 +149/603: 603 +149/604: 604 +149/605: 605 +149/606: 606 +149/607: 607 +149/608: 608 +149/609: 609 +149/610: 610 +149/611: 611 +149/612: 612 +149/613: 613 +149/614: 614 +149/615: 615 +149/616: 616 +149/617: 617 +149/618: 618 +15/77: 77 +15/78: 78 +15/79: 79 +150/619: 619 +150/620: 620 +150/621: 621 +150/622: 622 +150/623: 623 +150/624: 624 +150/625: 625 +150/626: 626 +150/627: 627 +151/628: 628 +151/629: 629 +151/630: 630 +152/631: 631 +152/632: 632 +152/633: 633 +152/634: 634 +152/635: 635 +152/636: 636 +152/637: 637 +152/638: 638 +152/639: 639 +152/640: 640 +152/641: 641 +152/642: 642 +153/643: 643 +153/644: 644 +153/645: 645 +153/646: 646 +153/647: 647 +153/648: 648 +153/649: 649 +154/650: 650 +155/651: 651 +155/652: 652 +155/653: 653 +155/654: 654 +156/655: 655 +156/656: 656 +156/657: 657 +156/658: 658 +156/659: 659 +156/660: 660 +156/661: 661 +156/662: 662 +156/663: 663 +156/664: 664 +156/665: 665 +156/666: 666 +156/667: 667 +156/668: 668 +156/669: 669 +156/670: 670 +156/671: 671 +156/672: 672 
+156/673: 673 +156/674: 674 +156/675: 675 +156/676: 676 +156/677: 677 +156/678: 678 +156/679: 679 +156/680: 680 +156/681: 681 +157/682: 682 +158/683: 683 +158/684: 684 +158/685: 685 +158/686: 686 +158/687: 687 +158/688: 688 +158/689: 689 +158/690: 690 +158/691: 691 +158/692: 692 +159/693: 693 +159/694: 694 +159/695: 695 +159/696: 696 +159/697: 697 +159/698: 698 +159/699: 699 +159/700: 700 +159/701: 701 +159/702: 702 +159/703: 703 +16/80: 80 +160/704: 704 +160/705: 705 +160/706: 706 +160/707: 707 +160/708: 708 +160/709: 709 +160/710: 710 +161/711: 711 +161/712: 712 +162/713: 713 +162/714: 714 +163/715: 715 +163/716: 716 +163/717: 717 +164/718: 718 +164/719: 719 +165/720: 720 +166/721: 721 +166/722: 722 +166/723: 723 +166/724: 724 +166/725: 725 +167/726: 726 +168/727: 727 +168/728: 728 +168/729: 729 +169/730: 730 +169/731: 731 +17/81: 81 +170/732: 732 +171/733: 733 +171/734: 734 +171/735: 735 +171/736: 736 +171/737: 737 +172/738: 738 +173/739: 739 +174/740: 740 +174/741: 741 +174/742: 742 +174/743: 743 +174/744: 744 +174/745: 745 +174/746: 746 +174/747: 747 +174/748: 748 +174/749: 749 +174/750: 750 +174/751: 751 +174/752: 752 +175/753: 753 +175/754: 754 +175/755: 755 +175/756: 756 +175/757: 757 +175/758: 758 +175/759: 759 +175/760: 760 +175/761: 761 +176/762: 762 +176/763: 763 +176/764: 764 +176/765: 765 +176/766: 766 +176/767: 767 +176/768: 768 +176/769: 769 +176/770: 770 +176/771: 771 +176/772: 772 +177/773: 773 +177/774: 774 +178/775: 775 +178/776: 776 +178/777: 777 +178/778: 778 +179/779: 779 +179/780: 780 +179/781: 781 +179/782: 782 +179/783: 783 +179/784: 784 +179/785: 785 +18/82: 82 +180/786: 786 +180/787: 787 +181/788: 788 +181/789: 789 +181/790: 790 +181/791: 791 +181/792: 792 +181/793: 793 +181/794: 794 +181/795: 795 +181/796: 796 +182/797: 797 +182/798: 798 +183/799: 799 +183/800: 800 +184/801: 801 +184/802: 802 +185/803: 803 +185/804: 804 +186/805: 805 +186/806: 806 +186/807: 807 +186/808: 808 +186/809: 809 +186/810: 810 +187/811: 811 +187/812: 812 +187/813: 813 +187/814: 814 +188/815: 815 +188/816: 816 +188/817: 817 +188/818: 818 +188/819: 819 +188/820: 820 +188/821: 821 +188/822: 822 +188/823: 823 +189/824: 824 +189/825: 825 +189/826: 826 +189/827: 827 +189/828: 828 +189/829: 829 +189/830: 830 +189/831: 831 +189/832: 832 +189/833: 833 +189/834: 834 +189/835: 835 +19/83: 83 +190/836: 836 +191/837: 837 +192/838: 838 +192/839: 839 +193/840: 840 +194/841: 841 +195/842: 842 +195/843: 843 +196/844: 844 +197/845: 845 +198/846: 846 +198/847: 847 +199/848: 848 +2/2: 2 +2/3: 3 +2/4: 4 +2/5: 5 +2/6: 6 +2/7: 7 +2/8: 8 +2/9: 9 +20/84: 84 +200/849: 849 +200/850: 850 +200/851: 851 +200/852: 852 +200/853: 853 +200/854: 854 +200/855: 855 +200/856: 856 +200/857: 857 +200/858: 858 +200/859: 859 +200/860: 860 +200/861: 861 +200/862: 862 +200/863: 863 +200/864: 864 +200/865: 865 +200/866: 866 +200/867: 867 +200/868: 868 +201/869: 869 +201/870: 870 +201/871: 871 +202/872: 872 +203/873: 873 +203/874: 874 +203/875: 875 +203/876: 876 +203/877: 877 +203/878: 878 +203/879: 879 +203/880: 880 +204/881: 881 +204/882: 882 +204/883: 883 +204/884: 884 +204/885: 885 +204/886: 886 +204/887: 887 +204/888: 888 +204/889: 889 +205/890: 890 +205/891: 891 +205/892: 892 +205/893: 893 +205/894: 894 +205/895: 895 +205/896: 896 +205/897: 897 +205/898: 898 +205/899: 899 +205/900: 900 +205/901: 901 +205/902: 902 +205/903: 903 +206/904: 904 +206/905: 905 +206/906: 906 +206/907: 907 +206/908: 908 +206/909: 909 +206/910: 910 +206/911: 911 +206/912: 912 +207/913: 913 +207/914: 914 +207/915: 915 +207/916: 916 +207/917: 917 
+207/918: 918 +208/919: 919 +208/920: 920 +208/921: 921 +208/922: 922 +208/923: 923 +208/924: 924 +208/925: 925 +208/926: 926 +209/927: 927 +209/928: 928 +209/929: 929 +209/930: 930 +209/931: 931 +209/932: 932 +209/933: 933 +209/934: 934 +209/935: 935 +209/936: 936 +209/937: 937 +21/85: 85 +21/86: 86 +21/87: 87 +21/88: 88 +21/89: 89 +210/938: 938 +210/939: 939 +210/940: 940 +211/941: 941 +211/942: 942 +211/943: 943 +211/944: 944 +212/945: 945 +213/946: 946 +214/947: 947 +215/948: 948 +215/949: 949 +216/950: 950 +216/951: 951 +217/952: 952 +217/953: 953 +217/954: 954 +217/955: 955 +217/956: 956 +218/957: 957 +219/958: 958 +219/959: 959 +219/960: 960 +219/961: 961 +219/962: 962 +219/963: 963 +219/964: 964 +22/90: 90 +22/91: 91 +220/965: 965 +220/966: 966 +220/967: 967 +220/968: 968 +220/969: 969 +220/970: 970 +220/971: 971 +220/972: 972 +220/973: 973 +221/974: 974 +222/975: 975 +222/976: 976 +222/977: 977 +223/978: 978 +224/979: 979 +224/980: 980 +224/981: 981 +224/982: 982 +224/983: 983 +224/984: 984 +224/985: 985 +224/986: 986 +224/987: 987 +224/988: 988 +225/989: 989 +225/990: 990 +225/991: 991 +225/992: 992 +225/993: 993 +225/994: 994 +225/995: 995 +225/996: 996 +225/997: 997 +225/998: 998 +225/999: 999 +226/1000: 1000 +226/1001: 1001 +226/1002: 1002 +227/1003: 1003 +227/1004: 1004 +227/1005: 1005 +227/1006: 1006 +227/1007: 1007 +227/1008: 1008 +227/1009: 1009 +227/1010: 1010 +227/1011: 1011 +227/1012: 1012 +227/1013: 1013 +227/1014: 1014 +227/1015: 1015 +227/1016: 1016 +227/1017: 1017 +227/1018: 1018 +228/1019: 1019 +229/1020: 1020 +229/1021: 1021 +229/1022: 1022 +229/1023: 1023 +229/1024: 1024 +229/1025: 1025 +229/1026: 1026 +229/1027: 1027 +23/92: 92 +23/93: 93 +230/1028: 1028 +230/1029: 1029 +230/1030: 1030 +230/1031: 1031 +230/1032: 1032 +230/1033: 1033 +230/1034: 1034 +230/1035: 1035 +231/1036: 1036 +231/1037: 1037 +231/1038: 1038 +232/1039: 1039 +232/1040: 1040 +233/1041: 1041 +234/1042: 1042 +234/1043: 1043 +234/1044: 1044 +235/1045: 1045 +235/1046: 1046 +235/1047: 1047 +235/1048: 1048 +236/1049: 1049 +236/1050: 1050 +236/1051: 1051 +236/1052: 1052 +237/1053: 1053 +237/1054: 1054 +238/1055: 1055 +239/1056: 1056 +239/1057: 1057 +24/94: 94 +240/1058: 1058 +241/1059: 1059 +241/1060: 1060 +241/1061: 1061 +241/1062: 1062 +242/1063: 1063 +243/1064: 1064 +244/1065: 1065 +244/1066: 1066 +244/1067: 1067 +244/1068: 1068 +244/1069: 1069 +244/1070: 1070 +244/1071: 1071 +244/1072: 1072 +244/1073: 1073 +244/1074: 1074 +244/1075: 1075 +244/1076: 1076 +244/1077: 1077 +244/1078: 1078 +244/1079: 1079 +244/1080: 1080 +244/1081: 1081 +244/1082: 1082 +244/1083: 1083 +244/1084: 1084 +245/1085: 1085 +245/1086: 1086 +245/1087: 1087 +245/1088: 1088 +245/1089: 1089 +246/1090: 1090 +246/1091: 1091 +246/1092: 1092 +247/1093: 1093 +248/1094: 1094 +249/1095: 1095 +25/95: 95 +25/96: 96 +25/97: 97 +25/98: 98 +250/1096: 1096 +250/1097: 1097 +251/1098: 1098 +252/1099: 1099 +253/1100: 1100 +254/1101: 1101 +254/1102: 1102 +255/1103: 1103 +256/1104: 1104 +256/1105: 1105 +256/1106: 1106 +256/1107: 1107 +256/1108: 1108 +257/1109: 1109 +257/1110: 1110 +258/1111: 1111 +259/1112: 1112 +26/99: 99 +260/1113: 1113 +260/1114: 1114 +261/1115: 1115 +261/1116: 1116 +262/1117: 1117 +262/1118: 1118 +262/1119: 1119 +262/1120: 1120 +262/1121: 1121 +262/1122: 1122 +262/1123: 1123 +262/1124: 1124 +262/1125: 1125 +262/1126: 1126 +262/1127: 1127 +262/1128: 1128 +262/1129: 1129 +262/1130: 1130 +262/1131: 1131 +262/1132: 1132 +262/1133: 1133 +262/1134: 1134 +262/1135: 1135 +262/1136: 1136 +262/1137: 1137 +262/1138: 1138 +262/1139: 1139 
+262/1140: 1140 +262/1141: 1141 +262/1142: 1142 +262/1143: 1143 +262/1144: 1144 +263/1145: 1145 +263/1146: 1146 +263/1147: 1147 +263/1148: 1148 +263/1149: 1149 +263/1150: 1150 +263/1151: 1151 +263/1152: 1152 +263/1153: 1153 +263/1154: 1154 +263/1155: 1155 +263/1156: 1156 +263/1157: 1157 +263/1158: 1158 +263/1159: 1159 +263/1160: 1160 +263/1161: 1161 +263/1162: 1162 +263/1163: 1163 +263/1164: 1164 +263/1165: 1165 +263/1166: 1166 +264/1167: 1167 +265/1168: 1168 +266/1169: 1169 +267/1170: 1170 +268/1171: 1171 +269/1172: 1172 +27/100: 100 +27/101: 101 +27/102: 102 +270/1173: 1173 +270/1174: 1174 +270/1175: 1175 +270/1176: 1176 +270/1177: 1177 +270/1178: 1178 +270/1179: 1179 +271/1180: 1180 +271/1181: 1181 +271/1182: 1182 +272/1183: 1183 +272/1184: 1184 +272/1185: 1185 +273/1186: 1186 +274/1187: 1187 +275/1188: 1188 +275/1189: 1189 +275/1190: 1190 +276/1191: 1191 +276/1192: 1192 +276/1193: 1193 +276/1194: 1194 +276/1195: 1195 +276/1196: 1196 +276/1197: 1197 +276/1198: 1198 +276/1199: 1199 +276/1200: 1200 +276/1201: 1201 +276/1202: 1202 +276/1203: 1203 +276/1204: 1204 +276/1205: 1205 +277/1206: 1206 +277/1207: 1207 +277/1208: 1208 +277/1209: 1209 +278/1210: 1210 +279/1211: 1211 +279/1212: 1212 +279/1213: 1213 +28/103: 103 +28/104: 104 +28/105: 105 +28/106: 106 +28/107: 107 +28/108: 108 +28/109: 109 +280/1214: 1214 +280/1215: 1215 +280/1216: 1216 +280/1217: 1217 +280/1218: 1218 +280/1219: 1219 +280/1220: 1220 +280/1221: 1221 +280/1222: 1222 +280/1223: 1223 +280/1224: 1224 +280/1225: 1225 +280/1226: 1226 +280/1227: 1227 +280/1228: 1228 +280/1229: 1229 +280/1230: 1230 +280/1231: 1231 +280/1232: 1232 +280/1233: 1233 +280/1234: 1234 +280/1235: 1235 +280/1236: 1236 +280/1237: 1237 +280/1238: 1238 +280/1239: 1239 +280/1240: 1240 +280/1241: 1241 +280/1242: 1242 +280/1243: 1243 +280/1244: 1244 +280/1245: 1245 +280/1246: 1246 +280/1247: 1247 +280/1248: 1248 +280/1249: 1249 +280/1250: 1250 +280/1251: 1251 +280/1252: 1252 +280/1253: 1253 +280/1254: 1254 +280/1255: 1255 +280/1256: 1256 +280/1257: 1257 +280/1258: 1258 +280/1259: 1259 +280/1260: 1260 +280/1261: 1261 +280/1262: 1262 +280/1263: 1263 +281/1264: 1264 +281/1265: 1265 +281/1266: 1266 +281/1267: 1267 +281/1268: 1268 +281/1269: 1269 +281/1270: 1270 +281/1271: 1271 +281/1272: 1272 +281/1273: 1273 +282/1274: 1274 +283/1275: 1275 +284/1276: 1276 +284/1277: 1277 +284/1278: 1278 +285/1279: 1279 +286/1280: 1280 +286/1281: 1281 +287/1282: 1282 +287/1283: 1283 +287/1284: 1284 +287/1285: 1285 +288/1286: 1286 +288/1287: 1287 +288/1288: 1288 +289/1289: 1289 +289/1290: 1290 +29/110: 110 +290/1291: 1291 +290/1292: 1292 +291/1293: 1293 +291/1294: 1294 +291/1295: 1295 +291/1296: 1296 +291/1297: 1297 +291/1298: 1298 +291/1299: 1299 +291/1300: 1300 +291/1301: 1301 +291/1302: 1302 +291/1303: 1303 +291/1304: 1304 +291/1305: 1305 +292/1306: 1306 +293/1307: 1307 +294/1308: 1308 +294/1309: 1309 +295/1310: 1310 +296/1311: 1311 +297/1312: 1312 +297/1313: 1313 +297/1314: 1314 +297/1315: 1315 +297/1316: 1316 +297/1317: 1317 +297/1318: 1318 +297/1319: 1319 +297/1320: 1320 +297/1321: 1321 +297/1322: 1322 +297/1323: 1323 +297/1324: 1324 +297/1325: 1325 +298/1326: 1326 +299/1327: 1327 +299/1328: 1328 +299/1329: 1329 +299/1330: 1330 +299/1331: 1331 +299/1332: 1332 +299/1333: 1333 +299/1334: 1334 +299/1335: 1335 +299/1336: 1336 +299/1337: 1337 +299/1338: 1338 +3/10: 10 +3/11: 11 +3/12: 12 +3/13: 13 +3/14: 14 +3/15: 15 +3/16: 16 +30/111: 111 +30/112: 112 +30/113: 113 +30/114: 114 +30/115: 115 +30/116: 116 +30/117: 117 +30/118: 118 +30/119: 119 +30/120: 120 +30/121: 121 +30/122: 122 
+30/123: 123 +30/124: 124 +300/1339: 1339 +300/1340: 1340 +300/1341: 1341 +300/1342: 1342 +300/1343: 1343 +300/1344: 1344 +300/1345: 1345 +300/1346: 1346 +300/1347: 1347 +300/1348: 1348 +300/1349: 1349 +301/1350: 1350 +301/1351: 1351 +301/1352: 1352 +301/1353: 1353 +301/1354: 1354 +301/1355: 1355 +301/1356: 1356 +301/1357: 1357 +301/1358: 1358 +302/1359: 1359 +302/1360: 1360 +302/1361: 1361 +302/1362: 1362 +302/1363: 1363 +303/1364: 1364 +303/1365: 1365 +303/1366: 1366 +303/1367: 1367 +304/1368: 1368 +305/1369: 1369 +305/1370: 1370 +305/1371: 1371 +306/1372: 1372 +306/1373: 1373 +307/1374: 1374 +308/1375: 1375 +308/1376: 1376 +308/1377: 1377 +308/1378: 1378 +308/1379: 1379 +308/1380: 1380 +308/1381: 1381 +308/1382: 1382 +308/1383: 1383 +308/1384: 1384 +308/1385: 1385 +308/1386: 1386 +308/1387: 1387 +308/1388: 1388 +308/1389: 1389 +308/1390: 1390 +308/1391: 1391 +308/1392: 1392 +308/1393: 1393 +309/1394: 1394 +309/1395: 1395 +309/1396: 1396 +31/125: 125 +31/126: 126 +31/127: 127 +31/128: 128 +31/129: 129 +31/130: 130 +31/131: 131 +31/132: 132 +31/133: 133 +31/134: 134 +31/135: 135 +31/136: 136 +310/1397: 1397 +310/1398: 1398 +310/1399: 1399 +310/1400: 1400 +310/1401: 1401 +310/1402: 1402 +310/1403: 1403 +310/1404: 1404 +310/1405: 1405 +310/1406: 1406 +310/1407: 1407 +310/1408: 1408 +310/1409: 1409 +310/1410: 1410 +310/1411: 1411 +310/1412: 1412 +310/1413: 1413 +310/1414: 1414 +310/1415: 1415 +311/1416: 1416 +312/1417: 1417 +313/1418: 1418 +313/1419: 1419 +313/1420: 1420 +313/1421: 1421 +314/1422: 1422 +314/1423: 1423 +314/1424: 1424 +314/1425: 1425 +314/1426: 1426 +314/1427: 1427 +314/1428: 1428 +314/1429: 1429 +314/1430: 1430 +314/1431: 1431 +314/1432: 1432 +314/1433: 1433 +314/1434: 1434 +314/1435: 1435 +314/1436: 1436 +314/1437: 1437 +314/1438: 1438 +314/1439: 1439 +314/1440: 1440 +314/1441: 1441 +314/1442: 1442 +314/1443: 1443 +314/1444: 1444 +314/1445: 1445 +314/1446: 1446 +314/1447: 1447 +314/1448: 1448 +314/1449: 1449 +314/1450: 1450 +314/1451: 1451 +314/1452: 1452 +314/1453: 1453 +314/1454: 1454 +314/1455: 1455 +314/1456: 1456 +314/1457: 1457 +314/1458: 1458 +314/1459: 1459 +314/1460: 1460 +314/1461: 1461 +314/1462: 1462 +314/1463: 1463 +314/1464: 1464 +314/1465: 1465 +314/1466: 1466 +314/1467: 1467 +314/1468: 1468 +314/1469: 1469 +314/1470: 1470 +314/1471: 1471 +314/1472: 1472 +314/1473: 1473 +314/1474: 1474 +314/1475: 1475 +314/1476: 1476 +314/1477: 1477 +314/1478: 1478 +314/1479: 1479 +314/1480: 1480 +314/1481: 1481 +314/1482: 1482 +314/1483: 1483 +314/1484: 1484 +314/1485: 1485 +314/1486: 1486 +314/1487: 1487 +32/137: 137 +32/138: 138 +32/139: 139 +32/140: 140 +32/141: 141 +32/142: 142 +32/143: 143 +32/144: 144 +32/145: 145 +32/146: 146 +32/147: 147 +32/148: 148 +32/149: 149 +32/150: 150 +32/151: 151 +32/152: 152 +32/153: 153 +32/154: 154 +32/155: 155 +32/156: 156 +32/157: 157 +32/158: 158 +32/159: 159 +32/160: 160 +32/161: 161 +33/162: 162 +33/163: 163 +33/164: 164 +33/165: 165 +33/166: 166 +33/167: 167 +33/168: 168 +33/169: 169 +33/170: 170 +33/171: 171 +33/172: 172 +33/173: 173 +33/174: 174 +33/175: 175 +33/176: 176 +33/177: 177 +33/178: 178 +33/179: 179 +33/180: 180 +34/181: 181 +35/182: 182 +36/183: 183 +36/184: 184 +37/185: 185 +38/186: 186 +38/187: 187 +38/188: 188 +38/189: 189 +38/190: 190 +38/191: 191 +38/192: 192 +38/193: 193 +38/194: 194 +39/195: 195 +39/196: 196 +39/197: 197 +39/198: 198 +39/199: 199 +39/200: 200 +39/201: 201 +39/202: 202 +39/203: 203 +39/204: 204 +39/205: 205 +39/206: 206 +39/207: 207 +39/208: 208 +39/209: 209 +39/210: 210 +39/211: 211 +39/212: 212 
+39/213: 213 +39/214: 214 +39/215: 215 +39/216: 216 +39/217: 217 +39/218: 218 +39/219: 219 +39/220: 220 +39/221: 221 +39/222: 222 +39/223: 223 +39/224: 224 +39/225: 225 +39/226: 226 +39/227: 227 +39/228: 228 +39/229: 229 +39/230: 230 +39/231: 231 +4/17: 17 +4/18: 18 +4/19: 19 +4/20: 20 +40/232: 232 +40/233: 233 +41/234: 234 +41/235: 235 +41/236: 236 +41/237: 237 +41/238: 238 +41/239: 239 +41/240: 240 +41/241: 241 +41/242: 242 +41/243: 243 +41/244: 244 +41/245: 245 +41/246: 246 +41/247: 247 +41/248: 248 +42/249: 249 +43/250: 250 +44/251: 251 +45/252: 252 +45/253: 253 +45/254: 254 +45/255: 255 +45/256: 256 +45/257: 257 +45/258: 258 +45/259: 259 +45/260: 260 +45/261: 261 +45/262: 262 +45/263: 263 +45/264: 264 +45/265: 265 +45/266: 266 +46/267: 267 +46/268: 268 +46/269: 269 +46/270: 270 +46/271: 271 +46/272: 272 +46/273: 273 +46/274: 274 +46/275: 275 +46/276: 276 +46/277: 277 +46/278: 278 +47/279: 279 +47/280: 280 +48/281: 281 +48/282: 282 +48/283: 283 +48/284: 284 +48/285: 285 +49/286: 286 +49/287: 287 +49/288: 288 +49/289: 289 +49/290: 290 +49/291: 291 +49/292: 292 +5/21: 21 +5/22: 22 +5/23: 23 +5/24: 24 +5/25: 25 +5/26: 26 +50/293: 293 +51/294: 294 +52/295: 295 +52/296: 296 +52/297: 297 +53/298: 298 +54/299: 299 +55/300: 300 +55/301: 301 +56/302: 302 +56/303: 303 +56/304: 304 +56/305: 305 +56/306: 306 +56/307: 307 +56/308: 308 +56/309: 309 +57/310: 310 +58/311: 311 +58/312: 312 +58/313: 313 +59/314: 314 +59/315: 315 +6/27: 27 +6/28: 28 +60/316: 316 +60/317: 317 +60/318: 318 +61/319: 319 +61/320: 320 +61/321: 321 +62/322: 322 +62/323: 323 +63/324: 324 +64/325: 325 +65/326: 326 +65/327: 327 +65/328: 328 +65/329: 329 +66/330: 330 +66/331: 331 +66/332: 332 +66/333: 333 +67/334: 334 +68/335: 335 +68/336: 336 +68/337: 337 +68/338: 338 +68/339: 339 +68/340: 340 +68/341: 341 +68/342: 342 +68/343: 343 +68/344: 344 +68/345: 345 +68/346: 346 +68/347: 347 +68/348: 348 +68/349: 349 +68/350: 350 +69/351: 351 +7/29: 29 +7/30: 30 +70/352: 352 +71/353: 353 +71/354: 354 +72/355: 355 +72/356: 356 +73/357: 357 +73/358: 358 +74/359: 359 +75/360: 360 +76/361: 361 +76/362: 362 +76/363: 363 +76/364: 364 +76/365: 365 +77/366: 366 +78/367: 367 +78/368: 368 +78/369: 369 +79/370: 370 +79/371: 371 +79/372: 372 +79/373: 373 +79/374: 374 +79/375: 375 +79/376: 376 +79/377: 377 +79/378: 378 +8/31: 31 +8/32: 32 +8/33: 33 +8/34: 34 +8/35: 35 +8/36: 36 +8/37: 37 +8/38: 38 +8/39: 39 +8/40: 40 +8/41: 41 +8/42: 42 +8/43: 43 +8/44: 44 +8/45: 45 +8/46: 46 +80/379: 379 +80/380: 380 +80/381: 381 +80/382: 382 +80/383: 383 +80/384: 384 +80/385: 385 +80/386: 386 +80/387: 387 +80/388: 388 +80/389: 389 +80/390: 390 +80/391: 391 +81/392: 392 +81/393: 393 +81/394: 394 +81/395: 395 +81/396: 396 +81/397: 397 +81/398: 398 +81/399: 399 +81/400: 400 +81/401: 401 +82/402: 402 +83/403: 403 +84/404: 404 +85/405: 405 +86/406: 406 +86/407: 407 +87/408: 408 +88/409: 409 +88/410: 410 +89/411: 411 +89/412: 412 +9/47: 47 +9/48: 48 +9/49: 49 +90/413: 413 +90/414: 414 +91/415: 415 +91/416: 416 +92/417: 417 +93/418: 418 +93/419: 419 +93/420: 420 +93/421: 421 +93/422: 422 +93/423: 423 +93/424: 424 +93/425: 425 +93/426: 426 +94/427: 427 +94/428: 428 +94/429: 429 +94/430: 430 +94/431: 431 +94/432: 432 +94/433: 433 +95/434: 434 +95/435: 435 +95/436: 436 +95/437: 437 +95/438: 438 +96/439: 439 +97/440: 440 +97/441: 441 +98/442: 442 +99/443: 443 diff --git a/sample_dataset/id_manager/service_id.yml b/sample_dataset/id_manager/service_id.yml new file mode 100644 index 0000000..dfc83f7 --- /dev/null +++ b/sample_dataset/id_manager/service_id.yml @@ -0,0 +1,316 @@ 
+? '' +: 0 +'1': 1 +'10': 10 +'100': 100 +'101': 101 +'102': 102 +'103': 103 +'104': 104 +'105': 105 +'106': 106 +'107': 107 +'108': 108 +'109': 109 +'11': 11 +'110': 110 +'111': 111 +'112': 112 +'113': 113 +'114': 114 +'115': 115 +'116': 116 +'117': 117 +'118': 118 +'119': 119 +'12': 12 +'120': 120 +'121': 121 +'122': 122 +'123': 123 +'124': 124 +'125': 125 +'126': 126 +'127': 127 +'128': 128 +'129': 129 +'13': 13 +'130': 130 +'131': 131 +'132': 132 +'133': 133 +'134': 134 +'135': 135 +'136': 136 +'137': 137 +'138': 138 +'139': 139 +'14': 14 +'140': 140 +'141': 141 +'142': 142 +'143': 143 +'144': 144 +'145': 145 +'146': 146 +'147': 147 +'148': 148 +'149': 149 +'15': 15 +'150': 150 +'151': 151 +'152': 152 +'153': 153 +'154': 154 +'155': 155 +'156': 156 +'157': 157 +'158': 158 +'159': 159 +'16': 16 +'160': 160 +'161': 161 +'162': 162 +'163': 163 +'164': 164 +'165': 165 +'166': 166 +'167': 167 +'168': 168 +'169': 169 +'17': 17 +'170': 170 +'171': 171 +'172': 172 +'173': 173 +'174': 174 +'175': 175 +'176': 176 +'177': 177 +'178': 178 +'179': 179 +'18': 18 +'180': 180 +'181': 181 +'182': 182 +'183': 183 +'184': 184 +'185': 185 +'186': 186 +'187': 187 +'188': 188 +'189': 189 +'19': 19 +'190': 190 +'191': 191 +'192': 192 +'193': 193 +'194': 194 +'195': 195 +'196': 196 +'197': 197 +'198': 198 +'199': 199 +'2': 2 +'20': 20 +'200': 200 +'201': 201 +'202': 202 +'203': 203 +'204': 204 +'205': 205 +'206': 206 +'207': 207 +'208': 208 +'209': 209 +'21': 21 +'210': 210 +'211': 211 +'212': 212 +'213': 213 +'214': 214 +'215': 215 +'216': 216 +'217': 217 +'218': 218 +'219': 219 +'22': 22 +'220': 220 +'221': 221 +'222': 222 +'223': 223 +'224': 224 +'225': 225 +'226': 226 +'227': 227 +'228': 228 +'229': 229 +'23': 23 +'230': 230 +'231': 231 +'232': 232 +'233': 233 +'234': 234 +'235': 235 +'236': 236 +'237': 237 +'238': 238 +'239': 239 +'24': 24 +'240': 240 +'241': 241 +'242': 242 +'243': 243 +'244': 244 +'245': 245 +'246': 246 +'247': 247 +'248': 248 +'249': 249 +'25': 25 +'250': 250 +'251': 251 +'252': 252 +'253': 253 +'254': 254 +'255': 255 +'256': 256 +'257': 257 +'258': 258 +'259': 259 +'26': 26 +'260': 260 +'261': 261 +'262': 262 +'263': 263 +'264': 264 +'265': 265 +'266': 266 +'267': 267 +'268': 268 +'269': 269 +'27': 27 +'270': 270 +'271': 271 +'272': 272 +'273': 273 +'274': 274 +'275': 275 +'276': 276 +'277': 277 +'278': 278 +'279': 279 +'28': 28 +'280': 280 +'281': 281 +'282': 282 +'283': 283 +'284': 284 +'285': 285 +'286': 286 +'287': 287 +'288': 288 +'289': 289 +'29': 29 +'290': 290 +'291': 291 +'292': 292 +'293': 293 +'294': 294 +'295': 295 +'296': 296 +'297': 297 +'298': 298 +'299': 299 +'3': 3 +'30': 30 +'300': 300 +'301': 301 +'302': 302 +'303': 303 +'304': 304 +'305': 305 +'306': 306 +'307': 307 +'308': 308 +'309': 309 +'31': 31 +'310': 310 +'311': 311 +'312': 312 +'313': 313 +'314': 314 +'32': 32 +'33': 33 +'34': 34 +'35': 35 +'36': 36 +'37': 37 +'38': 38 +'39': 39 +'4': 4 +'40': 40 +'41': 41 +'42': 42 +'43': 43 +'44': 44 +'45': 45 +'46': 46 +'47': 47 +'48': 48 +'49': 49 +'5': 5 +'50': 50 +'51': 51 +'52': 52 +'53': 53 +'54': 54 +'55': 55 +'56': 56 +'57': 57 +'58': 58 +'59': 59 +'6': 6 +'60': 60 +'61': 61 +'62': 62 +'63': 63 +'64': 64 +'65': 65 +'66': 66 +'67': 67 +'68': 68 +'69': 69 +'7': 7 +'70': 70 +'71': 71 +'72': 72 +'73': 73 +'74': 74 +'75': 75 +'76': 76 +'77': 77 +'78': 78 +'79': 79 +'8': 8 +'80': 80 +'81': 81 +'82': 82 +'83': 83 +'84': 84 +'85': 85 +'86': 86 +'87': 87 +'88': 88 +'89': 89 +'9': 9 +'90': 90 +'91': 91 +'92': 92 +'93': 93 +'94': 94 +'95': 95 +'96': 96 +'97': 97 +'98': 98 
+'99': 99 diff --git a/sample_dataset/id_manager/status_id.yml b/sample_dataset/id_manager/status_id.yml new file mode 100644 index 0000000..343578e --- /dev/null +++ b/sample_dataset/id_manager/status_id.yml @@ -0,0 +1,21 @@ +? '' +: 0 +'0': 1 +'1': 2 +'400': 3 +'401': 4 +'403': 5 +'404': 6 +'423': 7 +'429': 8 +'500': 9 +'503': 10 +'504': 11 +CompletionException: 12 +HystrixRuntimeException: 13 +ProcessingException: 14 +SOAERROR_1006: 15 +SOAERROR_2017: 16 +SOAERROR_2026: 17 +SOAERROR_2028: 18 +UNFINISHED: 19 diff --git a/sample_dataset/test.csv b/sample_dataset/test.csv new file mode 100644 index 0000000..ff69df4 --- /dev/null +++ b/sample_dataset/test.csv @@ -0,0 +1,51 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName,nodeLatencyLabel,graphLatencyLabel,graphStructureLabel +1596357529536981502,4448694259929862755,0,-1990250972,2022-05-02 18:10:09,4188,102000000,0,0,1413,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-281558527,2022-05-02 18:10:09,3913,347000000,0,0,1399,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-335450563,2022-05-02 18:10:09,74,256000000,0,0,1400,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,159928739,2022-05-02 18:10:09,14,166000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1875814534,2022-05-02 18:10:09,20,180000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1741606249,2022-05-02 18:10:09,20,250000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1617443693,2022-05-02 18:10:09,12,268000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-278055518,2022-05-02 18:10:09,12,328000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-370879640,2022-05-02 18:10:09,11,349000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,2071447605,2022-05-02 18:10:13,15,265000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-1819031907,2022-05-02 18:10:13,12,278000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1842451354,2022-05-02 18:10:13,13,287000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,1935560096,2022-05-02 18:10:13,15,295000000,0,0,1402,310,0,0,1 +1596357529536981502,4448694259929862755,-1990250972,-2003257890,2022-05-02 18:10:14,14,306000000,0,500,1412,310,0,0,1 +1596357529536981502,4448694259929862755,-281558527,-700384075,2022-05-02 18:10:09,3907,353000000,0,0,124,30,0,0,1 +1596357529536981502,4448694259929862755,-335450563,-21249935,2022-05-02 18:10:09,63,267000000,0,0,249,42,0,0,1 +1596357529536981502,4448694259929862755,-2003257890,1900376561,2022-05-02 18:10:14,2,318000000,0,500,1370,305,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-803543939,2022-05-02 18:10:09,23,367000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-736459037,2022-05-02 18:10:09,19,371000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-1183351125,2022-05-02 18:10:11,16,34000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,350862234,2022-05-02 18:10:12,10,580000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,1682015471,2022-05-02 18:10:13,11,169000000,0,0,113,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,2040964166,2022-05-02 18:10:13,11,259000000,0,0,113,30,0,0,1 
+1596357529536981502,4448694259929862755,-700384075,1762151399,2022-05-02 18:10:10,955,85000000,0,0,117,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-334971362,2022-05-02 18:10:12,583,577000000,0,0,117,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,-15543737,2022-05-02 18:10:09,51,379000000,0,0,119,30,0,0,1 +1596357529536981502,4448694259929862755,-700384075,583885210,2022-05-02 18:10:14,11,269000000,0,0,121,30,0,0,1 +1596357529536981502,4448694259929862755,1762151399,681721006,2022-05-02 18:10:10,840,160000000,0,0,1270,281,0,0,1 +1596357529536981502,4448694259929862755,-334971362,1067395986,2022-05-02 18:10:12,479,611000000,0,0,1270,281,0,0,1 +1596357529536981502,4448694259929862755,-15543737,408810147,2022-05-02 18:10:09,33,397000000,0,0,1277,284,0,0,1 +1596357529536981502,4448694259929862755,583885210,-304841780,2022-05-02 18:10:14,3,277000000,0,0,1369,305,0,0,1 +5662566985137355690,7477298637177050925,0,-491131385,2022-05-02 17:47:41,5176,914000000,0,0,1393,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,-652820602,2022-05-02 17:47:41,14,976000000,0,0,1378,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,-1333335415,2022-05-02 17:47:47,11,69000000,0,0,1380,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,703056482,2022-05-02 17:47:41,10,950000000,0,0,1381,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,687971319,2022-05-02 17:47:47,7,83000000,0,0,1386,308,1,1,0 +5662566985137355690,7477298637177050925,-491131385,1792075409,2022-05-02 17:47:41,7,963000000,0,0,1389,308,0,1,0 +5662566985137355690,7477298637177050925,-491131385,48140695,2022-05-02 17:47:47,26,14000000,0,0,1391,308,0,1,0 +5662566985137355690,7477298637177050925,-652820602,1379876670,2022-05-02 17:47:41,11,979000000,0,0,310,57,0,1,0 +5662566985137355690,7477298637177050925,703056482,-699583531,2022-05-02 17:47:41,5,945000000,0,0,356,72,0,1,0 +5662566985137355690,7477298637177050925,687971319,-1212461088,2022-05-02 17:47:47,4,86000000,0,0,1212,279,0,1,0 +5662566985137355690,7477298637177050925,48140695,-1178781667,2022-05-02 17:47:47,11,19000000,0,0,1301,291,0,1,0 +3646047056327432861,6658453249126145996,0,1931323897,2022-05-02 15:59:59,60,450000000,0,0,403,83,0,0,0 +3646047056327432861,6658453249126145996,1931323897,711219543,2022-05-02 15:59:59,57,453000000,0,0,649,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,-1561583093,2022-05-02 15:59:59,4,466000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,81837891,2022-05-02 15:59:59,24,486000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,1963630877,2022-05-02 15:59:59,5,505000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,-1370474143,2022-05-02 15:59:59,5,505000000,0,0,643,153,0,0,0 +3646047056327432861,6658453249126145996,711219543,1992379152,2022-05-02 15:59:59,17,473000000,0,500,644,153,0,0,0 +3646047056327432861,6658453249126145996,1992379152,360300743,2022-05-02 15:59:59,13,467000000,0,500,673,156,0,0,0 diff --git a/sample_dataset/train.csv b/sample_dataset/train.csv new file mode 100644 index 0000000..213be91 --- /dev/null +++ b/sample_dataset/train.csv @@ -0,0 +1,22 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName +7565692060307280094,4149727419449628353,0,-1160783149,2022-05-02 18:32:16,8,522000000,0,0,3,2 +7565692060307280094,4149727419449628353,-1160783149,-663518905,2022-05-02 18:32:16,5,525000000,0,0,6,2 
+7565692060307280094,4149727419449628353,-663518905,292134812,2022-05-02 18:32:16,2,528000000,0,0,17,4 +7469867095312772506,9054005929149387783,0,-116188428,2022-05-02 15:59:34,9,401000000,0,0,3,2 +7469867095312772506,9054005929149387783,-116188428,-936201595,2022-05-02 15:59:34,6,404000000,0,0,6,2 +7469867095312772506,9054005929149387783,-936201595,203651095,2022-05-02 15:59:34,4,406000000,0,0,17,4 +1302365061014104190,4713847854262197922,0,147046579,2022-05-02 16:05:08,8,262000000,0,0,3,2 +1302365061014104190,4713847854262197922,147046579,-815705052,2022-05-02 16:05:08,5,265000000,0,0,6,2 +1302365061014104190,4713847854262197922,-815705052,-1767599182,2022-05-02 16:05:08,3,267000000,0,0,17,4 +4520607604792678538,90296779370241065,0,-1192584432,2022-05-02 15:03:15,10,800000000,0,0,3,2 +4520607604792678538,90296779370241065,-1192584432,2102356101,2022-05-02 15:03:15,7,803000000,0,0,6,2 +4520607604792678538,90296779370241065,2102356101,-1201056520,2022-05-02 15:03:15,5,805000000,0,0,17,4 +4301483373465897605,1921512395340371532,0,-116188428,2022-05-02 15:59:34,9,401000000,0,0,3,2 +4301483373465897605,1921512395340371532,-116188428,-936201595,2022-05-02 15:59:34,6,404000000,0,0,6,2 +4301483373465897605,1921512395340371532,-936201595,203651095,2022-05-02 15:59:34,4,406000000,0,0,17,4 +4516399473592649105,3122656494628515993,0,-940181392,2022-05-02 16:32:32,8,162000000,0,0,3,2 +4516399473592649105,3122656494628515993,-940181392,-1775474648,2022-05-02 16:32:32,6,164000000,0,0,6,2 +4516399473592649105,3122656494628515993,-1775474648,-614412147,2022-05-02 16:32:32,4,166000000,0,0,17,4 +2001615163061348505,1103669949841503114,0,-251538214,2022-05-02 17:19:09,7,523000000,0,0,3,2 +2001615163061348505,1103669949841503114,-251538214,-295534211,2022-05-02 17:19:09,5,525000000,0,0,6,2 +2001615163061348505,1103669949841503114,-295534211,-1868571256,2022-05-02 17:19:09,3,527000000,0,0,17,4 diff --git a/sample_dataset/val.csv b/sample_dataset/val.csv new file mode 100644 index 0000000..1363df4 --- /dev/null +++ b/sample_dataset/val.csv @@ -0,0 +1,22 @@ +traceIdHigh,traceIdLow,parentSpanId,spanId,startTime,duration,nanosecond,DBhash,status,operationName,serviceName +3718860522599049613,8245757295869652015,0,744605799,2022-05-02 15:34:49,10,630000000,0,0,3,2 +3718860522599049613,8245757295869652015,744605799,258669622,2022-05-02 15:34:49,6,634000000,0,0,6,2 +3718860522599049613,8245757295869652015,258669622,-2140429749,2022-05-02 15:34:49,4,626000000,0,0,17,4 +171786705856344778,8432392905725602200,0,834436815,2022-05-02 15:38:25,22,928000000,0,0,3,2 +171786705856344778,8432392905725602200,834436815,1941867149,2022-05-02 15:38:25,19,931000000,0,0,6,2 +171786705856344778,8432392905725602200,1941867149,-2120879518,2022-05-02 15:38:25,17,933000000,0,0,17,4 +1884068908740226663,198674487451014796,0,-1192584432,2022-05-02 15:03:15,10,800000000,0,0,3,2 +1884068908740226663,198674487451014796,-1192584432,2102356101,2022-05-02 15:03:15,7,803000000,0,0,6,2 +1884068908740226663,198674487451014796,2102356101,-1201056520,2022-05-02 15:03:15,5,805000000,0,0,17,4 +6433112438253994909,8162558342722269388,0,-940181392,2022-05-02 16:32:32,8,162000000,0,0,3,2 +6433112438253994909,8162558342722269388,-940181392,-1775474648,2022-05-02 16:32:32,6,164000000,0,0,6,2 +6433112438253994909,8162558342722269388,-1775474648,-614412147,2022-05-02 16:32:32,4,166000000,0,0,17,4 +6877930537907535995,8837251782173988176,0,-112915630,2022-05-02 16:57:11,7,243000000,0,0,3,2 
+6877930537907535995,8837251782173988176,-112915630,-1653931621,2022-05-02 16:57:11,4,236000000,0,0,6,2 +6877930537907535995,8837251782173988176,-1653931621,-48204267,2022-05-02 16:57:11,2,238000000,0,0,17,4 +790603823422444737,8049481303438475592,0,-269253468,2022-05-02 15:00:08,14,176000000,0,0,3,2 +790603823422444737,8049481303438475592,-269253468,1292285861,2022-05-02 15:00:08,11,179000000,0,0,6,2 +790603823422444737,8049481303438475592,1292285861,-205310853,2022-05-02 15:00:08,4,186000000,0,0,17,4 +4016062494274678058,802407659628105677,0,-1067224499,2022-05-02 15:14:32,20,520000000,0,0,3,2 +4016062494274678058,802407659628105677,-1067224499,1026915991,2022-05-02 15:14:32,17,523000000,0,0,6,2 +4016062494274678058,802407659628105677,1026915991,-1777842468,2022-05-02 15:14:32,15,525000000,0,0,17,4 diff --git a/test.sh b/test.sh new file mode 100644 index 0000000..00b4fff --- /dev/null +++ b/test.sh @@ -0,0 +1,4 @@ +echo "Usage: bash test.sh [model_path] [dataset_path]" +echo "MODEL: $1" +echo "DATASET: $2" +python3 -m tracegnn.models.trace_vae.test evaluate-nll -M "$1" --use-train-val -D "$2" --device cpu --use-std-limit --std-limit-global diff --git a/tracegnn/__init__.py b/tracegnn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/cli/__init__.py b/tracegnn/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/cli/data_process.py b/tracegnn/cli/data_process.py new file mode 100644 index 0000000..03f6303 --- /dev/null +++ b/tracegnn/cli/data_process.py @@ -0,0 +1,108 @@ +import math +import pickle as pkl +import random +import shutil +import sys + +import click +import numpy as np +from tqdm import tqdm + +from tracegnn.constants import * +from tracegnn.data import * +from tracegnn.utils import * + + +def get_graph_key(g): + node_types = set() + stack = [g.root] + while stack: + nd = stack.pop() + node_types.add(nd.operation_id) + stack.extend(nd.children) + return g.root.operation_id, g.max_depth, tuple(sorted(node_types)) + + +@click.group() +def main(): + pass + + +@main.command() +@click.option('-i', '--input-dir') +@click.option('-o', '--output-dir') +@click.option('-n', '--name', type=str, required=True) +def csv_to_db(input_dir, output_dir, name): + # check the parameters + input_dir = os.path.abspath(input_dir) + output_dir = os.path.abspath(output_dir) + + input_path = os.path.join(input_dir, f"{name}.csv") + output_path = os.path.join(output_dir, "processed", name) + + # Load id_manager + id_manager = TraceGraphIDManager(os.path.join(input_dir, 'id_manager')) + + # process the traces + # load the graphs + if 'test' not in name: + df = load_trace_csv(input_path) + trace_graphs = df_to_trace_graphs( + df, + id_manager=id_manager, + merge_spans=True, + ) + + # write to db + if os.path.exists(output_path): + shutil.rmtree(output_path) + + db = BytesSqliteDB(output_path, write=True) + with db, db.write_batch(): + for g in tqdm(trace_graphs, desc='Save graphs'): + db.add(g.to_bytes()) + else: + # read test data + df = load_trace_csv(input_path, is_test=True) + + for i in range(3): + trace_graphs = df_to_trace_graphs( + df, + id_manager=id_manager, + merge_spans=True, + test_label=i + ) + + # write to db + if i == 0: + output_path = os.path.join(output_dir, 'processed', 'test') + elif i == 1: + output_path = os.path.join(output_dir, 'processed', 'test-drop') + else: + output_path = os.path.join(output_dir, 'processed', 'test-latency') + + if os.path.exists(output_path): + shutil.rmtree(output_path) + + db = 
BytesSqliteDB(output_path, write=True) + with db, db.write_batch(): + for g in tqdm(trace_graphs, desc='Save graphs'): + db.add(g.to_bytes()) + + +@main.command() +@click.option('-i', '--input-dir') +@click.option('-o', '--output_dir') +def preprocess(input_dir, output_dir): + print("Convert datasets...") + print("------------> Train") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n train") + print("------------> Val") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n val") + print("------------> Test") + os.system(f"python3 -m tracegnn.cli.data_process csv-to-db -i {input_dir} -o {output_dir} -n test") + + print("Finished!") + +if __name__ == '__main__': + main() diff --git a/tracegnn/constants.py b/tracegnn/constants.py new file mode 100644 index 0000000..fdc685d --- /dev/null +++ b/tracegnn/constants.py @@ -0,0 +1,13 @@ +import os + +# if MIN_NODE_COUNT <= 2 <= MAX_NODE_COUNT, then the graph will be chosen +MAX_NODE_COUNT = int(os.environ.get('MAX_NODE_COUNT', '32')) +MAX_SPAN_COUNT = int(os.environ.get('MAX_SPAN_COUNT', '32')) + +# whether or not to use multi-dimensional latency codec? +# If not set, will normalize the latency w.r.t. each operation. +USE_MULTI_DIM_LATENCY_CODEC = os.environ.get('USE_MULTI_DIM_LATENCY_CODEC', '0') == '1' + +# If USE_MULTI_DIM_LATENCY_CODEC, then encode the codec parameters. +MAX_LATENCY_DIM = int(os.environ.get('MAX_LATENCY_DIM', '5')) +MAX_DEPTH = int(os.environ.get('MAX_DEPTH', '4')) diff --git a/tracegnn/data/__init__.py b/tracegnn/data/__init__.py new file mode 100644 index 0000000..6359ac6 --- /dev/null +++ b/tracegnn/data/__init__.py @@ -0,0 +1,3 @@ +from .bytes_db import * +from .trace_graph import * +from .trace_graph_db import * diff --git a/tracegnn/data/bytes_db.py b/tracegnn/data/bytes_db.py new file mode 100644 index 0000000..5ee9a5d --- /dev/null +++ b/tracegnn/data/bytes_db.py @@ -0,0 +1,242 @@ +"""Databases for large-scale datasets.""" +import bisect +import os +import pickle +import sqlite3 +from contextlib import contextmanager +from typing import * + +import numpy as np +import snappy + +__all__ = [ + 'BytesDB', + 'BytesSqliteDB', + 'BytesMultiDB', +] + + +class BytesDB(object): + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if not exc_type: + self.commit() + self.close() + + def __len__(self) -> int: + return self.data_count() + + def __getitem__(self, item: int): + return self.get(item) + + def __iter__(self): + for i in range(self.data_count()): + yield self.get(i) + + def __repr__(self): + desc = self.describe().strip() + if '\n' in desc: + desc = '\n'.join(f' {l}' for l in desc.split('\n')) + desc = f'\n{desc}\n' + return f'{self.__class__.__name__}({desc})' + + def describe(self) -> str: + raise NotImplementedError() + + def sample_n(self, n: int) -> List[bytes]: + ret = [] + indices = np.random.randint(self.data_count(), size=n) + for i in indices: + ret.append(self.get(i)) + return ret + + def data_count(self) -> int: + raise NotImplementedError() + + def get(self, item: int) -> bytes: + raise NotImplementedError() + + def add(self, val: bytes) -> int: + raise NotImplementedError() + + @contextmanager + def write_batch(self): + raise NotImplementedError() + + def commit(self): + raise NotImplementedError() + + def optimize(self): + raise NotImplementedError() + + def close(self): + raise NotImplementedError() + + +class BytesSqliteDB(BytesDB): + + class WB(object): + + def __init__(self, 
conn, cur, table_name, buf_size=8192): + self.conn = conn + self.cur = cur + self.table_name = table_name + self.buf = [] + self.buf_size = buf_size + + def add(self, id, value): + self.buf.append((id, snappy.compress(value))) + if len(self.buf) >= self.buf_size: + self.commit() + + def commit(self): + if self.buf: + self.cur.executemany( + f'INSERT INTO "{self.table_name}"("key", "value") VALUES (?, ?)', + self.buf + ) + self.conn.commit() + self.buf.clear() + + def rollback(self): + self.conn.rollback() + self.buf.clear() + + conn: sqlite3.Connection + path: str + file_name: str + _data_count: int + + def __init__(self, path: str, write: bool = False, table_name: str = 'data', + file_name: str = '_bytes.db'): + self.path = path + self.table_name = table_name + self.file_name = file_name + + if write and not os.path.isdir(path): + os.makedirs(path, exist_ok=True) + + self.conn = sqlite3.connect(os.path.join(self.path, file_name)) + self.conn.text_factory = bytes + with self._scoped_cursor() as cur: + cur.execute( + f'CREATE TABLE IF NOT EXISTS "{self.table_name}" (' + ' "key" INT PRIMARY KEY,' + ' "value" BLOB' + ');' + ) + self.conn.commit() + self._data_count = cur.execute(f'SELECT COUNT(*) FROM "{self.table_name}"').fetchone()[0] + self._wb = None + + @contextmanager + def _scoped_cursor(self): + cur = self.conn.cursor() + try: + yield cur + finally: + cur.close() + + def describe(self) -> str: + p = self.path + if self.file_name != '_bytes.db': + p = os.path.join(p, self.file_name) + if any(c in p for c in '(),'): + return repr(p) + return p + + def data_count(self) -> int: + return self._data_count + + def get(self, item: int) -> bytes: + with self._scoped_cursor() as cur: + cur.execute(f'SELECT "value" FROM "{self.table_name}" WHERE "key" = {item}') + row = cur.fetchone() + if row is not None: + return snappy.decompress(row[0]) + + def add(self, val: bytes) -> int: + if self._wb is None: + with self.write_batch(): + return self.add(val) + else: + key = self._data_count + self._wb.add(key, val) + self._data_count += 1 + return key + + @contextmanager + def write_batch(self): + if self._wb is not None: + raise RuntimeError(f'Another write_batch is already open!') + try: + self._wb = self.WB(self.conn, self.conn.cursor(), self.table_name) + yield self + self._wb.commit() + self._wb = None + except: + self._wb.rollback() + self._wb = None + raise + + def commit(self): + if self._wb is not None: + self._wb.commit() + + def optimize(self): + pass + + def close(self): + self.commit() + self._wb = None + self.conn.close() + + +class BytesMultiDB(BytesDB): + + db_list: List[BytesDB] + db_sizes: List[int] + _db_offset: List[int] + _data_count: int + + def __init__(self, *db_list): + self.db_list = list(db_list) + self.db_sizes = [db.data_count() for db in self.db_list] + self._db_offset = [] + i = 0 + for db in self.db_list: + self._db_offset.append(i) + i += db.data_count() + self._data_count = i + + def describe(self) -> str: + return '\n'.join(f'{db.describe()},' for db in self.db_list).rstrip(',') + + def data_count(self) -> int: + return self._data_count + + def get(self, item: int) -> bytes: + if item < 0 or item >= self._data_count: + raise IndexError(item) + i = bisect.bisect_left(self._db_offset, item + 1) - 1 + return self.db_list[i].get(item - self._db_offset[i]) + + def add(self, val: bytes) -> int: + raise RuntimeError(f'BytesMultiDB is not writeable.') + + @contextmanager + def write_batch(self): + raise RuntimeError(f'BytesMultiDB is not writeable.') + + def commit(self): + 
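+ # BytesMultiDB is a read-only aggregate: add() and write_batch() above
+ # raise RuntimeError, so there is never a pending write and commit() is a no-op.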
pass + + def optimize(self): + raise RuntimeError(f'BytesMultiDB is not writeable.') + + def close(self): + for db in self.db_list: + db.close() diff --git a/tracegnn/data/trace_graph.py b/tracegnn/data/trace_graph.py new file mode 100644 index 0000000..b443629 --- /dev/null +++ b/tracegnn/data/trace_graph.py @@ -0,0 +1,617 @@ +import os +import pickle as pkl +import sys +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import * + +import networkx as nx +import numpy as np +import pandas as pd +from tqdm import tqdm + +from ..utils import * + +__all__ = [ + 'TraceGraphNodeFeatures', + 'TraceGraphNodeReconsScores', + 'TraceGraphNode', + 'TraceGraphVectors', + 'TraceGraph', + 'TraceGraphIDManager', + 'load_trace_csv', + 'df_to_trace_graphs', +] + + +SERVICE_ID_YAML_FILE = 'service_id.yml' +OPERATION_ID_YAML_FILE = 'operation_id.yml' + + +@dataclass +class TraceGraphNodeFeatures(object): + __slots__ = ['span_count', 'max_latency', 'min_latency', 'avg_latency'] + + span_count: int # number of duplicates in the parent + avg_latency: float # for span_count == 1, avg == max == min + max_latency: float + min_latency: float + + +@dataclass +class TraceGraphNodeReconsScores(object): + # probability of the node + edge_logit: float + operation_logit: float + + # probability of the latency + avg_latency_nstd: float # (avg_latency - avg_latency_mean) / avg_latency_std + + +@dataclass +class TraceGraphSpan(object): + __slots__ = [ + 'span_id', 'start_time', 'latency', + ] + + span_id: Optional[int] + start_time: Optional[datetime] + latency: float + + +@dataclass +class TraceGraphNode(object): + __slots__ = [ + 'node_id', 'service_id', 'operation_id', + 'features', 'children', 'spans', 'scores', + 'anomaly', + ] + + node_id: Optional[int] # the node id of the graph + service_id: Optional[int] # the service id + operation_id: int # the operation id + features: TraceGraphNodeFeatures # the node features + children: List['TraceGraphNode'] # children nodes + spans: Optional[List[TraceGraphSpan]] # detailed spans information (from the original data) + scores: Optional[TraceGraphNodeReconsScores] + anomaly: Optional[int] # 1: drop anomaly; 2: latency anomaly; 3: service type anomaly + + def __eq__(self, other): + return other is self + + def __hash__(self): + return id(self) + + @staticmethod + def new_sampled(node_id: int, + operation_id: int, + features: TraceGraphNodeFeatures, + scores: Optional[TraceGraphNodeReconsScores] = None + ): + return TraceGraphNode( + node_id=node_id, + service_id=None, + operation_id=operation_id, + features=features, + children=[], + spans=None, + scores=scores, + anomaly=None, + ) + + def iter_bfs(self, + depth: int = 0, + with_parent: bool = False + ) -> Generator[ + Union[ + Tuple[int, 'TraceGraphNode'], + Tuple[int, 'TraceGraphNode', 'TraceGraphNode'] + ], + None, + None + ]: + """Iterate through the nodes in BFS order.""" + if with_parent: + depth = depth + level = [(self, None, 0)] + + while level: + next_level: List[Tuple[TraceGraphNode, TraceGraphNode, int]] = [] + for nd, parent, idx in level: + yield depth, idx, nd, parent + for c_idx, child in enumerate(nd.children): + next_level.append((child, nd, c_idx)) + depth += 1 + level = next_level + + else: + depth = depth + level = [self] + + while level: + next_level: List[TraceGraphNode] = [] + for nd in level: + yield depth, nd + next_level.extend(nd.children) + depth += 1 + level = next_level + + def count_nodes(self) -> int: + ret = 0 + for _ in self.iter_bfs(): + ret += 1 + 
return ret + + +@dataclass +class TraceGraphVectors(object): + """Cached result of `TraceGraph.graph_vectors()`.""" + __slots__ = [ + 'u', 'v', + 'node_type', + 'node_depth', 'node_idx', + 'span_count', 'avg_latency', 'max_latency', 'min_latency', + 'node_features', + ] + + # note that it is guaranteed that u[i] < v[i], i.e., upper triangle matrix + u: np.ndarray + v: np.ndarray + + # node type + node_type: np.ndarray + + # node depth + node_depth: np.ndarray + + # node idx + node_idx: np.ndarray + + # node feature + span_count: np.ndarray + avg_latency: np.ndarray + max_latency: np.ndarray + min_latency: np.ndarray + + +@dataclass +class TraceGraph(object): + __slots__ = [ + 'version', + 'trace_id', 'parent_id', 'root', 'node_count', 'max_depth', 'data', + ] + + version: int # version control + trace_id: Optional[Tuple[int, int]] + parent_id: Optional[int] + root: TraceGraphNode + node_count: Optional[int] + max_depth: Optional[int] + data: Dict[str, Any] # any data about the graph + + @staticmethod + def default_version() -> int: + return 0x2 + + @staticmethod + def new_sampled(root: TraceGraphNode, node_count: int, max_depth: int): + return TraceGraph( + version=TraceGraph.default_version(), + trace_id=None, + parent_id=None, + root=root, + node_count=node_count, + max_depth=max_depth, + data={}, + ) + + @property + def edge_count(self) -> Optional[int]: + if self.node_count is not None: + return self.node_count - 1 + + def iter_bfs(self, + with_parent: bool = False + ): + """Iterate through the nodes in BFS order.""" + yield from self.root.iter_bfs(with_parent=with_parent) + + def merge_spans_and_assign_id(self): + """ + Merge spans with the same (service, operation) under the same parent, + and re-assign node IDs. + """ + node_count = 0 + max_depth = 0 + + for depth, parent in self.iter_bfs(): + max_depth = max(max_depth, depth) + + # assign ID to this node + parent.node_id = node_count + node_count += 1 + + # merge the children of this node + children = [] + for child in sorted(parent.children, key=lambda o: o.operation_id): + if children and children[-1].operation_id == child.operation_id: + prev_child = children[-1] + + # merge the features + f1, f2 = prev_child.features, child.features + f1.span_count += f2.span_count + f1.avg_latency += (f2.avg_latency - f1.avg_latency) * (f2.span_count / f1.span_count) + f1.max_latency = max(f1.max_latency, f2.max_latency) + f1.min_latency = min(f1.min_latency, f2.min_latency) + + # merge the children + if child.children: + if prev_child.children: + prev_child.children.extend(child.children) + else: + prev_child.children = child.children + + # merge the spans + if child.spans: + if prev_child.spans: + prev_child.spans.extend(child.spans) + else: + prev_child.spans = child.spans + else: + children.append(child) + + # re-assign the merged children + parent.children = children + + # record node count and depth + self.node_count = node_count + self.max_depth = max_depth + + def assign_node_id(self): + """Assign node IDs to the graph nodes by pre-root order.""" + node_count = 0 + max_depth = 0 + + for depth, node in self.iter_bfs(): + max_depth = max(max_depth, depth) + + # assign id to this node + node.node_id = node_count + node_count += 1 + + # record node count and depth + self.node_count = node_count + self.max_depth = max_depth + + def graph_vectors(self): + # edge index + u = np.empty([self.edge_count], dtype=np.int64) + v = np.empty([self.edge_count], dtype=np.int64) + + # node type + node_type = np.zeros([self.node_count], dtype=np.int64) + 
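+ # every per-node array here is indexed by node_id; since assign_node_id()
+ # numbers nodes in BFS order, a parent's id is always smaller than its
+ # children's ids, which guarantees u[i] < v[i] when the edge index is
+ # filled in below (e.g. a root with two children yields u = [0, 0], v = [1, 2]).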
+ # node depth + node_depth = np.zeros([self.node_count], dtype=np.int64) + + # node idx + node_idx = np.zeros([self.node_count], dtype=np.int64) + + # node feature + span_count = np.zeros([self.node_count], dtype=np.int64) + avg_latency = np.zeros([self.node_count], dtype=np.float32) + max_latency = np.zeros([self.node_count], dtype=np.float32) + min_latency = np.zeros([self.node_count], dtype=np.float32) + + # X = np.zeros([self.node_count, x_dim], dtype=np.float32) + + edge_idx = 0 + for depth, idx, node, parent in self.iter_bfs(with_parent=True): + j = node.node_id + feat = node.features + + # node type + node_type[j] = node.operation_id + + # node depth + node_depth[j] = depth + + # node idx + node_idx[j] = idx + + # node feature + span_count[j] = feat.span_count + avg_latency[j] = feat.avg_latency + max_latency[j] = feat.max_latency + min_latency[j] = feat.min_latency + # X[parent.node_id, parent.operation_id] = 1 # one-hot encoded node feature + + # edge index + for child in node.children: + u[edge_idx] = node.node_id + v[edge_idx] = child.node_id + edge_idx += 1 + + if len(u) != self.edge_count: + raise ValueError(f'`len(u)` != `self.edge_count`: {len(u)} != {self.edge_count}') + + return TraceGraphVectors( + # edge index + u=u, v=v, + # node type + node_type=node_type, + # node depth + node_depth=node_depth, + # node idx + node_idx=node_idx, + # node feature + span_count=span_count, + avg_latency=avg_latency, + max_latency=max_latency, + min_latency=min_latency, + ) + + def networkx_graph(self, id_manager: 'TraceGraphIDManager') -> nx.Graph: + gv = self.graph_vectors() + self_nodes = {nd.node_id: nd for _, nd in self.iter_bfs()} + g = nx.Graph() + # graph + for k, v in self.data.items(): + g.graph[k] = v + # nodes + g.add_nodes_from(range(self.node_count)) + # edges + g.add_edges_from([(i, j) for i, j in zip(gv.u, gv.v)]) + # node features + for i in range(len(gv.node_type)): + nd = g.nodes[i] + nd['node_type'] = gv.node_type[i] + nd['operation'] = id_manager.operation_id.reverse_map(gv.node_type[i]) + for attr in TraceGraphNodeFeatures.__slots__: + nd[attr] = getattr(gv, attr)[i] + if self_nodes[i].scores: + nd['avg_latency_nstd'] = self_nodes[i].scores.avg_latency_nstd + return g + + def to_bytes(self, protocol: int = pkl.DEFAULT_PROTOCOL) -> bytes: + return pkl.dumps(self, protocol=protocol) + + @staticmethod + def from_bytes(content: bytes) -> 'TraceGraph': + r = pkl.loads(content) + + # for deserializing old versions of TraceGraph + if not hasattr(r, 'version'): + r.version = 0x0 + + if r.version < 0x1: # upgrade 0x0 => 0x2 + for _, nd in r.root.iter_bfs(): + nd.scores = None + nd.anomaly = None + r.version = 0x2 + + if r.version < 0x2: # upgrade 0x1 => 0x2 + for _, nd in r.root.iter_bfs(): + nd.anomaly = None + r.version = 0x2 + + return r + + def deepcopy(self) -> 'TraceGraph': + return TraceGraph.from_bytes(self.to_bytes()) + + +@dataclass +class TempGraphNode(object): + __slots__ = ['trace_id', 'parent_id', 'node'] + + trace_id: Tuple[int, int] + parent_id: int + node: 'TraceGraphNode' + + +class TraceGraphIDManager(object): + __slots__ = ['root_dir', 'service_id', 'operation_id'] + + root_dir: str + service_id: IDAssign + operation_id: IDAssign + + def __init__(self, root_dir: str): + self.root_dir = os.path.abspath(root_dir) + self.service_id = IDAssign(os.path.join(self.root_dir, SERVICE_ID_YAML_FILE)) + self.operation_id = IDAssign(os.path.join(self.root_dir, OPERATION_ID_YAML_FILE)) + + def __enter__(self): + self.service_id.__enter__() + 
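+ # IDAssign presumably opens the yml-backed name -> id mapping on __enter__;
+ # the service and operation managers are entered (and exited below) together,
+ # so get_or_assign() can extend either mapping while graphs are being built.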
self.operation_id.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.service_id.__exit__(exc_type, exc_val, exc_tb) + self.operation_id.__exit__(exc_type, exc_val, exc_tb) + + @property + def num_operations(self) -> int: + return len(self.operation_id) + + def dump_to(self, output_dir: str): + self.service_id.dump_to(os.path.join(output_dir, SERVICE_ID_YAML_FILE)) + self.operation_id.dump_to(os.path.join(output_dir, OPERATION_ID_YAML_FILE)) + + +def load_trace_csv(input_path: str, is_test: bool=False) -> pd.DataFrame: + if is_test: + dtype = { + 'traceIdHigh': int, + 'traceIdLow': int, + 'spanId': int, + 'parentSpanId': int, + 'serviceName': str, + 'operationName': str, + 'startTime': str, + 'duration': float, + 'nanosecond': int, + 'DBhash': int, + 'nodeLatencyLabel': int, + 'graphLatencyLabel': int, + 'graphStructureLabel': int + } + else: + dtype = { + 'traceIdHigh': int, + 'traceIdLow': int, + 'spanId': int, + 'parentSpanId': int, + 'serviceName': str, + 'operationName': str, + 'startTime': str, + 'duration': float, + 'nanosecond': int, + 'DBhash': int, + } + + return pd.read_csv( + input_path, + engine='c', + usecols=list(dtype), + dtype=dtype + ) + + +def df_to_trace_graphs(df: pd.DataFrame, + id_manager: TraceGraphIDManager, + test_label: int = None, + min_node_count: int = 2, + max_node_count: int = 32, + summary_file: Optional[str] = None, + merge_spans: bool = False, + ) -> List[TraceGraph]: + summary = [] + trace_spans = {} + df = df[df['DBhash'] == 0] + + # read the spans + with id_manager: + for row in tqdm(df.itertuples(), desc='Read spans', total=len(df)): + graph_label = 0 + + if test_label is not None: + if row.graphStructureLabel != 0: + graph_label = 1 + elif row.graphLatencyLabel != 0: + graph_label = 2 + if graph_label != test_label: + continue + + if row.serviceName not in id_manager.service_id._mapping: + print(row.serviceName, ": Service not in file!") + continue + if f'{row.serviceName}/{row.operationName}' not in id_manager.operation_id._mapping: + print(f'{row.serviceName}/{row.operationName}', ": Operation not in file!") + continue + + trace_id = (row.traceIdHigh, row.traceIdLow) + span_dict = trace_spans.get(trace_id, None) + if span_dict is None: + trace_spans[trace_id] = span_dict = {} + + span_latency = row.duration + span_dict[row.spanId] = TempGraphNode( + trace_id=trace_id, + parent_id=row.parentSpanId, + node=TraceGraphNode( + node_id=None, + service_id=id_manager.service_id.get_or_assign(row.serviceName), + operation_id=id_manager.operation_id.get_or_assign(f'{row.serviceName}/{row.operationName}'), + features=TraceGraphNodeFeatures( + span_count=1, + avg_latency=span_latency, + max_latency=span_latency, + min_latency=span_latency, + ), + children=[], + spans=[ + TraceGraphSpan( + span_id=row.spanId, + start_time=( + datetime.strptime(row.startTime, '%Y-%m-%d %H:%M:%S') + + timedelta(microseconds=row.nanosecond / 1_000) + ), + latency=span_latency, + ), + ], + scores=None, + anomaly=None, + ) + ) + + summary.append(f'Span count: {len(trace_spans)}') + + # construct the traces + trace_graphs = [] + + if test_label is None or test_label == 0: + graph_data = {} + elif test_label == 1: + graph_data = { + 'is_anomaly': True, + 'anomaly_type': 'drop' + } + else: + graph_data = { + 'is_anomaly': True, + 'anomaly_type': 'latency' + } + + for _, trace in tqdm(trace_spans.items(), total=len(trace_spans), desc='Build graphs'): + nodes = sorted( + trace.values(), + key=(lambda nd: (nd.node.service_id, nd.node.operation_id, 
nd.node.spans[0].start_time)) + ) + for nd in nodes: + parent_id = nd.parent_id + if (parent_id == 0) or (parent_id not in trace): + # if only a certain service is taken from the database, then just the sub-trees + # of a trace are obtained, which leads to orphan nodes (parent_id != 0 and not in trace + trace_graphs.append(TraceGraph( + version=TraceGraph.default_version(), + trace_id=nd.trace_id, + parent_id=nd.parent_id, + root=nd.node, + node_count=None, + max_depth=None, + data=graph_data, + )) + else: + trace[parent_id].node.children.append(nd.node) + + # merge spans and assign id + if merge_spans: + for trace in tqdm(trace_graphs, desc='Merge spans and assign node id'): + trace.merge_spans_and_assign_id() + else: + for trace in tqdm(trace_graphs, desc='Assign node id'): + trace.assign_node_id() + + # gather the final results + ret = [] + too_small = 0 + too_large = 0 + + for trace in trace_graphs: + if trace.node_count < min_node_count: + too_small += 1 + elif trace.node_count > max_node_count: + too_large += 1 + else: + ret.append(trace) + + summary.append(f'Imported graph: {len(trace_graphs)}; dropped graph: too small = {too_small}, too large = {too_large}') + if summary_file: + with open(summary_file, 'w', encoding='utf-8') as f: + f.write('\n'.join(summary) + '\n') + else: + print('\n'.join(summary), file=sys.stderr) + + return ret diff --git a/tracegnn/data/trace_graph_db.py b/tracegnn/data/trace_graph_db.py new file mode 100644 index 0000000..25478c9 --- /dev/null +++ b/tracegnn/data/trace_graph_db.py @@ -0,0 +1,108 @@ +"""Wraps a BytesDB into TraceGraphDB.""" +import os +import pickle as pkl +import re +from contextlib import contextmanager +from typing import * + +import numpy as np + +from .bytes_db import * +from .trace_graph import * + +__all__ = ['TraceGraphDB', 'open_trace_graph_db'] + + +class TraceGraphDB(object): + bytes_db: BytesDB + protocol: int + + def __init__(self, bytes_db: BytesDB, protocol: Optional[int] = None): + if protocol is None: + protocol = pkl.DEFAULT_PROTOCOL + self.bytes_db = bytes_db + self.protocol = protocol + + def __enter__(self): + self.bytes_db.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.bytes_db.__exit__(exc_type, exc_val, exc_tb) + + def __len__(self) -> int: + return self.data_count() + + def __getitem__(self, item: int): + return self.get(item) + + def __iter__(self): + for i in range(self.data_count()): + yield self.get(i) + + def __repr__(self): + desc = repr(self.bytes_db) + desc = desc[desc.find('(') + 1: -1] + return f'TraceGraphDB({desc})' + + def sample_n(self, + n: int, + with_id: bool = False + ) -> List[Union[TraceGraph, Tuple[int, TraceGraph]]]: + ret = [] + indices = np.random.randint(self.data_count(), size=n) + for i in indices: + g = self.get(i) + if with_id: + ret.append((int(i), g)) + else: + ret.append(g) + return ret + + def data_count(self) -> int: + return self.bytes_db.data_count() + + def get(self, item: int) -> TraceGraph: + return TraceGraph.from_bytes(self.bytes_db.get(item)) + + def add(self, g: TraceGraph) -> int: + return self.bytes_db.add(g.to_bytes(protocol=self.protocol)) + + @contextmanager + def write_batch(self): + with self.bytes_db.write_batch(): + yield self + + def commit(self): + self.bytes_db.commit() + + def optimize(self): + self.bytes_db.optimize() + + def close(self): + self.bytes_db.close() + + +def open_trace_graph_db(input_dir: str, + names: Optional[Sequence[str]] = (), + protocol: Optional[int] = None, + ) -> Tuple[TraceGraphDB, 
TraceGraphIDManager]: + file_name = f'_bytes_{protocol}.db' if protocol else '_bytes.db' + + id_manager = TraceGraphIDManager(os.path.join(input_dir, 'id_manager')) + + if len(names) == 1: + db = TraceGraphDB( + BytesSqliteDB(os.path.join(input_dir, 'processed', names[0]), file_name=file_name), + protocol=protocol, + ) + else: + db = TraceGraphDB( + BytesMultiDB(*[ + BytesSqliteDB(os.path.join(input_dir, 'processed', name), file_name=file_name) + for name in names + ]), + protocol=protocol, + ) + + return db, id_manager diff --git a/tracegnn/models/__init__.py b/tracegnn/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/models/trace_vae/__init__.py b/tracegnn/models/trace_vae/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tracegnn/models/trace_vae/constants.py b/tracegnn/models/trace_vae/constants.py new file mode 100644 index 0000000..71cc541 --- /dev/null +++ b/tracegnn/models/trace_vae/constants.py @@ -0,0 +1,3 @@ +from tracegnn.constants import * + +LATENCY_DIM = 1 diff --git a/tracegnn/models/trace_vae/dataset.py b/tracegnn/models/trace_vae/dataset.py new file mode 100644 index 0000000..6dd1556 --- /dev/null +++ b/tracegnn/models/trace_vae/dataset.py @@ -0,0 +1,134 @@ +from dataclasses import dataclass +from typing import * + +import dgl +import mltk +import numpy as np +import torch +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * +from .constants import * + +__all__ = [ + 'trace_graph_to_dgl', + 'TraceGraphDataStream', +] + + +def trace_graph_to_dgl(graph: TraceGraph, + num_node_types: int, + add_self_loop: bool, + latency_range: Optional[TraceGraphLatencyRangeFile] = None, + directed: Union[bool, str] = False, # True, False or 'reverse' + ): + with T.no_grad(): + gv = graph.graph_vectors() + + # build edges + # todo: use heterogeneous graph to distinguish between "parent -> child" edge and opposite direction + # here we just add edges for the both direction (as an initial step) + if directed == 'reverse': + u = T.as_tensor(gv.v, dtype=T.int64) + v = T.as_tensor(gv.u, dtype=T.int64) + elif directed is True: + u = T.as_tensor(gv.u, dtype=T.int64) + v = T.as_tensor(gv.v, dtype=T.int64) + elif directed is False: + u = T.as_tensor( + np.concatenate([gv.u, gv.v], axis=0), + dtype=T.int64, + ) + v = T.as_tensor( + np.concatenate([gv.v, gv.u], axis=0), + dtype=T.int64 + ) + else: + raise ValueError(f'Unsupported value for directed: {directed!r}') + + g = dgl.graph((u, v), num_nodes=graph.node_count) + if add_self_loop: + g = dgl.add_self_loop(g) + + # node type (use nn.Embedding later to map the node type => node embedding) + g.ndata['node_type'] = T.as_tensor(gv.node_type, dtype=T.int64) + + # the index of the node under its parent + g.ndata['node_idx'] = T.as_tensor(gv.node_idx, dtype=T.int64) + + # node depth + g.ndata['node_depth'] = T.as_tensor(gv.node_depth, dtype=T.int64) + + # span count + g.ndata['span_count'] = T.as_tensor(np.minimum(gv.span_count, MAX_SPAN_COUNT), dtype=T.int64) + + # latency + if USE_MULTI_DIM_LATENCY_CODEC: + for pfx in ('avg_', 'max_', 'min_'): + codec, onehot = encode_latency(getattr(gv, f'{pfx}latency'), MAX_LATENCY_DIM) + g.ndata[f'{pfx}latency_codec'] = T.as_tensor(codec, dtype=T.float32) + g.ndata[f'{pfx}latency_onehot'] = T.as_tensor(onehot, dtype=T.float32) + else: + for pfx in ('avg_', 'max_', 'min_'): + latency_array = getattr(gv, f'{pfx}latency') + latency = [] + for i in range(graph.node_count): + mu, std = latency_range[gv.node_type[i]] + 
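+ # normalize the raw latency by the per-operation (mu, std) taken from the
+ # latency-range file; the 1e-5 below guards against a zero std. For example,
+ # with mu = 100 and std = 20, a raw value of 140 maps to roughly 2.0.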
latency.append((latency_array[i] - mu) / (std + 1e-5)) + g.ndata[f'{pfx}latency'] = T.as_tensor(np.reshape(latency, (-1, 1)), dtype=T.float32) + g.ndata['latency'] = T.concat( + [ + g.ndata['avg_latency'], + g.ndata['min_latency'], + g.ndata['max_latency'], + ], + axis=-1, + ) + + return g + + +class TraceGraphDataStream(mltk.data.MapperDataStream): + + def __init__(self, + db: TraceGraphDB, + id_manager: TraceGraphIDManager, + batch_size: int, + shuffle: bool = False, + skip_incomplete: bool = False, + random_state: Optional[np.random.RandomState] = None, + data_count: Optional[int] = None, + ): + if (data_count is not None) and (data_count < len(db)) and shuffle: + indices = np.arange(len(db)) + np.random.shuffle(indices) + indices = indices[:data_count] + source_cls = lambda **kwargs: mltk.DataStream.arrays([indices], **kwargs) + else: + if data_count is None: + data_count = len(db) + source_cls = lambda **kwargs: mltk.DataStream.int_seq(data_count, **kwargs) + + source = source_cls( + batch_size=batch_size, + shuffle=shuffle, + skip_incomplete=skip_incomplete, + random_state=random_state, + ) + + def mapper(indices): + return (np.array( + [ + db.get(idx) + for idx in indices + ] + ),) + + super().__init__( + source=source, + mapper=mapper, + array_count=1, + data_shapes=((),) + ) + diff --git a/tracegnn/models/trace_vae/distributions.py b/tracegnn/models/trace_vae/distributions.py new file mode 100644 index 0000000..d7f4706 --- /dev/null +++ b/tracegnn/models/trace_vae/distributions.py @@ -0,0 +1,356 @@ +import math +from typing import * + +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.typing_ import TensorOrData +from tensorkit.distributions.utils import copy_distribution + +__all__ = [ + 'MaskedDistribution', + 'BiasedBernoulli', + 'BiasedCategorical', + 'BiasedOneHotCategorical', + 'BiasedNormal', + 'SafeNormal', + 'AnomalyDetectionNormal', +] + + +class MaskedDistribution(tk.Distribution): + """ + A wrapper distribution to mask some elements, in order to mimic "variadic length" + in the event dimensions. + """ + + def __init__(self, + distribution: tk.Distribution, + mask: TensorOrData, # should be right-aligned with the underlying log_prob + log_prob_weight: Optional[TensorOrData] = None, # should be right-aligned with the underlying log_prob + *, + event_ndims: Optional[int] = None, + validate_tensors: Optional[bool] = None, + ): + # validate the arguments + if validate_tensors is None: + validate_tensors = distribution.validate_tensors + + # compute event ndims + batch_shape = distribution.batch_shape + value_shape = distribution.value_shape + min_event_ndims = distribution.event_ndims + max_event_ndims = distribution.value_ndims + + if event_ndims is None: + event_ndims = min_event_ndims + if not (min_event_ndims <= event_ndims <= max_event_ndims): + raise ValueError( + f'`event_ndims` out of range: got {event_ndims}, but ' + f'the minimum allowed value is {min_event_ndims}, ' + f'and the maximum allowed value is {max_event_ndims}.' 
+ ) + batch_shape = batch_shape[: len(batch_shape) - (event_ndims - min_event_ndims)] + + super().__init__( + dtype=distribution.dtype, + value_shape=value_shape, + batch_shape=batch_shape, + continuous=distribution.continuous, + reparameterized=distribution.reparameterized, + event_ndims=event_ndims, + min_event_ndims=min_event_ndims, + device=distribution.device, + validate_tensors=validate_tensors, + ) + self._base_distribution = distribution + self.mask = T.as_tensor(mask, device=distribution.device) + self.log_prob_weight = T.as_tensor(log_prob_weight, device=distribution.device) \ + if log_prob_weight is not None else None + + @property + def base_distribution(self) -> tk.Distribution: + return self._base_distribution + + def _apply_mask_on_log_prob(self, log_prob): + r = log_prob * T.as_tensor(self.mask, dtype=T.get_dtype(log_prob)) + if self.log_prob_weight is not None: + r = r * T.as_tensor(self.log_prob_weight, dtype=T.get_dtype(log_prob)) + return r + + def _apply_mask_on_samples(self, samples): + mask = T.as_tensor(self.mask, dtype=T.get_dtype(samples)) + return samples * T.reshape( + mask, + T.shape(mask) + ([1] * self.min_event_ndims) # expand mask to match the samples + ) + + def _sample(self, + n_samples: Optional[int], + group_ndims: int, + reduce_ndims: int, + reparameterized: bool) -> 'tk.StochasticTensor': + x = self._base_distribution.sample( + n_samples=n_samples, + reparameterized=reparameterized + ) + t = tk.StochasticTensor( + tensor=self._apply_mask_on_samples(x.tensor), + distribution=self, + n_samples=n_samples, + group_ndims=group_ndims, + reparameterized=reparameterized + ) + return t + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + log_prob = self._base_distribution.log_prob(given) + log_prob = self._apply_mask_on_log_prob(log_prob) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + def copy(self, **overrided_params): + return copy_distribution( + cls=MaskedDistribution, + base=self, + attrs=(('distribution', '_base_distribution'), 'mask', + 'event_ndims', 'validate_tensors'), + overrided_params=overrided_params, + ) + + +def _biased_Bernoulli_or_Categorical_log_prob(log_prob, alpha, threshold_logit, reduce_ndims): + dtype = T.get_dtype(log_prob) + log_prob = T.where( + log_prob < T.float_scalar(threshold_logit, dtype=dtype), + log_prob * T.float_scalar(alpha, dtype=dtype), + log_prob, + ) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + +class BiasedBernoulli(tk.distributions.Bernoulli): + """Bernoulli whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.bernoulli_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedBernoulli, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + 
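+ # 'alpha' and 'threshold' are listed in attrs above, so a copy of this
+ # distribution keeps its biased log-prob behaviour.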
original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedCategorical(tk.distributions.Categorical): + """Categorical whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.categorical_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedCategorical, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedOneHotCategorical(tk.distributions.OneHotCategorical): + """OneHotCategorical whose log p(x) is biased towards error.""" + + alpha: float + threshold: float + + def __init__(self, alpha: float = 1.0, threshold: float = 0.5, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.threshold = threshold + self._threshold_logit = math.log(threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + return _biased_Bernoulli_or_Categorical_log_prob( + T.random.one_hot_categorical_log_prob( + given=given, + logits=self.logits, + group_ndims=0, + ), + self.alpha, + self._threshold_logit, + reduce_ndims, + ) + + def copy(self, **overrided_params): + return copy_distribution( + cls=BiasedOneHotCategorical, + base=self, + attrs=('alpha', 'threshold', 'dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), + mutual_attrs=(('logits', 'probs'),), + compute_deps={'logits': ('epsilon',)}, + original_mutual_params=self._mutual_params, + overrided_params=overrided_params, + ) + + +class BiasedNormal(tk.distributions.Normal): + """Normal whose log p(x) is biased towards error.""" + + alpha: float + std_threshold: float + + _extra_args = ('alpha', 'std_threshold') + + def __init__(self, alpha: float = 1.0, std_threshold: float = 3.0, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.std_threshold = std_threshold + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + log_prob = T.random.normal_log_pdf( + given=given, + mean=self.mean, + logstd=self.logstd, + group_ndims=0, + validate_tensors=self.validate_tensors, + ) + dtype = T.get_dtype(log_prob) + log_prob = T.where( + T.abs(given - self.mean) > (T.float_scalar(self.std_threshold, dtype=dtype) * self.std), + log_prob * T.float_scalar(self.alpha, dtype=dtype), + log_prob, + ) + if reduce_ndims > 0: + log_prob = T.reduce_sum(log_prob, axis=T.int_range(-reduce_ndims, 0)) + return log_prob + + +class SafeNormal(tk.distributions.Normal): + """Normal whose log p(x) is computed with |x-mean| clipped within nstd * std.""" + + std_threshold: float + + _extra_args = ('std_threshold',) + + def __init__(self, std_threshold: float = 5.0, **kwargs): + super().__init__(**kwargs) + self.std_threshold = std_threshold + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + min_val = 
T.stop_grad(self.mean - self.std_threshold * self.std) + max_val = T.stop_grad(self.mean + self.std_threshold * self.std) + return T.random.normal_log_pdf( + given=T.maximum( + T.minimum(given, max_val), + min_val, + ), + mean=self.mean, + logstd=self.logstd, + group_ndims=reduce_ndims, + validate_tensors=self.validate_tensors, + ) + + +class AnomalyDetectionNormal(tk.distributions.Normal): + """Normal whose log p(x) is replaced by clipped Normal-CDF for anomaly detection.""" + + SQRT2 = math.sqrt(2) + LOG2 = math.log(2) + + std_threshold: float + bias_alpha: float + bias_threshold: float + + _extra_args = ('std_threshold', 'bias_alpha', 'bias_threshold',) + + def __init__(self, + std_threshold: float = 3.0, + bias_alpha: float = 1.0, + bias_threshold: float = 0.5, + **kwargs): + super().__init__(**kwargs) + self.std_threshold = std_threshold + self.bias_alpha = bias_alpha + self.bias_threshold = bias_threshold + self._log_bias_threshold = math.log(bias_threshold) + + def _log_prob(self, + given: T.Tensor, + group_ndims: int, + reduce_ndims: int) -> T.Tensor: + # t = abs(X) - std_threshold + # prob = 1 - normal_cdf(t) + # = 0.5 * (1 - erf(t / sqrt(2))) + # log_prob = -log(2) + log1p(-erf(t / sqrt(2))) + t = T.abs((given - self.mean) / self.std) - self.std_threshold + log_prob = -self.LOG2 + T.log1p(-T.erf(t / self.SQRT2)) + return _biased_Bernoulli_or_Categorical_log_prob( + log_prob, + self.bias_alpha, + self._log_bias_threshold, + reduce_ndims, + ) diff --git a/tracegnn/models/trace_vae/evaluation.py b/tracegnn/models/trace_vae/evaluation.py new file mode 100644 index 0000000..db75bc9 --- /dev/null +++ b/tracegnn/models/trace_vae/evaluation.py @@ -0,0 +1,570 @@ +import json +import math +from pprint import pprint +from typing import * + +import mltk +import tensorkit as tk +import yaml +from tensorkit import tensor as T +from tqdm import tqdm +import pickle +import snappy +import numpy as np +import os + +from tracegnn.utils import * +from tracegnn.data import * +from ...data import TraceGraph, TraceGraphNode +from ...utils import TraceGraphLatencyRangeFile +from .graph_utils import p_net_to_trace_graphs, trace_graph_key +from .model import TraceVAE +from .tensor_utils import * +from .types import TraceGraphBatch + +__all__ = [ + 'do_evaluate_nll', + 'do_evaluate_prior', + 'do_anomaly_detect' +] + + +def do_evaluate_nll(test_stream: mltk.DataStream, + vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_z: int, + use_biased: bool = True, + use_latency_biased: bool = True, + no_latency: bool = False, + no_struct: bool = False, + std_limit: Optional[T.Tensor] = None, + latency_log_prob_weight: bool = False, + latency_logstd_min: Optional[float] = None, + test_threshold: Optional[float] = None, + test_loop=None, + summary_writer=None, + clip_nll=None, + use_embeddings: bool = False, + num_embedding_samples=None, + nll_output_file=None, + proba_cdf_file=None, + auc_curve_file=None, + latency_hist_file=None, + operation_id_dict_out=None, # corresponding to latency_std_dict_out + latency_std_dict_out=None, + latency_reldiff_dict_out=None, + p_node_count_dict_out=None, + p_edge_dict_out=None, + latency_dict_prefix='', + ): + # check params + if std_limit is not None: + std_limit = T.as_tensor(std_limit, dtype=T.float32) + + # result buffer + nll_list = [] + label_list = [] + trace_id_list = [] + graph_key_list = [] + z_buffer = [] # the z embedding buffer of the graph + z2_buffer = [] # the z2 embedding buffer of the graph + z_label = [] # the label 
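+# Numeric check (sketch) of the tail-probability form used by
+# AnomalyDetectionNormal above: with t = |x - mean| / std - std_threshold,
+#   log p = log(1 - normal_cdf(t)) = -log(2) + log1p(-erf(t / sqrt(2))).
+import math
+
+def tail_log_prob_sketch(x: float, mean: float, std: float, std_threshold: float = 3.0) -> float:
+    t = abs(x - mean) / std - std_threshold
+    return -math.log(2) + math.log1p(-math.erf(t / math.sqrt(2)))
+
+# A point exactly std_threshold stds away from the mean gives t = 0, i.e. log(0.5).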
for z and z2
+    latency_samples = {}
+    result_dict = {}
+
+    if operation_id_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in operation_id_dict_out:
+                operation_id_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920, dtype=np.int64)
+
+    if latency_std_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in latency_std_dict_out:
+                latency_std_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if latency_reldiff_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in latency_reldiff_dict_out:
+                latency_reldiff_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if p_node_count_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in p_node_count_dict_out:
+                p_node_count_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    if p_edge_dict_out is not None:
+        for key in ('normal', 'drop', 'latency'):
+            if latency_dict_prefix + key not in p_edge_dict_out:
+                p_edge_dict_out[latency_dict_prefix + key] = ArrayBuffer(81920)
+
+    def add_embedding(buffer, label, tag, limit=None):
+        if limit is not None:
+            indices = np.arange(len(buffer))
+            np.random.shuffle(indices)
+            indices = indices[:limit]
+            buffer = buffer[indices]
+            label = label[indices]
+        summary_writer.add_embedding(
+            buffer,
+            metadata=label,
+            tag=tag,
+        )
+
+    # run evaluation
+    def eval_step(trace_graphs: List[TraceGraph]):
+        G = TraceGraphBatch(
+            id_manager=id_manager,
+            latency_range=latency_range,
+            trace_graphs=trace_graphs,
+        )
+        chain = vae.q(G, n_z=n_z, no_latency=no_latency).chain(
+            vae.p,
+            latent_axis=0,
+            G=G,
+            use_biased=use_biased,
+            use_latency_biased=use_latency_biased,
+            no_latency=no_latency,
+            latency_logstd_min=latency_logstd_min,
+            latency_log_prob_weight=latency_log_prob_weight,
+            std_limit=std_limit,
+        )
+        if no_struct:
+            q, p = chain.q, chain.p
+            del q['z']
+            del p['z']
+            del p['adj']
+            del p['node_count']
+            del p['node_type']
+            chain = q.chain(lambda *args, **kwargs: p, latent_axis=0)
+
+        loss = chain.vi.training.sgvb()
+        nll = -chain.vi.evaluation.is_loglikelihood()
+
+        # clip the nll, and treat 'NaN' or 'Inf' nlls as `config.test.clip_nll`
+        if clip_nll is not None:
+            clip_limit = T.float_scalar(clip_nll)
+            loss = T.where(loss < clip_limit, loss, clip_limit)
+            nll = T.where(nll < clip_limit, nll, clip_limit)
+
+        # the nlls and labels of this step
+        step_label_list = np.array([
+            0 if not g.data.get('is_anomaly') else (
+                1 if g.data['anomaly_type'] == 'drop' else 2)
+            for g in trace_graphs
+        ])
+
+        # Load the graph_key
+        step_graph_key_list = [trace_graph_key(g) for g in trace_graphs]
+        step_trace_id_list = [g.trace_id for g in trace_graphs]
+
+        if not no_struct:
+            # collect operation id
+            if operation_id_dict_out is not None:
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0)
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1)
+                collect_operation_id(operation_id_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2)
+
+            # collect latency
+            if latency_std_dict_out is not None:
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0)
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1)
+                collect_latency_std(latency_std_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2)
+
+            # collect relative diff
+            if latency_reldiff_dict_out is not None:
collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_latency_reldiff(latency_reldiff_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # collect p node count + if p_node_count_dict_out is not None: + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_p_node_count(p_node_count_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # collect p edge + if p_edge_dict_out is not None: + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}normal'], chain, step_label_list == 0) + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}drop'], chain, step_label_list == 1) + collect_p_edge(p_edge_dict_out[f'{latency_dict_prefix}latency'], chain, step_label_list == 2) + + # inspect the internals of every trace graph + if 'latency' in chain.p: + p_latency = chain.p['latency'].distribution.base_distribution + p_latency_mu, p_latency_std = p_latency.mean, p_latency.std + if len(T.shape(p_latency.mean)) == 4: + p_latency_mu = p_latency_mu[0] + p_latency_std = p_latency_std[0] + + latency_sample = T.to_numpy(T.random.normal(p_latency_mu, p_latency_std)) + + for i, tg in enumerate(trace_graphs): + assert isinstance(tg, TraceGraph) + if step_label_list[i] == 0: + for j in range(tg.node_count): + node_type = int(T.to_numpy(G.dgl_graphs[i].ndata['node_type'][j])) + if node_type not in latency_samples: + latency_samples[node_type] = [] + mu, std = latency_range[node_type] + latency_samples[node_type].append(latency_sample[i, j, 0] * std + mu) + + if use_embeddings: + for i in range(len(trace_graphs)): + if step_label_list[i] == 0: + node_type = trace_graphs[i].root.operation_id + node_label = id_manager.operation_id.reverse_map(node_type) + z_label.append(node_label) + z_buffer.append(T.to_numpy(chain.q['z'].tensor[0, i])) + if 'z2' in chain.q: + z2_buffer.append(T.to_numpy(chain.q['z2'].tensor[0, i])) + + # memorize the outputs + nll_list.extend(T.to_numpy(nll)) + label_list.extend(step_label_list) + trace_id_list.extend(step_trace_id_list) + graph_key_list.extend(step_graph_key_list) + + # return a dict of the test result + ret = {} + normal_losses = T.to_numpy(loss)[step_label_list == 0] + if len(normal_losses) > 0: + test_loss = np.nanmean(normal_losses) + if not math.isnan(test_loss): + ret['loss'] = test_loss + return ret + + with T.no_grad(): + # run test on test set + if test_loop is not None: + with test_loop.timeit('eval_time'): + r = test_loop.run(eval_step, test_stream) + if 'loss' in r: + r['test_loss'] = r['loss'] + if 'test_loss' in r: + result_dict['test_loss'] = r['test_loss'] + else: + test_losses = [] + test_weights = [] + for [trace_graphs] in tqdm(test_stream, total=test_stream.batch_count): + r = eval_step(trace_graphs) + if 'loss' in r: + test_losses.append(r['loss']) + test_weights.append(len(trace_graphs)) + test_weights = np.asarray(test_weights) + result_dict['test_loss'] = np.sum( + np.asarray(test_losses) * + (test_weights / np.sum(test_weights)) + ) + + # save the evaluation results + nll_list = np.asarray(nll_list) + label_list = np.asarray(label_list) + graph_key_list = np.asarray(pickle.dumps(graph_key_list)) + + # analyze nll + result_dict.update( + analyze_anomaly_nll( + 
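+# Sketch of the batch-size-weighted mean computed above when no `test_loop` is
+# given: per-batch losses are averaged with weights proportional to batch size.
+import numpy as np
+
+def weighted_test_loss_sketch(losses, batch_sizes) -> float:
+    w = np.asarray(batch_sizes, dtype=np.float64)
+    return float(np.sum(np.asarray(losses) * (w / np.sum(w))))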
nll_list=nll_list, + label_list=label_list, + proba_cdf_file=proba_cdf_file, + auc_curve_file=auc_curve_file, + threshold=test_threshold, + ) + ) + + if nll_output_file is not None: + np.savez_compressed( + ensure_parent_exists(nll_output_file), + nll_list=nll_list, + label_list=label_list, + graph_key_list=graph_key_list, + anomaly_degree=nll_list / result_dict['best_threshold_latency'] + ) + + print(f'{latency_dict_prefix} file saved to {nll_output_file}') + + # z embedding + if use_embeddings: + # add the operation embedding + operation_buffer = T.to_numpy(vae.operation_embedding( + T.arange(0, id_manager.num_operations, dtype=T.int64))) + operation_label = [ + id_manager.operation_id.reverse_map(i) + for i in range(id_manager.num_operations) + ] + add_embedding(operation_buffer, operation_label, 'operation') + + # add z & z2 embedding + z_label = np.stack(z_label, axis=0) + add_embedding( + np.stack(z_buffer, axis=0), + z_label, + tag='z', + limit=num_embedding_samples + ) + if z2_buffer: + add_embedding( + np.stack(z2_buffer, axis=0), + z_label, + tag='z2', + limit=num_embedding_samples + ) + + # return the results + result_dict = {k: float(v) for k, v in result_dict.items()} + return result_dict + + +def do_evaluate_prior(vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_samples: int, + batch_size: int, + eval_n_z: int, + nll_threshold: Optional[float] = None, + use_biased: bool = True, + output_file: Optional[str] = None, + latency_hist_out: Optional[str] = None, + ): + with T.no_grad(): + # results + sample_count = 0 + drop_count = 0 + result_dict = {} + latency_map = {} + + def add_sample(g: TraceGraph): + if latency_hist_out is not None: + for _, nd in g.iter_bfs(): + assert isinstance(nd, TraceGraphNode) + if nd.operation_id not in latency_map: + latency_map[nd.operation_id] = [] + latency_map[nd.operation_id].append(nd.features.avg_latency) + + # run by sample from prior + n_batches = (n_samples + batch_size - 1) // batch_size + for _ in tqdm(range(n_batches), total=n_batches, desc='Sample graphs from prior'): + # sample from prior + p = vae.p(n_z=batch_size) + trace_graphs = p_net_to_trace_graphs( + p, + id_manager=id_manager, + latency_range=latency_range, + discard_node_with_type_0=True, + discard_node_with_unknown_latency_range=True, + discard_graph_with_error_node_count=True, + ) + + sample_count += len(trace_graphs) + drop_count += sum(g is None for g in trace_graphs) + trace_graphs = [g for g in trace_graphs if g is not None] + + # evaluate the NLLs + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G=G, n_z=eval_n_z). 
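+# Sketch of the "anomaly_degree" saved above: each trace's NLL normalized by
+# the threshold selected by analyze_anomaly_nll, so values above 1 correspond
+# to traces flagged as anomalous under that threshold.
+import numpy as np
+
+def anomaly_degree_sketch(nll_list: np.ndarray, best_threshold: float) -> np.ndarray:
+    return nll_list / best_threshold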
\ + chain(vae.p, n_z=eval_n_z, latent_axis=0, use_biased=use_biased) + eval_nlls = T.to_numpy(chain.vi.evaluation.is_loglikelihood(reduction='none')) + + # purge too-low NLL graphs + for g, nll in zip(trace_graphs, eval_nlls): + if nll >= nll_threshold: + drop_count += 1 + else: + add_sample(g) + + # save the results + drop_rate = float(drop_count / sample_count) + result_dict.update({ + 'drop_rate': drop_rate, + }) + pprint(result_dict) + + if output_file is not None: + _, ext = os.path.splitext(output_file) + if ext == '.json': + result_cont = json.dumps(result_dict) + else: + result_cont = yaml.safe_dump(result_dict) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(result_cont) + + +def do_anomaly_detect(test_stream: mltk.DataStream, + vae: TraceVAE, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + n_z: int, + use_biased: bool = True, + use_latency_biased: bool = True, + no_latency: bool = False, + no_struct: bool = False, + std_limit: Optional[T.Tensor] = None, + latency_log_prob_weight: bool = False, + latency_logstd_min: Optional[float] = None, + test_threshold: Optional[float] = None, + test_loop=None, + summary_writer=None, + clip_nll=None, + use_embeddings: bool = False, + num_embedding_samples=None, + nll_output_file=None, + proba_cdf_file=None, + auc_curve_file=None, + latency_hist_file=None, + operation_id_dict_out=None, # corresponding to latency_std_dict_out + latency_std_dict_out=None, + latency_reldiff_dict_out=None, + p_node_count_dict_out=None, + p_edge_dict_out=None, + latency_dict_prefix='', + ): + # check params + if std_limit is not None: + std_limit = T.as_tensor(std_limit, dtype=T.float32) + + # result buffer + nll_list = [] + label_list = [] + graph_key_list = [] + z_buffer = [] # the z embedding buffer of the graph + z2_buffer = [] # the z2 embedding buffer of the graph + z_label = [] # the label for z and z2 + + def add_embedding(buffer, label, tag, limit=None): + if limit is not None: + indices = np.arange(len(buffer)) + np.random.shuffle(indices) + indices = indices[:limit] + buffer = buffer[indices] + label = label[indices] + summary_writer.add_embedding( + buffer, + metadata=label, + tag=tag, + ) + + # run evaluation + def eval_step(trace_graphs): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G, n_z=n_z, no_latency=no_latency).chain( + vae.p, + latent_axis=0, + G=G, + use_biased=use_biased, + use_latency_biased=use_latency_biased, + no_latency=no_latency, + latency_logstd_min=latency_logstd_min, + latency_log_prob_weight=latency_log_prob_weight, + std_limit=std_limit, + ) + if no_struct: + q, p = chain.q, chain.p + del q['z'] + del p['z'] + del p['adj'] + del p['node_count'] + del p['node_type'] + chain = q.chain(lambda *args, **kwargs: p, latent_axis=0) + + loss = chain.vi.training.sgvb() + nll = -chain.vi.evaluation.is_loglikelihood() + + # clip the nll, and treat 'NaN' or 'Inf' nlls as `config.test.clip_nll` + if clip_nll is not None: + clip_limit = T.float_scalar(clip_nll) + loss = T.where(loss < clip_limit, loss, clip_limit) + nll = T.where(nll < clip_limit, nll, clip_limit) + + # the nlls and labels of this step + step_label_list = np.array([ + 0 if not g.data.get('is_anomaly') else ( + 1 if g.data['anomaly_type'] == 'drop' else 2) + for g in trace_graphs + ]) + + # Load the graph_key + step_graph_key_list = [trace_graph_key(g) for g in trace_graphs] + + if not no_struct: + if use_embeddings: + for i in 
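+# Sketch of the rejection logic above in do_evaluate_prior: a prior sample is
+# kept only if it decodes to a valid TraceGraph and its importance-sampled NLL
+# stays below `nll_threshold`; everything else counts toward `drop_rate`.
+def keep_prior_sample_sketch(nll: float, nll_threshold: float) -> bool:
+    return nll < nll_threshold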
range(len(trace_graphs)): + if step_label_list[i] == 0: + node_type = trace_graphs[i].root.operation_id + node_label = id_manager.operation_id.reverse_map(node_type) + z_label.append(node_label) + z_buffer.append(T.to_numpy(chain.q['z'].tensor[0, i])) + if 'z2' in chain.q: + z2_buffer.append(T.to_numpy(chain.q['z2'].tensor[0, i])) + + # memorize the outputs + nll_list.extend(T.to_numpy(nll)) + label_list.extend(step_label_list) + graph_key_list.extend(step_graph_key_list) + + # return a dict of the test result + ret = {} + normal_losses = T.to_numpy(loss)[step_label_list == 0] + if len(normal_losses) > 0: + test_loss = np.nanmean(normal_losses) + if not math.isnan(test_loss): + ret['loss'] = test_loss + return ret + + with T.no_grad(): + # run test on test set + if test_loop is not None: + with test_loop.timeit('eval_time'): + r = test_loop.run(eval_step, test_stream) + if 'loss' in r: + r['test_loss'] = r['loss'] + else: + test_losses = [] + test_weights = [] + for [trace_graphs] in tqdm(test_stream, total=test_stream.batch_count): + r = eval_step(trace_graphs) + if 'loss' in r: + test_losses.append(r['loss']) + test_weights.append(len(trace_graphs)) + test_weights = np.asarray(test_weights) + + # save the evaluation results + nll_list = np.asarray(nll_list) + label_list = np.asarray(label_list) + graph_key_list = np.asarray(pickle.dumps(graph_key_list)) + + # z embedding + if use_embeddings: + # add the operation embedding + operation_buffer = T.to_numpy(vae.operation_embedding( + T.arange(0, id_manager.num_operations, dtype=T.int64))) + operation_label = [ + id_manager.operation_id.reverse_map(i) + for i in range(id_manager.num_operations) + ] + add_embedding(operation_buffer, operation_label, 'operation') + + # add z & z2 embedding + z_label = np.stack(z_label, axis=0) + add_embedding( + np.stack(z_buffer, axis=0), + z_label, + tag='z', + limit=num_embedding_samples + ) + if z2_buffer: + add_embedding( + np.stack(z2_buffer, axis=0), + z_label, + tag='z2', + limit=num_embedding_samples + ) diff --git a/tracegnn/models/trace_vae/graph_utils.py b/tracegnn/models/trace_vae/graph_utils.py new file mode 100644 index 0000000..340417a --- /dev/null +++ b/tracegnn/models/trace_vae/graph_utils.py @@ -0,0 +1,424 @@ +import math +from dataclasses import dataclass +from typing import * + +import networkx as nx +import numpy as np +import tensorkit as tk +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * +from .constants import * +from .tensor_utils import * +import dgl +import torch + +__all__ = [ + 'flat_to_nx_graphs', + 'p_net_to_trace_graphs', + 'GraphNodeMatch', 'GraphNodeDiff', + 'diff_graph', +] + + +# util to reshape an array +def reshape_to(x, ndims): + shape = T.shape(x) + return T.reshape(x, [-1] + shape[len(shape) - ndims + 1:]) + + +def to_scalar(x): + return T.to_numpy(x).tolist() + + +def flat_to_nx_graphs(p: tk.BayesianNet, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + min_edge_weight: float = 0.2, + ) -> List[nx.Graph]: + """Convert `p` net sampled from a flat TraceVAE to nx.Graph.""" + # extract features + adjs = reshape_to(p['adj'].distribution.probs, 2) + node_counts = T.to_numpy(reshape_to(p['node_count'].tensor, 1)) + node_types = T.to_numpy(reshape_to(p['node_type'].tensor, 2)) + # span_counts = reshape_to(p['span_count'].tensor, 2) + + if 'latency' in p: + latency_src = T.to_numpy(reshape_to(p['latency'].distribution.base_distribution.mean, 3)) + latencies = np.zeros(latency_src.shape, 
dtype=np.float32) + for i in range(node_types.shape[0]): + for j in range(node_types.shape[1]): + try: + node_type = int(node_types[i, j]) + mu, std = latency_range[node_type] + latencies[i, j] = latency_src[i, j] * std + mu + except KeyError: + latencies[i, j] = -1. # todo: is this okay? + else: + latencies = None + + # build the graph + ret = [] + for i, node_count in enumerate(node_counts): + g = nx.Graph() + + # add nodes + for j in range(node_count): + g.add_node(j) + + # add edges + adj = triu_to_dense(adjs[i: i+1], MAX_NODE_COUNT) + for u in range(node_count): + for v in range(u + 1, node_count): + w = float(to_scalar(adj[u, v])) + if w >= min_edge_weight: + g.add_edge(u, v, weight=w) + + # add node attributes + for j in range(node_count): + node_type = int(node_types[i, j]) + g.nodes[j]['node_type'] = node_type + g.nodes[j]['operation'] = id_manager.operation_id.reverse_map(node_type) + if latencies is not None: + for k, pfx in enumerate(('avg_', 'max_', 'min_')): + if k < LATENCY_DIM: + g.nodes[j][f'{pfx}latency'] = latencies[i, j, k] + + # g.nodes[j]['span_count'] = to_scalar(span_counts[i, j]) + # for pfx in ('avg_', 'max_', 'min_'): + # g.nodes[j][f'{pfx}latency'] = latencies[f'{pfx}latency'][i, j] + + ret.append(g) + + # return the graphs + return ret + + +def p_net_to_trace_graphs(p: tk.BayesianNet, + id_manager: TraceGraphIDManager, + latency_range: TraceGraphLatencyRangeFile, + discard_node_with_type_0: bool = True, + discard_node_with_unknown_latency_range: bool = True, + discard_graph_with_error_node_count: bool = False, + keep_front_shape: bool = False, + ) -> Union[List[Optional[TraceGraph]], np.ndarray]: + """Convert `p` net sampled from a flat TraceVAE to TraceGraph.""" + if USE_MULTI_DIM_LATENCY_CODEC: + raise RuntimeError(f'`USE_MULTI_DIM_LATENCY_CODEC` is not supported.') + + # find the base distribution (Normal, Categorical, OneHotCategorical) + def find_base(t: tk.StochasticTensor): + d = t.distribution + while not isinstance(d, (tk.Normal, + tk.Bernoulli, + tk.Categorical, + tk.OneHotCategorical)): + d = d.base_distribution + return d + + # extract features + def get_adj(t, pad_value=0): + t = reshape_to(t, 2) + return np.stack( + [ + T.to_numpy(triu_to_dense( + t[i: i + 1], + MAX_NODE_COUNT, + pad_value=pad_value + )) + for i in range(len(t)) + ], + axis=0 + ) + + def bernoulli_log_prob(l): + # log(1 / (1 + exp(-l)) = log(exp(l) / (1 + exp(l))) + return T.where( + l >= 0, + -T.log1p(T.exp(-l)), + l - T.log1p(T.exp(l)), + ) + + def softmax_log_prob(l): + # log(exp(l) / sum(exp(l)) + return l - T.log_sum_exp(l, axis=[-1], keepdims=True) + + front_shape = T.shape(p['adj'].tensor)[:-1] + + adjs = get_adj(p['adj'].tensor) + adj_probs = get_adj(find_base(p['adj']).probs) + adj_logits = get_adj(bernoulli_log_prob(find_base(p['adj']).logits), pad_value=-100000) + + node_counts = T.to_numpy(reshape_to(p['node_count'].tensor, 1)) + node_types = T.to_numpy(reshape_to(p['node_type'].tensor, 2)) + node_count_logits = T.to_numpy(reshape_to(softmax_log_prob(find_base(p['node_count']).logits), 2)) + node_type_logits = T.to_numpy(reshape_to(softmax_log_prob(find_base(p['node_type']).logits), 3)) + + if 'latency' in p: + latencies = T.to_numpy(reshape_to(p['latency'].tensor, 3)) + avg_latencies = latencies[..., 0] + latency_means = T.to_numpy(reshape_to(find_base(p['latency']).mean, 3)) + latency_stds = T.to_numpy(reshape_to(find_base(p['latency']).std, 3)) + + # build the graph + ret = [] + for i, node_count in enumerate(node_counts): + # extract the arrays + adj = 
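+# The two helpers above are numerically stable forms of log-sigmoid and
+# log-softmax. Equivalent PyTorch one-liners (sketch):
+import torch.nn.functional as F
+
+def bernoulli_log_prob_sketch(l):
+    return F.logsigmoid(l)  # == -log(1 + exp(-l)), evaluated stably
+
+def softmax_log_prob_sketch(l):
+    return F.log_softmax(l, dim=-1)  # == l - logsumexp(l), evaluated stably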
adjs[i][:node_count][:, :node_count]
+        adj_prob = adj_probs[i][:node_count][:, :node_count]
+        adj_logit = adj_logits[i]  # [:node_count][:, :node_count]
+        node_type = node_types[i]  # [:node_count]
+        node_mask = np.full([node_count], True, dtype=bool)
+        node_count_logit = node_count_logits[i]
+        node_type_logit = node_type_logits[i]
+
+        if 'latency' in p:
+            avg_latency = avg_latencies[i]
+            latency_mean = latency_means[i]
+            latency_std = latency_stds[i]
+
+        # if `discard_node_with_type_0`, zero out all adj entries from / to nodes
+        # with `node_type == 0` (and likewise for nodes with unknown latency range)
+        node_count_new = node_count
+        for j in range(node_count):
+            n_type = int(node_type[j])
+            if (discard_node_with_type_0 and n_type == 0) or \
+                    (discard_node_with_unknown_latency_range and n_type not in latency_range):
+                node_mask[j] = False
+                node_count_new -= 1
+                adj[:, j] = 0
+                adj[j, :] = 0
+                adj_prob[:, j] = 0
+                adj_prob[j, :] = 0
+
+        # for each column in `adj`, if there is not exactly one in-edge,
+        # choose an in-edge sampled w.r.t. adj_prob
+        for j in range(node_count):
+            if node_mask[j] and np.sum(adj[:, j]) != 1:
+                prob_vec = adj_prob[:, j]
+                prob_sum = np.sum(prob_vec)
+                if prob_sum > 1e-7:
+                    pvals = prob_vec / np.sum(prob_vec)
+                    pvals_mask = pvals > 1e-7
+                    indices = np.arange(len(pvals))[pvals_mask]
+                    k = indices[np.argmax(np.random.multinomial(1, pvals[pvals_mask]))]
+                    adj[:, j] = 0
+                    adj[k, j] = 1
+
+        # select the edges
+        edges = list(zip(*np.where(adj)))
+        if len(edges) < node_count_new - 1:
+            # pick out the root sub-graph
+            union_set = {j: -1 for j in range(node_count) if node_mask[j]}
+
+            def find_root(s):
+                t = union_set[s]
+                if t == -1:
+                    return s
+                r = find_root(t)
+                if r != t:
+                    union_set[s] = r
+                return r
+
+            def link_edge(s, t):
+                union_set[t] = s
+
+            edges_new = []
+            for s, t in edges:
+                link_edge(s, t)
+            for s, t in edges:
+                if s == 0 or find_root(s) == 0:
+                    edges_new.append((s, t))
+
+            edges = edges_new
+            node_count_new = len(edges_new) + 1
+
+        if discard_graph_with_error_node_count and (node_count_new != node_count):
+            ret.append(None)
+            continue
+
+        # build the trace graph
+        def get_node(s):
+            if s not in nodes:
+                n_type = node_type[s]
+                if 'latency' in p:
+                    latency = avg_latency[s]
+                    if n_type in latency_range:
+                        mu, std = latency_range[n_type]
+                        latency = latency * std + mu
+                    features = TraceGraphNodeFeatures(
+                        span_count=1,
+                        avg_latency=latency,
+                        max_latency=latency,
+                        min_latency=latency,
+                    )
+                    avg_latency_nstd = float(
+                        abs(avg_latency[s] - latency_mean[s, 0]) /
+                        latency_std[s, 0]
+                    )
+                else:
+                    features = TraceGraphNodeFeatures(
+                        span_count=1,
+                        avg_latency=math.nan,
+                        max_latency=math.nan,
+                        min_latency=math.nan,
+                    )
+                    avg_latency_nstd = 0
+
+                nodes[s] = TraceGraphNode.new_sampled(
+                    node_id=s,
+                    operation_id=node_type[s],
+                    features=features,
+                    scores=TraceGraphNodeReconsScores(
+                        edge_logit=0,
+                        operation_logit=node_type_logit[s, n_type],
+                        avg_latency_nstd=avg_latency_nstd,
+                    )
+                )
+            return nodes[s]
+
+        nodes = {}
+        edges.sort()
+        for u, v in edges:
+            if node_mask[u] and node_mask[v]:
+                v_node = get_node(v)
+                get_node(u).children.append(v_node)
+                v_node.scores.edge_logit = adj_logit[u, v]
+
+        if 0 in nodes:
+            g = TraceGraph.new_sampled(nodes[0], len(nodes), -1)
+            g.merge_spans_and_assign_id()
+            ret.append(g)
+        else:
+            ret.append(None)
+
+    # return the graphs
+    if keep_front_shape:
+        ret = np.array(ret).reshape(front_shape)
+
+    return ret
+
+
+@dataclass(init=False)
+class GraphNodeMatch(object):
+    __slots__ = [
+        'g1_to_g2',
+        'g2_to_g1',
+    ]
+
+    g1_to_g2: Dict[TraceGraphNode,
TraceGraphNode] + g2_to_g1: Dict[TraceGraphNode, TraceGraphNode] + + def __init__(self): + self.g1_to_g2 = {} + self.g2_to_g1 = {} + + def add_match(self, node1, node2): + self.g1_to_g2[node1] = node2 + self.g2_to_g1[node2] = node1 + + +@dataclass(init=False) +class GraphNodeDiff(object): + __slots__ = [ + 'parent', 'depth', 'node', 'offset', 'node_count', + ] + + parent: Optional[TraceGraphNode] + depth: int + node: TraceGraphNode + offset: int # -1: present in g but absent in g2; 1: present in g2 but absent in g1 + node_count: int # count of nodes in this branch + + def __init__(self, parent, depth, node, offset): + self.parent = parent + self.depth = depth + self.node = node + self.offset = offset + self.node_count = node.count_nodes() + + def __repr__(self): + return f'GraphNodeDiff(depth={self.depth}, offset={self.offset})' + + +def diff_graph(g1: TraceGraph, + g2: TraceGraph + ) -> Tuple[GraphNodeMatch, List[GraphNodeDiff]]: + m = GraphNodeMatch() + ret = [] + + def match_node(depth: int, + parent1: Optional[TraceGraphNode], + parent2: Optional[TraceGraphNode], + node1: Optional[TraceGraphNode], + node2: Optional[TraceGraphNode]): + if node1 is None: + if node2 is None: + pass + else: + ret.append(GraphNodeDiff(parent=parent2, depth=depth, node=node2, offset=1)) + else: + if node2 is None: + ret.append(GraphNodeDiff(parent=parent1, depth=depth, node=node1, offset=-1)) + elif node1.operation_id != node2.operation_id: + ret.append(GraphNodeDiff(parent=parent1, depth=depth, node=node1, offset=-1)) + ret.append(GraphNodeDiff(parent=parent2, depth=depth, node=node2, offset=1)) + else: + m.add_match(node1, node2) + c_depth = depth + 1 + + i, j = 0, 0 + while i < len(node1.children) and j < len(node2.children): + c1 = node1.children[i] + c2 = node2.children[j] + if c1.operation_id < c2.operation_id: + match_node(c_depth, node1, None, c1, None) + i += 1 + elif c2.operation_id < c1.operation_id: + match_node(c_depth, None, node2, None, c2) + j += 1 + else: + match_node(c_depth, node1, node2, c1, c2) + i += 1 + j += 1 + + while i < len(node1.children): + c1 = node1.children[i] + match_node(c_depth, node1, None, c1, None) + i += 1 + + while j < len(node2.children): + c2 = node2.children[j] + match_node(c_depth, None, node2, None, c2) + j += 1 + + match_node(0, None, None, g1.root, g2.root) + return m, ret + + +def dgl_graph_key(graph: dgl.DGLGraph) -> str: + return edges_to_key(graph.ndata['operation_id'], *graph.edges()) + +@torch.jit.script +def edges_to_key(operation_id: torch.Tensor, u_list: torch.Tensor, v_list: torch.Tensor) -> str: + mask = u_list != v_list + u_id: List[int] = operation_id[u_list][mask].tolist() + v_id: List[int] = operation_id[v_list][mask].tolist() + + graph_key = f'0,{operation_id[0].item()};' + ';'.join(sorted([f'{u},{v}' for (u, v) in zip(u_id, v_id)])) + + return graph_key + +def trace_graph_key(graph: TraceGraph) -> str: + def dfs(nd: TraceGraphNode, pa_id: int, cnt: int=1): + cur_cnt = cnt * len(nd.spans) + spans = [f'{pa_id},{nd.operation_id}'] * cur_cnt + + for child in nd.children: + spans += dfs(child, nd.operation_id, cur_cnt) + + return spans + + spans = dfs(graph.root, 0, 1) + + return ';'.join(sorted(spans)) diff --git a/tracegnn/models/trace_vae/model/__init__.py b/tracegnn/models/trace_vae/model/__init__.py new file mode 100644 index 0000000..fde6b5f --- /dev/null +++ b/tracegnn/models/trace_vae/model/__init__.py @@ -0,0 +1 @@ +from .trace_vae import * diff --git a/tracegnn/models/trace_vae/model/gnn_layers.py 
b/tracegnn/models/trace_vae/model/gnn_layers.py new file mode 100644 index 0000000..c9e8871 --- /dev/null +++ b/tracegnn/models/trace_vae/model/gnn_layers.py @@ -0,0 +1,190 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +from dgl import nn as gnn +from tensorkit import tensor as T + +__all__ = [ + 'GNNLayerType', + 'GNNLayerConfig', + 'make_gnn_layers', + 'apply_gnn_layer', + 'GNNSequential', + 'GATConvAgg', + 'GraphConv', +] + + +class GNNLayerType(str, Enum): + GAT = 'GAT' + GraphConv = 'GraphConv' + + +class GNNLayerConfig(mltk.Config): + type: GNNLayerType = GNNLayerType.GAT + + # whether to use batch norm? + use_batch_norm: bool = True + + # config for GAT + class gat(mltk.Config): + num_attention_heads: int = 2 + + +def make_gnn_layers(config: GNNLayerConfig, + input_dim: int, + gnn_layers: List[int], + ): + if config.use_batch_norm: + normalization_factory = tk.layers.BatchNorm + else: + normalization_factory = lambda num_inputs: None + + layers = [] + for size in gnn_layers: + if config.type == GNNLayerType.GAT: + layers.append(GATConvAgg( + input_dim, + size, + config.gat.num_attention_heads, + activation=tk.layers.LeakyReLU(), + normalization_factory=normalization_factory, + )) + elif config.type == GNNLayerType.GraphConv: + layers.append(GraphConv( + input_dim, + size, + activation=tk.layers.LeakyReLU(), + normalization_factory=normalization_factory, + )) + else: + raise ValueError(f'Unsupported GNN type: {config.type!r}') + input_dim = layers[-1].output_dim + + return input_dim, layers + + +def apply_gnn_layer(layer, g, h): + if isinstance(g, (list, tuple)): + if len(h.shape) == 3: + if len(g) != h.shape[0]: + raise ValueError(f'len(g) != h.shape[0]: {len(g)} vs {h.shape[0]}') + return T.stack( + [ + layer(g[i], h[i]) + for i in range(len(g)) + ], + axis=0 + ) + else: + return T.stack( + [ + layer(g[i], h) + for i in range(len(g)) + ], + axis=0 + ) + else: + if len(h.shape) == 3: + return T.stack( + [ + layer(g, h[i]) + for i in range(h.shape[0]) + ], + axis=0 + ) + else: + return layer(g, h) + + +class GNNSequential(tk.layers.BaseLayer): + + def __init__(self, layers): + super().__init__() + self.gnn = gnn.Sequential(*layers) + + def forward(self, g, h): + return apply_gnn_layer(self.gnn, g, h) + + +class GATConvAgg(tk.layers.BaseLayer): + """First apply `dgl.nn.GATConv` then aggregate the multi attention heads.""" + + aggregate_mode: str + output_dim: int + + def __init__(self, input_dim: int, output_dim: int, num_heads: int, + aggregate_mode: str = 'concat', activation=None, + normalization_factory=None): + super().__init__() + + if aggregate_mode == 'concat': + self.output_dim = output_dim * num_heads + elif aggregate_mode in ('mean', 'avg'): + self.output_dim = output_dim + else: + raise ValueError(f'Unsupported aggregate_mode: {aggregate_mode!r}') + + self.activation = activation + self.normalization = None if normalization_factory is None else \ + normalization_factory(self.output_dim) + + self.gnn = gnn.GATConv( + input_dim, + output_dim, + num_heads, + activation=None, + ) + self.aggregate_mode = aggregate_mode + + def forward(self, g, h): + h = self.gnn(g, h) + if self.aggregate_mode == 'concat': + h = T.concat( + [h[..., i, :] for i in range(h.shape[-2])], + axis=-1 + ) + else: + h = T.reduce_mean(h, axis=[-2]) + + if self.normalization is not None: + h = self.normalization(h) + if self.activation is not None: + h = self.activation(h) + + return h + + +class GraphConv(tk.layers.BaseLayer): + + output_dim: int + + def 
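+# Sketch of the head aggregation in GATConvAgg above: dgl's GATConv returns
+# features shaped (..., num_heads, out_dim); 'concat' flattens the head axis
+# while 'mean' averages over it.
+import torch
+
+def aggregate_heads_sketch(h: torch.Tensor, mode: str = 'concat') -> torch.Tensor:
+    # h: (num_nodes, num_heads, out_dim)
+    if mode == 'concat':
+        return h.reshape(*h.shape[:-2], h.shape[-2] * h.shape[-1])
+    return h.mean(dim=-2)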
__init__(self, input_dim: int, output_dim: int, activation=None, + normalization_factory=None): + super().__init__() + self.output_dim = output_dim + + self.activation = activation + self.normalization = None if normalization_factory is None else \ + normalization_factory(self.output_dim) + + self.gnn = gnn.GraphConv( + input_dim, + output_dim, + norm='both', + weight=self.normalization is None, + bias=self.normalization is None, + activation=None, + ) + + def forward(self, g, h): + h = self.gnn(g, h) + + if self.normalization is not None: + h = self.normalization(h) + if self.activation is not None: + h = self.activation(h) + + return h diff --git a/tracegnn/models/trace_vae/model/latency_vae.py b/tracegnn/models/trace_vae/model/latency_vae.py new file mode 100644 index 0000000..1252675 --- /dev/null +++ b/tracegnn/models/trace_vae/model/latency_vae.py @@ -0,0 +1,444 @@ +from typing import * + +import dgl +import mltk +import tensorkit as tk +from tensorkit import tensor as T + +from ..constants import * +from ..distributions import * +from ..tensor_utils import node_count_mask +from .gnn_layers import * +from .model_utils import * +from .operation_embedding import * +from .pooling import * +from .realnvp_flow import * + +__all__ = [ + 'TraceLatencyVAEConfig', + 'TraceLatencyVAE', +] + + +class TraceLatencyVAEConfig(mltk.Config): + # whether to use the operation embedding? (but grad will be blocked) + use_operation_embedding: bool = True + + # the dimension of z2 (to encode latency) + z2_dim: int = 10 + + # the config of posterior / prior flow + realnvp: RealNVPFlowConfig = RealNVPFlowConfig() + + # whether to use BatchNorm? + use_batch_norm: bool = True + + class encoder(mltk.Config): + # ================ + # h(G) for q(z2|G) + # ================ + # the gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the gnn layer sizes for q(z2|...) + gnn_layers: List[int] = [500, 500, 500, 500] + + # whether to stop gradient to operation_embedding along this path? + operation_embedding_stop_grad: bool = True + + # ============= + # graph pooling + # ============= + pool_type: PoolingType = PoolingType.AVG + pool_config: PoolingConfig = PoolingConfig() + + # ======= + # q(z2|G) + # ======= + z2_logstd_min: Optional[float] = -7 + z2_logstd_max: Optional[float] = 2 + + # whether to use realnvp posterior flow? + use_posterior_flow: bool = False + + class decoder(mltk.Config): + # ==================== + # decoder architecture + # ==================== + use_prior_flow: bool = False + + # p(z2|z) n_mixtures + z2_prior_mixtures: int = 1 + + # whether z2 should condition on z? + condition_on_z: bool = True + + # z2 given z hidden layers + z2_given_z_stop_grad: bool = True + z2_given_z_hidden_layers: List[int] = [250, 250] + z2_logstd_min: Optional[float] = -5 + z2_logstd_max: Optional[float] = 2 + + # ======= + # latency + # ======= + # gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the node types from node embedding e + gnn_layers: List[int] = [500, 500, 500, 500] + + # hidden layers for graph embedding from z + graph_embedding_layers: List[int] = [500, 500] + + # size of the latent embedding e + latent_embedding_size: int = 40 + + # whether to stop gradient to operation_embedding along this path? + operation_embedding_stop_grad: bool = True + + # ============== + # p(latency|...) + # ============== + # the minimum value for latency logstd + latency_logstd_min: Optional[float] = -7 + + # whether to use mask on p(latency|...)? 
+        use_latency_mask: bool = True
+
+        # whether to clip the latency to one dim even if three dims are provided?
+        clip_latency_to_one_dim: bool = False
+
+        # whether to use the biased distribution in p(latency|...)?
+        use_biased_latency: bool = False
+
+        # whether to use `AnomalyDetectionNormal`?
+        use_anomaly_detection_normal: bool = False
+
+        # the `std_threshold` for AnomalyDetectionNormal or BiasedNormal in testing
+        biased_normal_std_threshold: float = 4.0
+
+        # the `std_threshold` for SafeNormal in training
+        safe_normal_std_threshold: float = 6.0
+
+
+class TraceLatencyVAE(tk.layers.BaseLayer):
+
+    config: TraceLatencyVAEConfig
+    num_node_types: int
+
+    def __init__(self,
+                 config: TraceLatencyVAEConfig,
+                 z_dim: int,  # z dimension of the struct_vae
+                 operation_embedding: OperationEmbedding,
+                 ):
+        super().__init__()
+
+        # ===================
+        # memorize the config
+        # ===================
+        self.config = config
+        self.z_dim = z_dim
+
+        # =============================
+        # node embedding for operations
+        # =============================
+        self.operation_embedding = operation_embedding
+        self.num_node_types = operation_embedding.num_operations
+
+        # ========================
+        # standard layer arguments
+        # ========================
+        layer_args = tk.layers.LayerArgs()
+        layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU)
+        if config.use_batch_norm:
+            layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm)
+
+        # ===========================
+        # q(z2|adj,node_type,latency)
+        # ===========================
+        if config.use_operation_embedding:
+            input_size = self.operation_embedding.embedding_dim
+        else:
+            input_size = self.num_node_types
+        output_size, gnn_layers = make_gnn_layers(
+            config.encoder.gnn,
+            (
+                input_size +
+                LATENCY_DIM  # avg, min, max
+            ),
+            config.encoder.gnn_layers,
+        )
+        self.qz2_gnn_layers = GNNSequential(
+            gnn_layers + [
+                make_graph_pooling(
+                    output_size,
+                    config.encoder.pool_type,
+                    config.encoder.pool_config
+                ),
+            ]
+        )
+        self.qz2_mean = tk.layers.Linear(output_size, config.z2_dim)
+        self.qz2_logstd = tk.layers.Linear(output_size, config.z2_dim)
+
+        if config.encoder.use_posterior_flow:
+            self.qz_flow = make_realnvp_flow(config.z2_dim, config.realnvp)
+
+        # ================
+        # p(z2) or p(z2|z)
+        # ================
+        if config.decoder.condition_on_z:
+            if config.decoder.use_prior_flow and config.decoder.z2_prior_mixtures > 1:
+                raise ValueError(f'`use_prior_flow == True` and `z2_prior_mixtures > 1` cannot both be True.')
+
+            n_mixtures = config.decoder.z2_prior_mixtures
+            z2_given_z_builder = tk.layers.SequentialBuilder(
+                self.z_dim,
+                layer_args=layer_args
+            )
+            for size in config.decoder.z2_given_z_hidden_layers:
+                z2_given_z_builder.dense(size)
+            self.z2_given_z_hidden_layers = z2_given_z_builder.build(flatten_to_ndims=True)
+            self.pz2_mean = z2_given_z_builder.as_input().linear(config.z2_dim * n_mixtures).build()
+            self.pz2_logstd = z2_given_z_builder.as_input().linear(config.z2_dim * n_mixtures).build()
+
+        if config.decoder.use_prior_flow:
+            self.pz2_flow = make_realnvp_flow(config.z2_dim, config.realnvp).invert()
+
+        # node features from gnn
+        input_size = config.z2_dim
+
+        if config.use_operation_embedding:
+            input_size += self.operation_embedding.embedding_dim
+        else:
+            input_size += self.num_node_types
+
+        output_size, gnn_layers = make_gnn_layers(
+            config.decoder.gnn,
+            input_size,
+            config.decoder.gnn_layers,
+        )
+        self.pG_node_features = GNNSequential(
+            gnn_layers +
+            [
+                GraphConv(  # p(latency|e)
+                    output_size,
+                    2 * LATENCY_DIM  # (mean, logstd) *
(avg, min, max) + ), + ] + ) + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def q(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None): + config = self.config + + # compose feature vector + if config.use_operation_embedding: + h2 = self.operation_embedding(g.ndata['node_type']) + if config.encoder.operation_embedding_stop_grad: + h2 = T.stop_grad(h2) + else: + h2 = T.one_hot( + g.ndata['node_type'], + self.num_node_types, + dtype=T.float32, + ) + h = T.concat([h2, g.ndata['latency'][..., :LATENCY_DIM]], axis=-1) + + # feed into gnn and get node embeddings + h = self.qz2_gnn_layers(g, h) + + # mean and logstd for q(z2|G) + z2_mean = self.qz2_mean(h) + z2_logstd = T.maybe_clip( + self.qz2_logstd(h), + min_val=config.encoder.z2_logstd_min, + max_val=config.encoder.z2_logstd_max, + ) + + # add 'z2' random variable + qz2 = tk.Normal(mean=z2_mean, logstd=z2_logstd, event_ndims=1) + if config.encoder.use_posterior_flow: + qz2 = tk.FlowDistribution(qz2, self.qz_flow) + z2 = net.add('z2', qz2, n_samples=n_z) + + def p(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None, + use_biased: bool = False, + latency_logstd_min: Optional[float] = None, + latency_log_prob_weight: bool = False, + std_limit: Optional[T.Tensor] = None, + ): + config = self.config + + # sample z2 ~ p(z2) or p(z2|z) + if config.decoder.condition_on_z: + h = net['z'].tensor + if config.decoder.z2_given_z_stop_grad: + h = T.stop_grad(h) + h = self.z2_given_z_hidden_layers(h) + z2_mean = self.pz2_mean(h) + z2_logstd = T.maybe_clip( + self.pz2_logstd(h), + min_val=config.decoder.z2_logstd_min, + max_val=config.decoder.z2_logstd_max, + ) + + n_mixtures = config.decoder.z2_prior_mixtures + if n_mixtures > 1: + z2_mean_list = T.split(z2_mean, [config.z2_dim] * n_mixtures, axis=-1) + z2_logstd_list = T.split(z2_logstd, [config.z2_dim] * n_mixtures, axis=-1) + pz2 = tk.Mixture( + categorical=tk.Categorical( + logits=T.zeros(T.shape(z2_mean)[:-1] + [n_mixtures]), + ), + components=[ + tk.Normal(mean=mu, logstd=logstd, event_ndims=1) + for mu, logstd in zip(z2_mean_list, z2_logstd_list) + ], + reparameterized=True, + ) + else: + pz2 = tk.Normal(mean=z2_mean, logstd=z2_logstd, event_ndims=1) + else: + pz2 = tk.UnitNormal([1, config.z2_dim], event_ndims=1) + + if config.decoder.use_prior_flow: + pz2 = tk.FlowDistribution(pz2, self.pz2_flow) + + z2 = net.add('z2', pz2, n_samples=n_z) + + # z2 as context + z2_shape = T.shape(z2.tensor) + h = T.reshape(z2.tensor, z2_shape[:-1] + [1, z2_shape[-1]]) + + # concat with node type information + if config.use_operation_embedding: + h2 = self.operation_embedding(net['node_type'].tensor) + if config.decoder.operation_embedding_stop_grad: + h2 = T.stop_grad(h2) + else: + h2 = T.one_hot( + net['node_type'].tensor, + self.num_node_types, + dtype=T.float32, + ) + h = T.broadcast_concat(h, h2, axis=-1) + h2 = None + + # node_features from gnn + h_shape = T.shape(h) + h = T.reshape( + h, + h_shape[:-3] + [h_shape[-3] * h_shape[-2], h_shape[-1]] + ) + node_features = self.pG_node_features(g, h) + + # mean & logstd for p(latency|z2,G) + if latency_logstd_min is not None: + if config.decoder.latency_logstd_min is not None: + latency_logstd_min = max( + latency_logstd_min, + config.decoder.latency_logstd_min + ) + else: + latency_logstd_min = config.decoder.latency_logstd_min + + latency_mean = T.reshape( + node_features[..., :LATENCY_DIM], # avg, 
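+# A sketch of the n_mixtures > 1 prior above, written with torch.distributions
+# for clarity (the model itself uses tensorkit's Mixture): the decoder emits
+# n_mixtures stacked (mean, logstd) pairs, mixed with uniform weights.
+import torch
+from torch import distributions as D
+
+def mixture_prior_sketch(mean: torch.Tensor, logstd: torch.Tensor, n_mixtures: int, z2_dim: int):
+    means = torch.stack(mean.split(z2_dim, dim=-1), dim=-2)          # (..., K, z2_dim)
+    stds = torch.stack(logstd.split(z2_dim, dim=-1), dim=-2).exp()   # (..., K, z2_dim)
+    mix = D.Categorical(logits=torch.zeros(means.shape[:-1]))        # uniform over K
+    comp = D.Independent(D.Normal(means, stds), 1)
+    return D.MixtureSameFamily(mix, comp)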
min, max + h_shape[:-1] + [LATENCY_DIM] + ) + latency_logstd = T.maybe_clip( + T.reshape( + node_features[..., LATENCY_DIM: LATENCY_DIM*2], + h_shape[:-1] + [LATENCY_DIM] + ), + min_val=latency_logstd_min, + ) + + if std_limit is not None: + logstd_limit = T.log( + T.clip_left( + std_limit[net['node_type'].tensor], + 1e-7 + ) + ) + logstd_limit = T.stop_grad(logstd_limit) + logstd_limit = T.expand_dim(logstd_limit, axis=-1) + latency_logstd = T.minimum(latency_logstd, logstd_limit) + + # clip the latency + if config.decoder.clip_latency_to_one_dim: + latency_mean = latency_mean[..., :1] + latency_logstd = latency_logstd[..., :1] + + # p(latency|z2,G) + if config.decoder.use_latency_mask: + inner_event_ndims = 0 + else: + inner_event_ndims = 2 + + if self.training: + p_latency = SafeNormal( + std_threshold=config.decoder.safe_normal_std_threshold, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + elif use_biased and config.decoder.use_biased_latency: + if config.decoder.use_anomaly_detection_normal: + p_latency = AnomalyDetectionNormal( + std_threshold=config.decoder.biased_normal_std_threshold, + bias_alpha=MAX_NODE_COUNT, + bias_threshold=0.5, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + else: + p_latency = BiasedNormal( + alpha=MAX_NODE_COUNT, + std_threshold=config.decoder.biased_normal_std_threshold, + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + else: + p_latency = tk.Normal( + mean=latency_mean, + logstd=latency_logstd, + event_ndims=inner_event_ndims, + ) + + if config.decoder.use_latency_mask: + # mask + mask = node_count_mask( + net['node_count'].tensor, + MAX_NODE_COUNT, + dtype=T.boolean, + ) + mask = T.stop_grad(mask) + mask = T.expand_dim(mask, axis=-1) + + # log_prob_weight + if latency_log_prob_weight: + log_prob_weight = T.cast(net['node_count'].tensor, dtype=T.float32) + log_prob_weight = T.float_scalar(MAX_NODE_COUNT) / log_prob_weight + log_prob_weight = T.reshape(log_prob_weight, T.shape(log_prob_weight) + [1, 1]) + log_prob_weight = T.stop_grad(log_prob_weight) + else: + log_prob_weight = None + + # p(latency|...) 
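+# Sketch of the mask construction assumed for `node_count_mask` above: one row
+# per graph, True for the first node_count slots out of MAX_NODE_COUNT (the
+# actual helper lives in tensor_utils; this mirrors its apparent semantics).
+import torch
+
+def node_count_mask_sketch(node_count: torch.Tensor, max_node_count: int) -> torch.Tensor:
+    return torch.arange(max_node_count) < node_count.unsqueeze(-1)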
+ p_latency = MaskedDistribution(p_latency, mask, log_prob_weight, event_ndims=2) + + latency = net.add('latency', p_latency) diff --git a/tracegnn/models/trace_vae/model/model_utils.py b/tracegnn/models/trace_vae/model/model_utils.py new file mode 100644 index 0000000..5f9d5de --- /dev/null +++ b/tracegnn/models/trace_vae/model/model_utils.py @@ -0,0 +1,35 @@ +from typing import * +from tensorkit import tensor as T + +from ..constants import * + +__all__ = [ + 'decoder_use_depth_and_idx', +] + + +def decoder_use_depth_and_idx(g, + use_depth: bool, + use_idx: bool + ) -> Optional[T.Tensor]: + def use_tensor(name, num_classes): + if isinstance(g, list): + t = T.stack([g2.ndata[name] for g2 in g], axis=0) + else: + t = g.ndata[name] + t_shape = T.shape(t) + t = T.reshape( + t, + t_shape[:-1] + [t_shape[-1] // MAX_NODE_COUNT, MAX_NODE_COUNT] + ) + t = T.one_hot(t, num_classes, dtype=T.float32) + return t + + buf = [] + if use_depth: + buf.append(use_tensor('node_depth', MAX_DEPTH + 1)) + if use_idx: + buf.append(use_tensor('node_idx', MAX_NODE_COUNT)) + + if buf: + return T.concat(buf, axis=-1) diff --git a/tracegnn/models/trace_vae/model/operation_embedding.py b/tracegnn/models/trace_vae/model/operation_embedding.py new file mode 100644 index 0000000..c7c8c7c --- /dev/null +++ b/tracegnn/models/trace_vae/model/operation_embedding.py @@ -0,0 +1,25 @@ +import tensorkit as tk +from tensorkit import tensor as T +from torch.nn import Embedding + +__all__ = [ + 'OperationEmbedding', +] + + +class OperationEmbedding(tk.layers.BaseLayer): + + num_operations: int + embedding_dim: int + + def __init__(self, num_operations: int, embedding_dim: int): + super().__init__() + self.num_operations = num_operations + self.embedding_dim = embedding_dim + self.node_embedding = Embedding(num_operations, embedding_dim) + + def forward(self, node_type: T.Tensor) -> T.Tensor: + node_type, shape = T.flatten_to_ndims(node_type, 1) + node_type = self.node_embedding(node_type) + node_type = T.unflatten_from_ndims(node_type, shape) + return node_type diff --git a/tracegnn/models/trace_vae/model/pooling.py b/tracegnn/models/trace_vae/model/pooling.py new file mode 100644 index 0000000..c40a9a5 --- /dev/null +++ b/tracegnn/models/trace_vae/model/pooling.py @@ -0,0 +1,75 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +import torch +from dgl import nn as gnn +from tensorkit import tensor as T + +__all__ = [ + 'PoolingType', + 'PoolingConfig', + 'make_graph_pooling', + 'graph_node_offsets', + 'RootPooling', +] + + +class PoolingType(str, Enum): + ROOT = 'root' + AVG = 'avg' + ATTENTION = 'attention' # graph attention pooling + + +class PoolingConfig(mltk.Config): + # whether to use batch norm? + use_batch_norm: bool = True + + # config for ATTENTION + class attention(mltk.Config): + hidden_layers: List[int] = [] + + +def make_graph_pooling(feature_size: int, + pool_type: Union[str, PoolingType], + pool_config: PoolingConfig): + layer_args = tk.layers.LayerArgs() + layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU) + if pool_config.use_batch_norm: + layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm) + + if pool_type == PoolingType.ROOT: + return RootPooling() # is this okay? 
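+# Note on OperationEmbedding above: torch.nn.Embedding already accepts N-D
+# index tensors; the flatten/unflatten pair mirrors tensorkit's
+# flatten_to_ndims convention. Equivalent sketch:
+import torch
+
+def embed_any_shape_sketch(emb: torch.nn.Embedding, ids: torch.Tensor) -> torch.Tensor:
+    return emb(ids.reshape(-1)).reshape(*ids.shape, emb.embedding_dim)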
+ elif pool_type == PoolingType.AVG: + return gnn.AvgPooling() + elif pool_type == PoolingType.ATTENTION: + gap_nn_builder = tk.layers.SequentialBuilder( + feature_size, + layer_args=layer_args, + ) + for size in pool_config.attention.hidden_layers: + gap_nn_builder.dense(size) + return gnn.GlobalAttentionPooling(gap_nn_builder.linear(1).build()) + else: + raise ValueError(f'Unsupported `config.encoder.pool_type`: {pool_type!r}') + + +def graph_node_offsets(seglen): + ret = torch.cumsum( + T.concat( + [ + T.zeros([1], dtype=T.get_dtype(seglen), device=T.get_device(seglen)), + seglen + ], + axis=0 + ), + dim=0 + ) + return ret[:-1] + + +class RootPooling(tk.layers.BaseLayer): + + def forward(self, graph, feat): + return feat[graph_node_offsets(graph.batch_num_nodes())] diff --git a/tracegnn/models/trace_vae/model/realnvp_flow.py b/tracegnn/models/trace_vae/model/realnvp_flow.py new file mode 100644 index 0000000..92d84cf --- /dev/null +++ b/tracegnn/models/trace_vae/model/realnvp_flow.py @@ -0,0 +1,49 @@ +import mltk +import tensorkit as tk + +__all__ = [ + 'RealNVPFlowConfig', + 'make_realnvp_flow', +] + + +class RealNVPFlowConfig(mltk.Config): + flow_levels: int = 5 + coupling_hidden_layer_count: int = 1 + coupling_hidden_layer_units: int = 64 + coupling_layer_scale: str = 'sigmoid' + strict_invertible: bool = False + + +def make_realnvp_flow(z_dim: int, flow_config: RealNVPFlowConfig): + flows = [] + for i in range(flow_config.flow_levels): + # act norm + flows.append(tk.flows.ActNorm(z_dim)) + + # coupling layer + n1 = z_dim // 2 + n2 = z_dim - n1 + b = tk.layers.SequentialBuilder( + n1, + layer_args=tk.layers.LayerArgs(). + set_args(['dense'], activation=tk.layers.LeakyReLU) + ) + for j in range(flow_config.coupling_hidden_layer_count): + b.dense(flow_config.coupling_hidden_layer_units) + shift_and_pre_scale = tk.layers.Branch( + branches=[ + # shift + b.as_input().linear(n2, weight_init=tk.init.zeros).build(), + # pre_scale + b.as_input().linear(n2, weight_init=tk.init.zeros).build(), + ], + shared=b.build(), + ) + flows.append(tk.flows.CouplingLayer( + shift_and_pre_scale, scale=flow_config.coupling_layer_scale)) + + # feature rearrangement by invertible dense + flows.append(tk.flows.InvertibleDense(z_dim, strict=flow_config.strict_invertible)) + + return tk.flows.SequentialFlow(flows) diff --git a/tracegnn/models/trace_vae/model/struct_vae.py b/tracegnn/models/trace_vae/model/struct_vae.py new file mode 100644 index 0000000..7cc8c56 --- /dev/null +++ b/tracegnn/models/trace_vae/model/struct_vae.py @@ -0,0 +1,414 @@ +from typing import * + +import dgl +import mltk +import tensorkit as tk +import torch +from tensorkit import tensor as T + +from ..constants import * +from ..distributions import * +from ..tensor_utils import * +from .gnn_layers import * +from .model_utils import * +from .operation_embedding import * +from .pooling import * +from .realnvp_flow import * + +__all__ = [ + 'TraceStructVAEConfig', + 'TraceStructVAE', +] + + +class TraceStructVAEConfig(mltk.Config): + # the dimension of z (to encode adj & node_type) + z_dim: int = 3 + + # the config of posterior / prior flow + realnvp: RealNVPFlowConfig = RealNVPFlowConfig() + + # whether to use BatchNorm? + use_batch_norm: bool = True + + class encoder(mltk.Config): + # =============== + # h(G) for q(z|G) + # =============== + # the gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the gnn layer sizes for q(z|...) 
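+# Sketch of RootPooling above: in a batched DGLGraph, graph k's nodes start at
+# offset sum(batch_num_nodes[:k]); node 0 of each trace graph is its root, so
+# root pooling simply gathers the features at those offsets.
+import torch
+
+def root_offsets_sketch(batch_num_nodes: torch.Tensor) -> torch.Tensor:
+    zero = torch.zeros(1, dtype=batch_num_nodes.dtype)
+    return torch.cumsum(torch.cat([zero, batch_num_nodes]), dim=0)[:-1]
+
+# root_offsets_sketch(torch.tensor([3, 5, 2])) -> tensor([0, 3, 8])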
+ gnn_layers: List[int] = [500, 500, 500, 500] + + # ============= + # graph pooling + # ============= + pool_type: PoolingType = PoolingType.AVG + pool_config: PoolingConfig = PoolingConfig() + + # ====== + # q(z|G) + # ====== + z_logstd_min: Optional[float] = -7 + z_logstd_max: Optional[float] = 2 + + # whether to use realnvp posterior flow? + use_posterior_flow: bool = False + + class decoder(mltk.Config): + # ==================== + # decoder architecture + # ==================== + use_prior_flow: bool = False + + # whether to use `z` directly as context, instead of passing through + # the graph embedding layers? + z_as_context: bool = False + + # whether to use `node_depth` and `node_idx` as extra information? + use_depth: bool = False + use_idx: bool = True + + # ========= + # structure + # ========= + # gnn layer config + gnn: GNNLayerConfig = GNNLayerConfig() + + # the node types from node embedding e + gnn_layers: List[int] = [500, 500, 500, 500] + + # hidden layers for p(node_count|z) + node_count_layers: List[int] = [500] + + # hidden layers for graph embedding from z + graph_embedding_layers: List[int] = [500, 500] + + # size of the latent embedding e + latent_embedding_size: int = 40 + + +class TraceStructVAE(tk.layers.BaseLayer): + + config: TraceStructVAEConfig + num_operations: int + + def __init__(self, + config: TraceStructVAEConfig, + operation_embedding: OperationEmbedding, + ): + super().__init__() + + # =================== + # memorize the config + # =================== + self.config = config + + # ============================= + # node embedding for operations + # ============================= + self.operation_embedding = operation_embedding + self.num_operations = operation_embedding.num_operations + + # ======================== + # standard layer arguments + # ======================== + layer_args = tk.layers.LayerArgs() + layer_args.set_args(['dense'], activation=tk.layers.LeakyReLU) + if config.use_batch_norm: + layer_args.set_args(['dense'], normalizer=tk.layers.BatchNorm) + + # ================== + # q(z|adj,node_type) + # ================== + output_size, gnn_layers = make_gnn_layers( + config.encoder.gnn, + self.operation_embedding.embedding_dim, + config.encoder.gnn_layers, + ) + self.qz_gnn_layers = GNNSequential( + gnn_layers + [ + make_graph_pooling( + output_size, + config.encoder.pool_type, + config.encoder.pool_config + ), + ] + ) + self.qz_mean = tk.layers.Linear(output_size, config.z_dim) + self.qz_logstd = tk.layers.Linear(output_size, config.z_dim) + + if config.encoder.use_posterior_flow: + self.qz_flow = make_realnvp_flow(config.z_dim, config.realnvp) + + # ==== + # p(z) + # ==== + if config.decoder.use_prior_flow: + self.pz_flow = make_realnvp_flow(config.z_dim, config.realnvp).invert() + + # =============== + # p(node_count|z) + # =============== + node_count_builder = tk.layers.SequentialBuilder( + config.z_dim, + layer_args=layer_args + ) + for size in config.decoder.node_count_layers: + node_count_builder.dense(size) + self.pG_node_count_logits = node_count_builder. \ + linear(MAX_NODE_COUNT + 1). 
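+# Sketch of the symmetric edge parameterization used below ("p(adj) =
+# outer-dot(e)"): edge logits are pairwise dot products of per-node latent
+# embeddings, and only the strict upper triangle is kept since the adjacency
+# is undirected. `edge_logits_by_dot_product` / `dense_to_triu` are the
+# project's tensor_utils helpers; this mirrors their apparent semantics.
+import torch
+
+def edge_logits_by_dot_product_sketch(e: torch.Tensor) -> torch.Tensor:
+    # e: (..., N, d) -> (..., N, N) symmetric logits
+    return e @ e.transpose(-1, -2)
+
+def dense_to_triu_sketch(logits: torch.Tensor, n: int) -> torch.Tensor:
+    iu = torch.triu_indices(n, n, offset=1)
+    return logits[..., iu[0], iu[1]]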
\ + build(flatten_to_ndims=True) + + # ======== + # p(adj|z) + # ======== + # graph embedding from z + graph_embedding_builder = tk.layers.SequentialBuilder( + config.z_dim, + layer_args=layer_args, + ) + for size in config.decoder.graph_embedding_layers: + graph_embedding_builder.dense(size) + self.pG_graph_embedding = graph_embedding_builder.build(flatten_to_ndims=True) + + # node embedding (akka, `e`) from the graph embedding + self.pG_node_embedding = tk.layers.Linear( + graph_embedding_builder.out_shape[-1], + MAX_NODE_COUNT * config.decoder.latent_embedding_size, + ) + + # note: p(adj) = outer-dot(e) + + # ================== + # p(node_type|e,adj) + # ================== + if config.decoder.z_as_context: + input_size = ( + config.z_dim + + int(config.decoder.use_idx) * MAX_NODE_COUNT + # node_idx + int(config.decoder.use_depth) * (MAX_SPAN_COUNT + 1) # node_depth + ) + else: + input_size = config.decoder.latent_embedding_size + + output_size, gnn_layers = make_gnn_layers( + config.decoder.gnn, + input_size, + config.decoder.gnn_layers, + ) + self.pG_node_type_logits = GNNSequential( + gnn_layers + + [ + GraphConv(output_size, self.num_operations), # p(node_type|e) + ] + ) + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def q(self, + net: tk.BayesianNet, + g: dgl.DGLGraph, + n_z: Optional[int] = None): + config = self.config + + # embedding lookup + h = self.operation_embedding(g.ndata['node_type']) + + # feed into gnn and get node embeddings + h = self.qz_gnn_layers(g, h) + + # mean and logstd for q(z|G) + z_mean = T.maybe_clip( + self.qz_mean(h), + # min_val=-5, + # max_val=5, + ) + z_logstd = T.maybe_clip( + self.qz_logstd(h), + min_val=config.encoder.z_logstd_min, + max_val=config.encoder.z_logstd_max, + ) + + # add 'z' random variable + qz = tk.Normal(mean=z_mean, logstd=z_logstd, event_ndims=1) + if config.encoder.use_posterior_flow: + qz = tk.FlowDistribution(qz, self.qz_flow) + z = net.add('z', qz, n_samples=n_z) + + def p(self, + net: tk.BayesianNet, + g: Optional[dgl.DGLGraph] = None, + n_z: Optional[int] = None, + use_biased: bool = False): + config = self.config + + # sample z ~ p(z) + pz = tk.UnitNormal([1, config.z_dim], event_ndims=1) + if config.decoder.use_prior_flow: + pz = tk.FlowDistribution(pz, self.pz_flow) + z = net.add('z', pz, n_samples=n_z) + + # p(node_count|z) + node_count_logits = self.pG_node_count_logits(z.tensor) + if use_biased: + p_node_count = BiasedCategorical( + alpha=MAX_NODE_COUNT * MAX_NODE_COUNT, + threshold=0.5, + logits=node_count_logits, + ) + else: + p_node_count = tk.Categorical(logits=node_count_logits) + node_count = net.add('node_count', p_node_count) + + # graph embedding + h = z.tensor + h = self.pG_graph_embedding(h) + h = self.pG_node_embedding(h) + h = T.reshape( + h, + T.shape(h)[:-1] + [ + MAX_NODE_COUNT, + config.decoder.latent_embedding_size + ] + ) + + # p(A|e) + edge_logits = edge_logits_by_dot_product(h) + edge_logits = dense_to_triu(edge_logits, MAX_NODE_COUNT) + + if use_biased: + p_adj = BiasedBernoulli( + alpha=MAX_NODE_COUNT, + threshold=0.5, + logits=edge_logits, + event_ndims=1, + ) + else: + p_adj = tk.Bernoulli(logits=edge_logits, event_ndims=1) + + adj = net.add('adj', p_adj) + + if g is None: + # construct the `g` from the `adj`, assuming full MAX_NODE_COUNT adj + def make_graph(triu_adj): + adj = triu_to_dense(triu_adj, MAX_NODE_COUNT) + + # make graph + u, v = T.where(adj) + g = dgl.graph((u, 
v), num_nodes=MAX_NODE_COUNT) + g = dgl.add_reverse_edges(g) + g = dgl.add_self_loop(g) + + # make `node_idx` + node_idx = T.maximum( + T.reduce_max( + (adj * torch.cumsum(adj, dim=-1)), + axis=[-2] + ) - 1, + T.int_scalar(0, dtype=T.int64), + ) + g.ndata['node_idx'] = node_idx + return g + + adj_shape = T.shape(adj.tensor) + if len(adj_shape) == 3: + g = [ + dgl.batch([ + make_graph(adj.tensor[i, j]) + for j in range(adj_shape[1]) + ]) + for i in range(adj_shape[0]) + ] + elif len(adj_shape) == 2: + g = dgl.batch([ + make_graph(adj.tensor[i]) + for i in range(adj_shape[0]) + ]) + else: + raise RuntimeError(f'Unsupported adj.shape: {adj_shape}') + + else: + # expand the node_count of each graph to MAX_NODE_COUNT + sub_graphs = [] + for sub_g in dgl.unbatch(g): + # struct + sub_u, sub_v = sub_g.edges() + sub_node_idx = sub_g.ndata['node_idx'] + mask = sub_u < sub_v + sub_u = sub_u[mask] + sub_v = sub_v[mask] + sub_g = dgl.graph((sub_u, sub_v), num_nodes=MAX_NODE_COUNT) + sub_g = dgl.add_reverse_edges(sub_g) + sub_g = dgl.add_self_loop(sub_g) + + # feature + if sub_node_idx.shape[0] < MAX_NODE_COUNT: + sub_node_idx = T.concat( + [ + sub_node_idx, + T.zeros([MAX_NODE_COUNT - sub_node_idx.shape[0]], dtype=T.int64) + ], + axis=0 + ) + sub_g.ndata['node_idx'] = sub_node_idx + + # add this graph + sub_graphs.append(sub_g) + g = dgl.batch(sub_graphs) + + net.meta['g'] = g + + # p(node_type|e) + if config.decoder.z_as_context: + # z as context + z_shape = T.shape(z.tensor) + h = T.repeat( + T.reshape(z.tensor, z_shape[:-1] + [1, z_shape[-1]]), + [1] * (len(z_shape) - 1) + [MAX_NODE_COUNT, 1] + ) + + # h = [] + # for i, node_count in enumerate(g.batch_num_nodes()): + # h.append( + # T.repeat( + # z.tensor[..., i: i+1, :], + # [1] * (len(h_shape) - 1) + [int(T.to_numpy(node_count)), 1], + # ) + # ) + # h = T.concat(h, axis=-2) + + # node_depth and node_idx + h2 = decoder_use_depth_and_idx( + g, + config.decoder.use_depth, + config.decoder.use_idx, + ) + if h2 is not None: + h = T.broadcast_concat(h, h2, axis=-1) + + h_shape = T.shape(h) + h = T.reshape( + h, + h_shape[:-3] + [h_shape[-3] * h_shape[-2], h_shape[-1]] + ) + + node_type_logits = self.pG_node_type_logits(g, h) + node_type_logits = T.reshape(node_type_logits, h_shape[:-1] + [self.num_operations]) + + # if use_biased: + # p_node_type = BiasedCategorical( + # alpha=MAX_NODE_COUNT, + # threshold=0.5, + # logits=node_type_logits, + # event_ndims=1 + # ) + # else: + # p_node_type = tk.Categorical(logits=node_type_logits, event_ndims=1) + + p_node_type = tk.Categorical(logits=node_type_logits, event_ndims=1) + node_type = net.add('node_type', p_node_type) diff --git a/tracegnn/models/trace_vae/model/trace_vae.py b/tracegnn/models/trace_vae/model/trace_vae.py new file mode 100644 index 0000000..71e076a --- /dev/null +++ b/tracegnn/models/trace_vae/model/trace_vae.py @@ -0,0 +1,161 @@ +from enum import Enum +from typing import * + +import mltk +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.typing_ import TensorOrData + +from ..constants import * +from ..tensor_utils import * +from ..types import * +from .latency_vae import * +from .operation_embedding import * +from .struct_vae import * + +__all__ = [ + 'TraceVAEArch', + 'TraceVAEConfig', + 'TraceVAE', +] + + +class TraceVAEArch(str, Enum): + DEFAULT = 'default' + + +class TraceVAEConfig(mltk.Config): + # operation embedding + operation_embedding_dim: int = 40 + + # the architecture selector + arch: TraceVAEArch = TraceVAEArch.DEFAULT + + # the default architecture + 
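+    # (at test time, `load_config` in test_utils.py patches these fields from
+    # extra command-line arguments such as `--model.struct.z_dim=5`, parsing the
+    # values with yaml.safe_load; the flag path mirrors the nesting below)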
struct: TraceStructVAEConfig = TraceStructVAEConfig() + latency: TraceLatencyVAEConfig = TraceLatencyVAEConfig() + use_latency: bool = True + + +class TraceVAE(tk.layers.BaseLayer): + + config: TraceVAEConfig + num_operations: int + + def __init__(self, config: TraceVAEConfig, num_operations: int): + super().__init__() + + # =================== + # memorize the config + # =================== + self.config = config + self.num_operations = num_operations + + # ============== + # the components + # ============== + self.operation_embedding = OperationEmbedding( + num_operations=num_operations, + embedding_dim=config.operation_embedding_dim, + ) + if self.config.arch == TraceVAEArch.DEFAULT: + self.struct_vae = TraceStructVAE(config.struct, self.operation_embedding) + if self.config.use_latency: + self.latency_vae = TraceLatencyVAE( + config.latency, + config.struct.z_dim, + self.operation_embedding, + ) + else: + raise ValueError(f'Unsupported arch: {self.config.arch!r}') + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + if attr == 'config': + return False + return super()._is_attr_included_in_repr(attr, value) + + def _call_graph_batch_build(self, G: TraceGraphBatch): + G.build_dgl( + add_self_loop=True, + directed=False, + # directed=('reverse' if self.config.edge.reverse_directed else False), + ) + + def q(self, + G: TraceGraphBatch, + observed: Optional[Mapping[str, TensorOrData]] = None, + n_z: Optional[int] = None, + no_latency: bool = False, + ): + config = self.config + + self._call_graph_batch_build(G) + net = tk.BayesianNet(observed=observed) + + self.struct_vae.q(net, G.dgl_batch, n_z=n_z) + if config.use_latency and not no_latency: + self.latency_vae.q(net, G.dgl_batch, n_z=n_z) + + return net + + def p(self, + observed: Optional[Mapping[str, TensorOrData]] = None, + G: Optional[TraceGraphBatch] = None, # the observed `G` + n_z: Optional[int] = None, + no_latency: bool = False, + use_biased: bool = False, + use_latency_biased: bool = False, + latency_logstd_min: Optional[float] = None, + latency_log_prob_weight: bool = False, + std_limit: Optional[T.Tensor] = None, + ) -> tk.BayesianNet: + config = self.config + + # populate `observed` from `G` if specified, and construct net + if G is not None: + self._call_graph_batch_build(G) + g = G.dgl_batch + observed = observed or {} + + # struct + observed['node_count'] = G.dgl_batch.batch_num_nodes() + observed['adj'] = T.stack( + [ + dense_triu_adj( + g, + MAX_NODE_COUNT, + reverse=False, + ) + for g in G.dgl_graphs + ], + axis=0 + ) + # observed['span_count'] = pad_node_feature(G, 'span_count') + observed['node_type'] = pad_node_feature(G, 'node_type') + + # latency + latency = pad_node_feature(G, 'latency')[..., :LATENCY_DIM] + if config.latency.decoder.clip_latency_to_one_dim: + latency = latency[..., :1] + observed['latency'] = latency + else: + g = None + + # the Bayesian net + net = tk.BayesianNet(observed=observed) + + # call components + self.struct_vae.p(net, g, n_z=n_z, use_biased=use_biased) + if config.use_latency and not no_latency: + g = net.meta['g'] + self.latency_vae.p( + net, + g, + n_z=n_z, + use_biased=use_biased and use_latency_biased, + latency_logstd_min=latency_logstd_min, + latency_log_prob_weight=latency_log_prob_weight, + std_limit=std_limit, + ) + + return net diff --git a/tracegnn/models/trace_vae/tensor_utils.py b/tracegnn/models/trace_vae/tensor_utils.py new file mode 100644 index 0000000..818403e --- /dev/null +++ b/tracegnn/models/trace_vae/tensor_utils.py @@ -0,0 +1,225 @@ +from 
typing import * + +import dgl +import numpy as np +import torch +from tensorkit import tensor as T + +from tracegnn.models.trace_vae.constants import * +from tracegnn.models.trace_vae.types import * +from tracegnn.utils.array_buffer import ArrayBuffer + +__all__ = [ + 'latency_onehot_to_mask', + 'edge_logits_by_dot_product', + 'dense_to_triu', + 'triu_to_dense', + 'dense_triu_adj', + 'pad_node_feature', + 'get_moments', + 'node_count_mask', + 'collect_operation_id', + 'collect_latency_std', + 'collect_latency_reldiff', + 'collect_p_node_count', + 'collect_p_edge', +] + + +def latency_onehot_to_mask(onehot: T.Tensor) -> T.Tensor: + """ + >>> onehot = T.as_tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + >>> T.to_numpy(latency_onehot_to_mask(onehot)) + array([[1, 0, 0], + [1, 1, 0], + [1, 1, 1]]) + >>> T.to_numpy(latency_onehot_to_mask(T.cast(onehot, dtype=T.float32))) + array([[1., 0., 0.], + [1., 1., 0.], + [1., 1., 1.]], dtype=float32) + """ + origin_dtype = T.get_dtype(onehot) + onehot = T.as_tensor(onehot, dtype=T.boolean) + shape = T.shape(onehot) + right = shape[-1] - 1 + mask = T.full(shape, False, dtype=T.boolean) + mask[..., right] = onehot[..., right] + while right > 0: + old_right = right + right -= 1 + mask[..., right] = T.logical_or(mask[..., old_right], onehot[..., right]) + return T.cast(mask, dtype=origin_dtype) + + +def edge_logits_by_dot_product(h: T.Tensor) -> T.Tensor: + left = h + right = T.swap_axes(h, -1, -2) + return T.matmul(left, right) + + +def triu_mask(node_count: int) -> T.Tensor: + return torch.triu(T.full([node_count, node_count], True, T.boolean), 1) + + +def dense_to_triu(x: T.Tensor, node_count: int) -> T.Tensor: + mask = triu_mask(node_count) + shape = T.shape(x) + return T.reshape(x, shape[:-2] + [-1])[..., mask.reshape(-1)] + + +def triu_to_dense(x: T.Tensor, + node_count: int, + pad_value: Union[int, float] = 0) -> T.Tensor: + mask = triu_mask(node_count).reshape(-1) + ret = T.full([node_count * node_count], pad_value, dtype=T.get_dtype(x)) + ret[mask] = x + return T.reshape(ret, [node_count, node_count]) + + +def dense_triu_adj(g: dgl.DGLGraph, node_count: int, reverse: bool = False) -> T.Tensor: + adj = T.zeros([node_count, node_count], dtype=T.float32) + u, v = g.edges() + if reverse: + v, u = u, v + adj[u, v] = 1 + # adj = to_dense_adj( + # T.stack([u, v], axis=0), + # max_num_nodes=node_count + # ) + return dense_to_triu(adj, node_count) + + +def pad_node_feature(G: TraceGraphBatch, + feature_name: str, + max_node_count: int = MAX_NODE_COUNT): + # inspect graph count + graph_count = len(G.dgl_graphs) + + # inspect features + vec = G.dgl_batch.ndata[feature_name] + value_shape = T.shape(vec)[1:] + dtype = T.get_dtype(vec) + device = T.get_device(vec) + + # todo: whether or not it's better to use concat instead of copying into a new tensor? 
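+    # (an untested concat-based sketch of that alternative, for reference only:
+    #
+    #     rows = [
+    #         T.concat(
+    #             [g.ndata[feature_name],
+    #              T.zeros([max_node_count - g.number_of_nodes()] + value_shape,
+    #                      dtype=dtype, device=device)],
+    #             axis=0,
+    #         )
+    #         for g in G.dgl_graphs
+    #     ]
+    #     ret = T.stack(rows, axis=0)
+    # )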
+ with T.no_grad(): + ret = T.zeros( + [graph_count, max_node_count] + value_shape, + dtype=dtype, + device=device, + ) + for i in range(graph_count): + vec = G.dgl_graphs[i].ndata[feature_name] + ret[i, :T.shape(vec)[0]] = vec + return ret + + +def get_moments(x, + axis: Optional[List[int]] = None, + clip_var: bool = False, + ) -> Tuple[T.Tensor, T.Tensor]: + mean = T.reduce_mean(x, axis=axis) + var = T.reduce_mean(x ** 2, axis=axis) - mean ** 2 + if clip_var: + var = T.maximum(var, dtype=T.get_dtype(var)) + return mean, var + + +def node_count_mask(node_count, + max_node_count: int, + dtype: Optional[str] = None) -> T.Tensor: + h = T.arange(0, max_node_count, dtype=T.get_dtype(node_count)) + node_count = T.expand_dim(node_count, axis=-1) + h = h < node_count + if dtype is not None: + h = T.cast(h, dtype) + return h + + +def collect_operation_id(buf, chain, mask=None): + if 'node_type' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + node_type = chain.p['node_type'].tensor + if len(T.shape(node_type)) == 3: + node_type = node_type[0, ...] + node_type = T.to_numpy(node_type) + if mask is None: + for i, k in enumerate(node_count): + buf.extend(node_type[i, :k]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(node_type[i, :k]) + + +def collect_latency_std(buf, chain, mask=None): + if 'latency' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + latency_std = chain.p['latency'].distribution.base_distribution.std + if len(T.shape(latency_std)) == 4: + latency_std = latency_std[0, ...] + latency_std = T.to_numpy(latency_std) + + if mask is None: + for i, k in enumerate(node_count): + buf.extend(latency_std[i, :k, 0]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(latency_std[i, :k, 0]) + +def collect_p_node_count(buf, chain, mask=None): + node_count = chain.p['node_count'].distribution.probs[0] + truth_node_count = chain.p['node_count'].tensor.unsqueeze(1) + + node_count_p = torch.gather(node_count, 1, truth_node_count).squeeze(-1) + + if mask is None: + buf.extend(T.to_numpy(node_count_p)) + else: + buf.extend(T.to_numpy(node_count_p)[mask]) + +def collect_p_edge(buf: ArrayBuffer, chain, mask=None): + # prob = np.exp(T.to_numpy(chain.p.log_prob('adj'))[0]) + node_count = T.to_numpy(chain.p['node_count'].tensor) + p_edge = chain.p['adj'].distribution.probs[0] + truth_p_edge = chain.p['adj'].tensor + + if mask is None: + for i in range(p_edge.shape[0]): + cur_p_edge = T.to_numpy(triu_to_dense(p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + cur_truth = T.to_numpy(triu_to_dense(truth_p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + buf.extend(np.abs((1.0 - cur_truth) - cur_p_edge).reshape(-1)) + else: + for i, m in enumerate(mask): + if m: + cur_p_edge = T.to_numpy(triu_to_dense(p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + cur_truth = T.to_numpy(triu_to_dense(truth_p_edge[i], MAX_NODE_COUNT))[:node_count[i], :node_count[i]] + buf.extend(np.abs((1.0 - cur_truth) - cur_p_edge).reshape(-1)) + +def collect_latency_reldiff(buf, chain, mask=None, abs=True): + def collect_dist_val(attr=None): + if attr is None: + v = chain.p['latency'].tensor + else: + v = getattr(chain.p['latency'].distribution.base_distribution, attr) + if len(T.shape(v)) == 4: + v = v[0, ...] 
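+        # the 4-d case carries a leading `n_z` sample axis, reduced to the first
+        # sample above; keep only the first latency dimension here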
+ return T.to_numpy(v[..., 0]) + + if 'latency' in chain.p: + node_count = T.to_numpy(chain.p['node_count'].tensor) + latency = collect_dist_val() + latency_mean = collect_dist_val('mean') + latency_std = collect_dist_val('std') + rel_diff = (latency - latency_mean) / np.maximum(latency_std, 1e-7) + if abs: + rel_diff = np.abs(rel_diff) + + if mask is None: + for i, k in enumerate(node_count): + buf.extend(rel_diff[i, :k]) + else: + for i, (k, m) in enumerate(zip(node_count, mask)): + if m: + buf.extend(rel_diff[i, :k]) diff --git a/tracegnn/models/trace_vae/test.py b/tracegnn/models/trace_vae/test.py new file mode 100644 index 0000000..7930575 --- /dev/null +++ b/tracegnn/models/trace_vae/test.py @@ -0,0 +1,275 @@ +import pickle +from pprint import pprint +from tempfile import TemporaryDirectory + +import os +import mltk +import click +import tensorkit as tk +import numpy as np +from tensorkit import tensor as T +from tensorkit.examples.utils import print_experiment_summary + +from tracegnn.data import * +from tracegnn.models.trace_vae.dataset import TraceGraphDataStream +from tracegnn.models.trace_vae.evaluation import * +from tracegnn.models.trace_vae.graph_utils import * +from tracegnn.models.trace_vae.test_utils import * +from tracegnn.models.trace_vae.types import TraceGraphBatch +from tracegnn.utils import * + + +@click.group() +def main(): + pass + + +@main.command(context_settings=dict( + ignore_unknown_options=True, + help_option_names=[], +)) +@click.option('-D', '--data-dir', required=False) +@click.option('-M', '--model-path', required=True) +@click.option('-o', '--nll-out', required=False, default=None) +@click.option('--proba-out', default=None, required=False) +@click.option('--auc-out', default=None, required=False) +@click.option('--latency-out', default=None, required=False) +@click.option('--gui', is_flag=True, default=False, required=False) +@click.option('--device', required=False, default=None) +@click.option('--n_z', type=int, required=False, default=10) +@click.option('--batch-size', type=int, default=128) +@click.option('--clip-nll', type=float, default=100_000) +@click.option('--no-biased', is_flag=True, default=False, required=False) +@click.option('--no-latency-biased', is_flag=True, default=False, required=False) +@click.option('--no-latency', is_flag=True, default=False, required=False) +@click.option('--use-train-val', is_flag=True, default=False, required=False) +@click.option('--infer-bias-std', is_flag=True, default=False, required=False) +@click.option('--bias-std-normal-p', type=float, default=0.995, required=False) +@click.option('--infer-threshold', is_flag=True, default=False, required=False) +@click.option('--threshold-p', type=float, default=0.995, required=False) +@click.option('--threshold-amplify', type=float, default=1.0, required=False) +@click.option('--no-latency-log-prob-weight', is_flag=True, default=False, required=False) +@click.option('--use-std-limit', is_flag=True, default=False, required=False) +@click.option('--std-limit-global', is_flag=True, default=False, required=False) +@click.option('--std-limit-fixed', type=float, default=None, required=False) +@click.option('--std-limit-p', type=float, default=0.99, required=False) +@click.option('--std-limit-amplify', type=float, default=1.0, required=False) +@click.argument('extra_args', nargs=-1, type=click.UNPROCESSED) +def evaluate_nll(data_dir, model_path, nll_out, proba_out, auc_out, latency_out, gui, device, + n_z, batch_size, clip_nll, no_biased, no_latency_biased, no_latency, + 
use_train_val, infer_bias_std, bias_std_normal_p, infer_threshold, + threshold_p, threshold_amplify, no_latency_log_prob_weight, + use_std_limit, std_limit_global, std_limit_fixed, std_limit_p, std_limit_amplify, + extra_args): + N_LIMIT = None + + if infer_bias_std or infer_threshold or use_std_limit: + use_train_val = True + + with mltk.Experiment(mltk.Config, args=[]) as exp: + # check parameters + if gui: + proba_out = ':show:' + auc_out = ':show:' + latency_out = ':show:' + + with T.use_device(device or T.first_gpu_device()): + # load the config + train_config = load_config( + model_path=model_path, + strict=False, + extra_args=extra_args, + ) + if data_dir is None: + data_dir = train_config.dataset.root_dir + + # load the dataset + data_names = ['test', 'test-drop', 'test-latency'] + test_db, id_manager = open_trace_graph_db( + data_dir, + names=data_names + ) + print('Test DB:', test_db) + latency_range = TraceGraphLatencyRangeFile( + id_manager.root_dir, + require_exists=True, + ) + test_stream = TraceGraphDataStream( + test_db, id_manager=id_manager, batch_size=batch_size, + shuffle=False, skip_incomplete=False, data_count=N_LIMIT, + ) + + # also load train / val + if use_train_val: + train_db, _ = open_trace_graph_db( + data_dir, + names=['train'], + ) + print('Train DB:', train_db) + val_db, _ = open_trace_graph_db( + data_dir, + names=['val'] + ) + print('Val DB:', val_db) + train_stream = TraceGraphDataStream( + train_db, id_manager=id_manager, batch_size=batch_size, + shuffle=True, skip_incomplete=False, data_count=N_LIMIT, + ) + val_stream = TraceGraphDataStream( + val_db, id_manager=id_manager, batch_size=batch_size, + shuffle=True, skip_incomplete=False, data_count=N_LIMIT, + ) + else: + train_stream = val_stream = None + + print_experiment_summary(exp, train_stream, val_stream, test_stream) + + # load the model + vae = load_model2( + model_path=model_path, + train_config=train_config, + id_manager=id_manager, + ) + mltk.print_config(vae.config, title='Model Config') + vae = vae.to(T.current_device()) + + # do evaluation + operation_id = {} + latency_std = {} + latency_reldiff = {} + p_node_count = {} + p_edge = {} + nll_result = {} + thresholds = {} + std_group_limit = np.full([id_manager.num_operations], np.nan, dtype=np.float32) + + def F(stream, category, n_z, threshold=None, std_limit=None): + # the save files kw + kw = dict( + nll_output_file=ensure_parent_exists(nll_out), + proba_cdf_file=ensure_parent_exists(proba_out), + auc_curve_file=ensure_parent_exists(auc_out), + latency_hist_file=ensure_parent_exists(latency_out), + ) + differ_set = set() + + for k in kw: + if kw[k] is not None: + s = kw[k].replace('test', category) + if category == 'test' or s != kw[k]: + differ_set.add(k) + kw[k] = s + kw = {k: v for k, v in kw.items() if k in differ_set} + + # the output temp dir + with TemporaryDirectory() as temp_dir: + if 'nll_output_file' not in kw: + kw['nll_output_file'] = ensure_parent_exists( + os.path.join(temp_dir, 'nll.npz') + ) + + # do evaluation + result_dict = do_evaluate_nll( + test_stream=stream, + vae=vae, + id_manager=id_manager, + latency_range=latency_range, + n_z=n_z, + use_biased=(not no_biased) and (category == 'test'), + use_latency_biased=not no_latency_biased, + no_latency=no_latency, + no_struct=False, + latency_log_prob_weight=not no_latency_log_prob_weight, + std_limit=std_limit, + test_threshold=threshold, + clip_nll=clip_nll, + use_embeddings=False, + operation_id_dict_out=operation_id, + latency_std_dict_out=latency_std, + 
p_node_count_dict_out=p_node_count, + p_edge_dict_out=p_edge, + latency_reldiff_dict_out=latency_reldiff, + latency_dict_prefix=f'{category}_', + **kw, + ) + result_dict = {f'{category}_{k}': v for k, v in result_dict.items()} + exp.doc.update({'result': result_dict}, immediately=True) + pprint(result_dict) + + # load the NLLs if category in ('train', 'val') + if category in ('train', 'val'): + nll_result[category] = np.load(kw['nll_output_file'])['nll_list'] + + tk.layers.set_eval_mode(vae) + with T.no_grad(): + if use_train_val: + F(train_stream, 'train', 1) + F(val_stream, 'val', 1) + + if infer_bias_std: + bias_std = np.percentile(latency_reldiff['val_normal'].array, bias_std_normal_p * 100) + exp.doc.update({'result': {'bias_std': bias_std}}, immediately=True) + print(f'Set bias_std = {bias_std:.3f}, bias_std_normal_p = {bias_std_normal_p:.3f}') + vae.config.latency.decoder.biased_normal_std_threshold = bias_std + + if infer_threshold: + for category in ('train', 'val'): + th_cand = [] + for _ in range(10): + nll_subset = nll_result[category] + nll_subset = np.random.choice(nll_subset, replace=True, size=len(nll_subset)) + if clip_nll: + nll_subset = nll_subset[nll_subset < clip_nll - 1e-7] + else: + nll_subset = nll_subset[np.isfinite(nll_subset)] + th = np.percentile(nll_subset, threshold_p * 100) * threshold_amplify + th_cand.append(th) + thresholds[f'{category}_threshold'] = th = np.median(th_cand) + print( + f'Set {category}_threshold = {th:.3f}, ' + f'threshold_p = {threshold_p:.3f}, ' + f'threshold_amplify = {threshold_amplify:.3f}' + ) + exp.doc.update({'result': thresholds}, immediately=True) + + if use_std_limit: + if std_limit_fixed is not None: + print(f'Std limit fixed: {std_limit_fixed:.4f}') + std_group_limit[:] = std_limit_fixed + elif std_limit_global: + key = 'val_normal' + std_limit = float(np.percentile( + latency_std[key].array, + std_limit_p * 100 + )) + print(f'Std limit: {std_limit:.4f}') + std_group_limit[:] = std_limit + else: + key = 'val_normal' + v1 = operation_id[key].array + v2 = latency_std[key].array + max_limit = 0 + + for srv_id in range(id_manager.num_operations): + v = v2[v1 == srv_id] + if len(v) > 0: + srv_limit = ( + std_limit_amplify * + float(np.percentile(v, std_limit_p * 100)) + ) + std_group_limit[srv_id] = srv_limit + max_limit = max(max_limit, srv_limit) + + for srv_id in range(id_manager.num_operations): + if np.isnan(std_group_limit[srv_id]): + std_group_limit[srv_id] = max_limit + pprint({i: v for i, v in enumerate(std_group_limit)}) + + else: + std_group_limit = None + + F(test_stream, 'test', n_z, thresholds.get('val_threshold'), std_limit=std_group_limit) + + +if __name__ == '__main__': + main() diff --git a/tracegnn/models/trace_vae/test_utils.py b/tracegnn/models/trace_vae/test_utils.py new file mode 100644 index 0000000..ef373eb --- /dev/null +++ b/tracegnn/models/trace_vae/test_utils.py @@ -0,0 +1,106 @@ +from urllib.error import HTTPError +from typing import * + +import mltk +import tensorkit as tk +import torch +import yaml +from tensorkit import tensor as T + +from tracegnn.models.trace_vae.model import TraceVAE +from tracegnn.models.trace_vae.train import ExpConfig as TrainConfig +from tracegnn.data import * +from tracegnn.utils import * + +__all__ = [ + 'load_config', + 'load_model', + 'load_model2', +] + + +def _model_and_config_file(model_path: str) -> Tuple[str, str]: + # get model file and config file path + if model_path.endswith('.pt'): + model_file = model_path + config_file = model_path.rsplit('/', 2)[-3] + 
'/config.json' + else: + if not model_path.endswith('/'): + model_path += '/' + model_file = model_path + 'models/final.pt' + config_file = model_path + 'config.json' + + return model_file, config_file + + +def load_config(model_path: str, strict: bool, extra_args) -> TrainConfig: + # get model file and config file path + model_file, config_file = _model_and_config_file(model_path) + + # load config + with as_local_file(config_file) as config_file: + config_loader = mltk.ConfigLoader(TrainConfig) + config_loader.load_file(config_file) + + # also patch the config + if extra_args: + extra_args_dict = {} + for arg in extra_args: + if arg.startswith('--'): + arg = arg[2:] + if '=' not in arg: + val = True + else: + arg, val = arg.split('=', 1) + val = yaml.safe_load(val) + extra_args_dict[arg] = val + else: + raise ValueError(f'Unsupported argument: {arg!r}') + config_loader.load_object(extra_args_dict) + + # get the config + if strict: + discard_undefined = mltk.type_check.DiscardMode.NO + else: + discard_undefined = mltk.type_check.DiscardMode.WARN + return config_loader.get(discard_undefined=discard_undefined) + + +def load_model(model_path: str, + id_manager: TraceGraphIDManager, + strict: bool, + extra_args, + ) -> Tuple[TraceVAE, TrainConfig]: + # load config + train_config = load_config(model_path, strict, extra_args) + + # load model + vae = load_model2(model_path, train_config, id_manager) + return vae, train_config + + +def load_model2(model_path: str, + train_config: TrainConfig, + id_manager: TraceGraphIDManager, + ) -> TraceVAE: + # get model file and config file path + model_file, config_file = _model_and_config_file(model_path) + + # load the model + vae = TraceVAE(train_config.model, id_manager.num_operations) + try: + with as_local_file(model_file) as model_file: + vae.load_state_dict(torch.load( + model_file, + map_location=T.current_device() + )) + except HTTPError as ex: + if ex.code != 404: + raise + with as_local_file(model_file) as model_file: + vae.load_state_dict(torch.load( + model_file, + map_location=T.current_device() + )) + tk.init.set_initialized(vae) + return vae diff --git a/tracegnn/models/trace_vae/train.py b/tracegnn/models/trace_vae/train.py new file mode 100644 index 0000000..61053ce --- /dev/null +++ b/tracegnn/models/trace_vae/train.py @@ -0,0 +1,531 @@ +import json +import math +import random +import shutil +import traceback +from enum import Enum +from functools import wraps +from typing import * + +import os +import sys +import mltk +import tensorkit as tk +import numpy as np +import torch +import click +from tensorkit import tensor as T +from tensorkit.examples import utils +from tensorkit.train import Checkpoint + +from tracegnn.data import * +from tracegnn.models.trace_vae.evaluation import * +from tracegnn.models.trace_vae.graph_utils import * +from tracegnn.models.trace_vae.tensor_utils import * +from tracegnn.models.trace_vae.types import * +from tracegnn.models.trace_vae.model import * +from tracegnn.models.trace_vae.dataset import * +from tracegnn.utils import * + + +class NANLossError(Exception): + + def __init__(self, epoch): + super().__init__(epoch) + + @property + def epoch(self) -> Optional[int]: + return self.args[0] + + def __str__(self): + return f'NaN loss encountered at epoch {self.epoch}' + + +class OptimizerType(str, Enum): + ADAM = 'adam' + RMSPROP = 'rmsprop' + + +class ExpConfig(mltk.Config): + model: TraceVAEConfig = TraceVAEConfig() + device: Optional[str] = 'cpu' + seed: Optional[int] = 0 + + class train(mltk.Config): + 
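+        # Training runs in two stages (see `train_part` usage at the bottom of
+        # this file): the full VAE is trained for `struct_pretrain_epochs`, then
+        # only the `latency_vae` parameters are updated until `max_epoch`.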
max_epoch: int = 60 + struct_pretrain_epochs: Optional[int] = 40 # number of epochs to pre-train the struct_vae + ckpt_epoch_freq: Optional[int] = 5 + test_epoch_freq: Optional[int] = 5 + latency_hist_epoch_freq: Optional[int] = 10 + latency_std_hist_epoch_freq: Optional[int] = 5 + + use_early_stopping: bool = False + val_epoch_freq: Optional[int] = 2 + + kl_beta: float = 1.0 + warm_up_epochs: Optional[int] = None # number of epochs to warm-up the prior (KLD) + + l2_reg: float = 0.0001 + z_unit_ball_reg: Optional[float] = None + z2_unit_ball_reg: Optional[float] = None + + init_batch_size: int = 64 + batch_size: int = 64 + val_batch_size: int = 64 + + optimizer: OptimizerType = OptimizerType.RMSPROP + initial_lr: float = 0.001 + lr_anneal_ratio: float = 0.1 + lr_anneal_epochs: int = 30 + clip_norm: Optional[float] = None + global_clip_norm: Optional[float] = 10 # important for numerical stability + + test_n_z: int = 10 + num_plot_samples: int = 20 + + class test(mltk.Config): + batch_size: int = 64 + eval_n_z: int = 10 + use_biased: bool = True + latency_log_prob_weight: bool = True + clip_nll: Optional[float] = 100_000 + + class report(mltk.Config): + html_ext: str = '.html.gz' + + class dataset(mltk.Config): + root_dir: str = os.path.abspath('./data/processed') + + +def main(exp: mltk.Experiment[ExpConfig]): + # config + config = exp.config + + # set random seed to encourage reproducibility (does it really work?) + if config.seed is not None: + T.random.set_deterministic(True) + T.random.seed(config.seed) + np.random.seed(config.seed) + random.seed(config.seed) + + # Load data + id_manager = TraceGraphIDManager(os.path.join(config.dataset.root_dir, 'id_manager')) + latency_range = TraceGraphLatencyRangeFile(os.path.join(config.dataset.root_dir, 'id_manager')) + + train_db = TraceGraphDB(BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'train'))) + val_db = TraceGraphDB(BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'val'))) + test_db = TraceGraphDB( + BytesMultiDB( + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test')), + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test-drop')), + BytesSqliteDB(os.path.join(config.dataset.root_dir, 'processed', 'test-latency')), + ) + ) + train_stream = TraceGraphDataStream( + train_db, id_manager=id_manager, batch_size=config.train.batch_size, + shuffle=True, skip_incomplete=False, + ) + val_stream = TraceGraphDataStream( + val_db, id_manager=id_manager, batch_size=config.train.val_batch_size, + shuffle=False, skip_incomplete=False, + ) + test_stream = TraceGraphDataStream( + test_db, id_manager=id_manager, batch_size=config.test.batch_size, + shuffle=False, skip_incomplete=False, + ) + + utils.print_experiment_summary( + exp, + train_data=train_stream, + val_data=val_stream, + test_data=test_stream + ) + print('Train Data:', train_db) + print('Val Data:', val_db) + print('Test Data:', test_db) + + # build the network + vae: TraceVAE = TraceVAE( + config.model, + id_manager.num_operations, + ) + vae = vae.to(T.current_device()) + params, param_names = utils.get_params_and_names(vae) + utils.print_parameters_summary(params, param_names) + print('') + mltk.print_with_time('Network constructed.') + + # define the training method for a certain model part + def train_part(params, start_epoch, max_epoch, latency_only, do_final_eval): + # util to ensure all installed hooks will only run within this context + in_context = [True] + + def F(func): + @wraps(func) + def wrapper(*args, 
**kwargs): + if in_context[0]: + return func(*args, **kwargs) + return wrapper + + # the train procedure + try: + # buffer to collect stds of each p(latency|z) + latency_std = {} + for key in ('train', 'val', 'test_normal', 'test_drop', 'test_latency'): + latency_std[key] = ArrayBuffer(81920) + + def should_collect_latency_std(): + return ( + config.train.latency_std_hist_epoch_freq and + loop.epoch % config.train.latency_std_hist_epoch_freq == 0 + ) + + def clear_std_buf(): + for buf in latency_std.values(): + buf.clear() + + # the initialization function + def initialize(): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=train_db.sample_n(config.train.init_batch_size), + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + loss = chain.vi.training.sgvb(reduction='mean') + mltk.print_with_time(f'Network initialized: loss = {T.to_numpy(loss)}') + + # the train functions + def on_train_epoch_begin(): + # set train mode + if latency_only: + tk.layers.set_eval_mode(vae) + tk.layers.set_train_mode(vae.latency_vae) + else: + tk.layers.set_train_mode(vae) + + # clear std buffer + clear_std_buf() + + def train_step(trace_graphs): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + + # collect the latency std + if should_collect_latency_std(): + collect_latency_std(latency_std['train'], chain) + + # collect the log likelihoods + p_obs = [] + p_latent = [] + q_latent = [] + for name in chain.p: + if name in chain.q: + q_latent.append(chain.q[name].log_prob()) + p_latent.append(chain.p[name].log_prob()) + else: + # print(name, chain.p[name].log_prob().mean()) + p_obs.append(chain.p[name].log_prob()) + + # get E[log p(x|z)] and KLD[q(z|x)||p(z)] + recons = T.reduce_mean(T.add_n(p_obs)) + kl = T.reduce_mean(T.add_n(q_latent) - T.add_n(p_latent)) + + # KL beta + beta = config.train.kl_beta + if config.train.warm_up_epochs and loop.epoch < config.train.warm_up_epochs: + beta = beta * (loop.epoch / config.train.warm_up_epochs) + loss = beta * kl - recons + + # l2 regularization + if config.train.l2_reg: + l2_params = [] + for p, n in zip(params, param_names): + if 'bias' not in n: + l2_params.append(p) + loss = loss + config.train.l2_reg * T.nn.l2_regularization(l2_params) + + # unit ball regularization + def add_unit_ball_reg(l, t, reg): + if reg is not None: + ball_mean, ball_var = get_moments(t, axis=[-1]) + l = l + reg * ( + T.reduce_mean(ball_mean ** 2) + + T.reduce_mean((ball_var - 1) ** 2) + ) + return l + + loss = add_unit_ball_reg(loss, chain.q['z'].tensor, config.train.z_unit_ball_reg) + if 'z2' in chain.q: + loss = add_unit_ball_reg(loss, chain.q['z2'].tensor, config.train.z2_unit_ball_reg) + + # check and return the metrics + loss_val = T.to_numpy(loss) + if math.isnan(loss_val): + raise NANLossError(loop.epoch) + + return {'loss': loss, 'recons': recons, 'kl': kl} + + # the validation function + def validate(): + tk.layers.set_eval_mode(vae) + + def val_step(trace_graphs): + with T.no_grad(): + G = TraceGraphBatch( + id_manager=id_manager, + latency_range=latency_range, + trace_graphs=trace_graphs, + ) + chain = vae.q(G).chain( + vae.p, + G=G, + ) + # collect the latency std + if should_collect_latency_std(): + collect_latency_std(latency_std['val'], chain) + loss = chain.vi.training.sgvb() + return {'loss': T.to_numpy(T.reduce_mean(loss))} + + val_loop = loop.validation() + result_dict = val_loop.run(val_step, val_stream) + result_dict = { + 
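+                # prefix the metrics with 'val_', so that validation results are
+                # reported separately from the training metrics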
f'val_{k}': v + for k, v in result_dict.items() + } + summary_cb.update_metrics(result_dict) + + # the evaluation function + def evaluate(n_z, eval_loop, eval_stream, epoch, use_embeddings=False, + plot_latency_hist=False): + # latency_hist_file + latency_hist_file = None + if plot_latency_hist: + latency_hist_file = exp.make_parent(f'./plotting/latency-sample/{epoch}.jpg') + + # do evaluation + tk.layers.set_eval_mode(vae) + with T.no_grad(): + kw = {} + if should_collect_latency_std(): + kw['latency_std_dict_out'] = latency_std + kw['latency_dict_prefix'] = 'test_' + result_dict = do_evaluate_nll( + test_stream=eval_stream, + vae=vae, + id_manager=id_manager, + latency_range=latency_range, + n_z=n_z, + use_biased=config.test.use_biased, + latency_log_prob_weight=config.test.latency_log_prob_weight, + test_loop=eval_loop, + summary_writer=summary_cb, + clip_nll=config.test.clip_nll, + use_embeddings=use_embeddings, + latency_hist_file=latency_hist_file, + **kw, + ) + + with open(exp.make_parent(f'./result/test-anomaly/{epoch}.json'), 'w', encoding='utf-8') as f: + f.write(json.dumps(result_dict)) + eval_loop.add_metrics(**result_dict) + + def save_model(epoch=None): + epoch = epoch or loop.epoch + torch.save(vae.state_dict(), exp.make_parent(f'models/{epoch}.pt')) + + # final evaluation + if do_final_eval: + tk.layers.set_eval_mode(vae) + + # save the final model + save_model('final') + + clear_std_buf() + evaluate( + n_z=config.test.eval_n_z, + eval_loop=mltk.TestLoop(), + eval_stream=test_stream, + epoch='final', + use_embeddings=True, + plot_latency_hist=True, + ) + + else: + # set train mode at the beginning of each epoch + loop.on_epoch_begin.do(F(on_train_epoch_begin)) + + # the optimizer and learning rate scheduler + if config.train.optimizer == OptimizerType.ADAM: + optimizer = tk.optim.Adam(params) + elif config.train.optimizer == OptimizerType.RMSPROP: + optimizer = tk.optim.RMSprop(params) + + def update_lr(): + n_cycles = int( + loop.epoch // # (loop.epoch - start_epoch) // + config.train.lr_anneal_epochs + ) + lr_discount = config.train.lr_anneal_ratio ** n_cycles + optimizer.set_lr(config.train.initial_lr * lr_discount) + + update_lr() + loop.on_epoch_end.do(F(update_lr)) + + # install the validation function and early-stopping + if config.train.val_epoch_freq: + loop.run_after_every( + F(validate), + epochs=config.train.val_epoch_freq, + ) + + # install the evaluation function during training + if config.train.test_epoch_freq: + loop.run_after_every( + F(lambda: evaluate( + n_z=config.train.test_n_z, + eval_loop=loop.test(), + eval_stream=test_stream, + epoch=loop.epoch, + plot_latency_hist=( + config.train.latency_hist_epoch_freq and + loop.epoch % config.train.latency_hist_epoch_freq == 0 + ) + )), + epochs=config.train.test_epoch_freq, + ) + + # install the plot and sample functions during training + def after_epoch(): + save_model() + loop.run_after_every(F(after_epoch), epochs=1) + + # train the model + tk.layers.set_eval_mode(vae) + on_train_epoch_begin() + initialize() + utils.fit_model( + loop=loop, + optimizer=optimizer, + fn=train_step, + stream=train_stream, + clip_norm=config.train.clip_norm, + global_clip_norm=config.train.global_clip_norm, + # pass to `loop.run()` + limit=max_epoch, + ) + finally: + in_context = [False] + + # the train loop + loop = mltk.TrainLoop(max_epoch=config.train.max_epoch) + + # checkpoint + ckpt = Checkpoint(vae=vae) + loop.add_callback(mltk.callbacks.AutoCheckpoint( + ckpt, + root_dir=exp.make_dirs('./checkpoint'), + 
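+        # write a checkpoint every `ckpt_epoch_freq` epochs, keeping only a
+        # bounded history (see `max_checkpoints_to_keep` below)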
epoch_freq=config.train.ckpt_epoch_freq, + max_checkpoints_to_keep=10, + )) + + # early-stopping + if config.train.val_epoch_freq and config.train.use_early_stopping: + loop.add_callback(mltk.callbacks.EarlyStopping( + checkpoint=ckpt, + root_dir=exp.abspath('./early-stopping'), + metric_name='val_loss', + )) + + # the summary writer + summary_cb = SummaryCallback(summary_dir=exp.abspath('./summary')) + loop.add_callback(summary_cb) + + # pre-train the struct_vae + try: + with loop: + start_epoch = 1 + part_params = params + latency_only = False + + if (config.model.arch == TraceVAEArch.DEFAULT) and config.train.struct_pretrain_epochs: + # train struct_vae first + print(f'Start to train vae with {len(part_params)} params ...') + train_part( + list(part_params), + start_epoch=start_epoch, + max_epoch=config.train.struct_pretrain_epochs, + latency_only=latency_only, + do_final_eval=False, + ) + + # train latency_vae next + part_params = [ + p for n, p in zip(param_names, params) + if n.startswith('latency_vae') + ] + start_epoch = config.train.struct_pretrain_epochs + 1 + latency_only = True + print(f'Start to train latency_vae with {len(part_params)} params ...') + + train_part( + part_params, + start_epoch=start_epoch, + max_epoch=config.train.max_epoch, + latency_only=latency_only, + do_final_eval=False, + ) + + # do final evaluation + train_part( + [], + start_epoch=-1, + max_epoch=-1, + latency_only=False, + do_final_eval=True, + ) + + except KeyboardInterrupt: + print( + 'Train interrupted, press Ctrl+C again to skip the final test ...', + file=sys.stderr, + ) + + +if __name__ == '__main__': + with mltk.Experiment(ExpConfig) as exp: + config = exp.config + device = config.device or T.first_gpu_device() + with T.use_device(device): + retrial = 0 + while True: + try: + main(exp) + except NANLossError as ex: + if ex.epoch != 1 or retrial >= 10: + raise + retrial += 1 + print( + f'\n' + f'Restart the experiment for the {retrial}-th time ' + f'due to NaN loss at epoch {ex.epoch}.\n', + file=sys.stderr + ) + if ex.epoch == 1: + for name in ['checkpoint', 'early-stopping', 'models', + 'plotting', 'summary']: + path = exp.abspath(name) + if os.path.isdir(name): + shutil.rmtree(path) + else: + break diff --git a/tracegnn/models/trace_vae/types.py b/tracegnn/models/trace_vae/types.py new file mode 100644 index 0000000..38f471e --- /dev/null +++ b/tracegnn/models/trace_vae/types.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass +from typing import * + +import dgl +from tensorkit import tensor as T + +from tracegnn.data import * +from tracegnn.utils import * + +__all__ = ['TraceGraphBatch'] + + +@dataclass(init=False) +class TraceGraphBatch(object): + __slots__ = [ + 'id_manager', 'latency_range', + 'trace_graphs', 'dgl_graphs', 'dgl_batch' + ] + + id_manager: Optional[TraceGraphIDManager] + trace_graphs: Optional[List[TraceGraph]] # the original trace graphs + dgl_graphs: Optional[List[dgl.DGLGraph]] # graph components + dgl_batch: Optional[dgl.DGLGraph] # the batched DGL graph + + def __init__(self, + *, + id_manager: Optional[TraceGraphIDManager] = None, + latency_range: Optional[TraceGraphLatencyRangeFile] = None, + trace_graphs: Optional[List[TraceGraph]] = None, + dgl_graphs: Optional[List[dgl.DGLGraph]] = None, + dgl_batch: Optional[dgl.DGLGraph] = None, + ): + if ((trace_graphs is None) or (id_manager is None)) and \ + ((dgl_graphs is None) or (dgl_batch is None)): + raise ValueError('Insufficient arguments.') + self.id_manager = id_manager + self.latency_range = latency_range + 
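+        # the check above guarantees that either (id_manager + trace_graphs) or
+        # (dgl_graphs + dgl_batch) is fully provided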
self.trace_graphs = trace_graphs + self.dgl_graphs = dgl_graphs + self.dgl_batch = dgl_batch + + def build_dgl(self, + add_self_loop: bool = True, + directed: Union[bool, str] = False, + ): + from .dataset import trace_graph_to_dgl + if self.dgl_graphs is None: + with T.no_grad(): + with T.use_device('cpu'): + self.dgl_graphs = [ + trace_graph_to_dgl( + g, + num_node_types=self.id_manager.num_operations, + add_self_loop=add_self_loop, + latency_range=self.latency_range, + directed=directed, + ) + for g in self.trace_graphs + ] + if self.dgl_batch is None: + with T.no_grad(): + self.dgl_batch = dgl.batch(self.dgl_graphs).to(T.current_device()) + + # @property + # def dgl_graphs(self) -> List[dgl.DGLGraph]: + # if self._dgl_graphs is None: + # self.build_dgl() + # return self._dgl_graphs + # + # @property + # def dgl_batch(self) -> dgl.DGLGraph: + # if self._dgl_batch is None: + # self.build_dgl() + # return self._dgl_batch diff --git a/tracegnn/utils/__init__.py b/tracegnn/utils/__init__.py new file mode 100644 index 0000000..6d4c4da --- /dev/null +++ b/tracegnn/utils/__init__.py @@ -0,0 +1,9 @@ +from .analyze_nll import * +from .array_buffer import * +from .data_utils import * +from .fscore_utils import * +from .id_assign import * +from .latency_codec import * +from .latency_range_file import * +from .misc import * +from .summary_callback import * diff --git a/tracegnn/utils/analyze_nll.py b/tracegnn/utils/analyze_nll.py new file mode 100644 index 0000000..63be715 --- /dev/null +++ b/tracegnn/utils/analyze_nll.py @@ -0,0 +1,134 @@ +import math +import os +import sys +import traceback +from functools import wraps +from typing import * + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from sklearn.metrics import f1_score + +from .fscore_utils import * + +__all__ = ['analyze_anomaly_nll'] + + +def analyze_anomaly_nll(nll_list: np.ndarray, + label_list: np.ndarray, + up_sample_normal: int = 1, + threshold: Optional[float] = None, + proba_cdf_file: Optional[str] = None, + auc_curve_file: Optional[str] = None, + method: Optional[str] = None, + dataset: Optional[str] = None, + save_dict: bool = False, + save_filename: str = 'baseline.csv' + ) -> Dict[str, float]: + + def log_error(method, default_value=None): + @wraps(method) + def wrapper(*args, **kwargs): + try: + return method(*args, **kwargs) + except Exception: + print(''.join(traceback.format_exception(*sys.exc_info())), file=sys.stderr) + return default_value + return wrapper + + def call_plot(fn_, *args, output_file, **kwargs): + if output_file == ':show:': + fig = fn_(*args, **kwargs) + plt.show() + plt.close() + else: + fn_(*args, output_file=output_file, **kwargs) + + # up sample normal nll & label if required + if up_sample_normal and up_sample_normal > 1: + normal_nll = nll_list[label_list == 0] + normal_label = label_list[label_list == 0] + nll_list = np.concatenate( + [normal_nll] * (up_sample_normal - 1) + [nll_list], + axis=0 + ) + label_list = np.concatenate( + [normal_label] * (up_sample_normal - 1) + [label_list], + axis=0 + ) + + # prepare for analyze + result_dict = {} + is_anomaly_list = label_list != 0 + + # separated nlls for different labels + result_dict['nll_normal'] = float(np.mean(nll_list[label_list == 0])) + result_dict['nll_drop'] = float(np.mean(nll_list[label_list == 1])) + result_dict['nll_latency'] = float(np.mean(nll_list[label_list == 2])) + + # auc score + result_dict['auc'] = float(auc_score(nll_list, is_anomaly_list)) + + # best f-score + F = log_error(best_fscore, 
default_value=(math.nan, math.nan)) + + def best_fscore_for_label(label): + not_label = 2 if label == 1 else 1 + mask = label_list != not_label + return F(nll_list[mask], label_list[mask] != 0) + + best_fscore_total, _, best_pr_total, best_rc_total = F(nll_list, is_anomaly_list) + best_fscore_drop, _, best_pr_drop, best_rc_drop = best_fscore_for_label(1) + best_fscore_latency, best_threshold_latency, best_pr_latency, best_rc_latency = best_fscore_for_label(2) + result_dict.update({ + 'best_fscore': float(best_fscore_total), + 'best_fscore_drop': float(best_fscore_drop), + 'best_fscore_latency': float(best_fscore_latency), + 'best_pr': float(best_pr_total), + 'best_rc': float(best_rc_total), + 'best_pr_drop': float(best_pr_drop), + 'best_rc_drop': float(best_rc_drop), + 'best_pr_latency': float(best_pr_latency), + 'best_rc_latency': float(best_rc_latency), + 'best_threshold_latency': float(best_threshold_latency) + }) + + # f-score + F = log_error(f1_score, default_value=math.nan) + + def fscore_for_label(label): + not_label = 2 if label == 1 else 1 + mask = label_list != not_label + return F(label_list[mask] != 0, nll_list[mask] > threshold) + + if threshold is not None: + result_dict.update({ + 'fscore': float(F(is_anomaly_list, nll_list > threshold)), + 'fscore_drop': float(fscore_for_label(1)), + 'fscore_latency': float(fscore_for_label(2)), + }) + + # save result + if save_dict and method and dataset: + dataset = dataset.rstrip('/') + + result_to_save = result_dict.copy() + result_to_save['dataset'] = dataset + result_to_save['method'] = method + + if os.path.exists(f'paper-data/{save_filename}'): + df = pd.read_csv(f'paper-data/{save_filename}') + + if not df[(df['dataset']==dataset)&(df['method']==method)].empty: + df.iloc[df[(df['dataset']==dataset)&(df['method']==method)].index[0]] = result_to_save + else: + df = df.append(result_to_save, ignore_index=True) + else: + df = pd.DataFrame() + df = df.append(result_to_save, ignore_index=True) + + os.makedirs('paper-data', exist_ok=True) + df.to_csv(f'paper-data/{save_filename}', index=False) + + return result_dict diff --git a/tracegnn/utils/array_buffer.py b/tracegnn/utils/array_buffer.py new file mode 100644 index 0000000..4488d92 --- /dev/null +++ b/tracegnn/utils/array_buffer.py @@ -0,0 +1,39 @@ +import numpy as np + +__all__ = ['ArrayBuffer'] + + +class ArrayBuffer(object): + + __slots__ = ['length', 'capacity', 'dtype', 'buffer'] + + def __init__(self, capacity: int = 32, dtype=np.float32): + self.length = 0 + self.capacity = capacity + self.dtype = dtype + self.buffer = np.empty([capacity], dtype=dtype) + + def __len__(self): + return self.length + + def __iter__(self): + return iter(self.array) + + @property + def array(self): + return self.buffer[:self.length] + + def extend(self, items): + offset = self.length + new_length = len(items) + req_capacity = new_length + offset + if req_capacity > self.capacity: + self.capacity = capacity = max(self.capacity * 2, req_capacity) + buffer = np.empty([capacity], dtype=self.dtype) + buffer[:offset] = self.buffer[:offset] + self.buffer = buffer + self.buffer[offset: offset + new_length] = items + self.length += new_length + + def clear(self): + self.length = 0 diff --git a/tracegnn/utils/data_utils.py b/tracegnn/utils/data_utils.py new file mode 100644 index 0000000..b858204 --- /dev/null +++ b/tracegnn/utils/data_utils.py @@ -0,0 +1,23 @@ +from typing import * + +import numpy as np + +__all__ = [ + 'compute_cdf', +] + + +def compute_cdf(arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 
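+    """
+    Compute the empirical CDF over the distinct values in `arr`.
+
+    >>> keys, values = compute_cdf(np.array([1, 1, 2, 3]))
+    >>> keys
+    array([1, 2, 3])
+    >>> values
+    array([0.5 , 0.75, 1.  ])
+    """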
+ # calculate bin size cdf + hist = {} + for v in arr: + if v not in hist: + hist[v] = 0 + hist[v] += 1 + + keys = np.array(sorted(hist)) + values = np.array([hist[v] for v in keys], dtype=np.float64) + values /= values.sum() + values = np.cumsum(values) + + return keys, values diff --git a/tracegnn/utils/fscore_utils.py b/tracegnn/utils/fscore_utils.py new file mode 100644 index 0000000..23cb53a --- /dev/null +++ b/tracegnn/utils/fscore_utils.py @@ -0,0 +1,37 @@ +from typing import * + +import numpy as np +from sklearn.metrics import precision_recall_curve, average_precision_score + +__all__ = [ + 'fscore_for_precision_and_recall', + 'best_fscore', + 'auc_score', +] + + +def fscore_for_precision_and_recall(precision: np.ndarray, + recall: np.ndarray) -> np.ndarray: + precision = np.asarray(precision, dtype=np.float64) + recall = np.asarray(recall, dtype=np.float64) + return np.where( + (precision == 0) | (recall == 0), + 0.0, + 2. * np.exp( + np.log(np.maximum(precision, 1e-8)) + + np.log(np.maximum(recall, 1e-8)) - + np.log(np.maximum(precision + recall, 1e-8)) + ) + ) + + +def best_fscore(proba: np.ndarray, + truth: np.ndarray) -> Tuple[float, float]: + precision, recall, threshold = precision_recall_curve(truth, proba) + fscore = fscore_for_precision_and_recall(precision, recall) + idx = np.argmax(fscore[:-1]) + return fscore[idx], threshold[idx], precision[idx], recall[idx] + + +def auc_score(proba: np.ndarray, truth: np.ndarray) -> float: + return float(average_precision_score(truth, proba)) diff --git a/tracegnn/utils/graph_conversion.py b/tracegnn/utils/graph_conversion.py new file mode 100644 index 0000000..4c4a2a8 --- /dev/null +++ b/tracegnn/utils/graph_conversion.py @@ -0,0 +1,35 @@ +import networkx as nx +import numpy as np +from tracegnn.data.trace_graph import TraceGraphIDManager + + +def np_to_nx(DV: np.ndarray, DE: np.ndarray, id_manager: TraceGraphIDManager) -> nx.Graph: + """ + DV: [n x d] + DE: [n x n x 1] or [n x n] + """ + # Reshape DE to [n x n] + if len(DE.shape) == 3: + DE = DE[:,:,0] + + # Choose Nodes + nodes_idx = (1.0-np.sum(DV[:,:len(id_manager.operation_id)], axis=-1)) < np.max(DV[:,:len(id_manager.operation_id)], axis=-1) + DV = DV[nodes_idx] + DE = DE[nodes_idx][:, nodes_idx] + + DE = (DE + DE.T) / 2 + + # Get Node Type + node_type = np.argmax(DV[:,:len(id_manager.operation_id)], axis=-1) + + # Generate nx Graph + g: nx.Graph = nx.from_numpy_matrix(DE, create_using=nx.Graph) + + for i in range(len(g.nodes)): + g.nodes[i]['node_type'] = node_type[i] + g.nodes[i]['operation'] = id_manager.operation_id.reverse_map(node_type[i]) + + # MST + # g = nx.maximum_spanning_tree(g) + + return g diff --git a/tracegnn/utils/id_assign.py b/tracegnn/utils/id_assign.py new file mode 100644 index 0000000..0601b87 --- /dev/null +++ b/tracegnn/utils/id_assign.py @@ -0,0 +1,58 @@ +import os + +import yaml + +__all__ = ['IDAssign'] + + +class IDAssign(object): + + def __init__(self, path: str): + self._path = path + self._mapping = {'': 0} # by default let 0 == '' (a NULL item) + + if os.path.isfile(path): + with open(path, 'r', encoding='utf-8') as f: + self._mapping = yaml.safe_load(f.read()) + + if self._mapping: + self._next_index = max(self._mapping.values()) + 1 + self._rev_mapping = {v: k for k, v in self._mapping.items()} + else: + self._next_index = 0 + self._rev_mapping = {} + + def __len__(self): + return self._next_index + + def __getitem__(self, key): + return self._mapping[key] + + @property + def path(self) -> str: + return self._path + + def dump_to(self, path: 
str): + cnt = yaml.safe_dump(self._mapping) + with open(path, 'w', encoding='utf-8') as f: + f.write(cnt) + + def get_or_assign(self, key: str): + ret = self._mapping.get(key, None) + if ret is None: + self._mapping[key] = ret = self._next_index + self._rev_mapping[ret] = key + self._next_index += 1 + return ret + + def reverse_map(self, index: int): + return self._rev_mapping[index] + + def flush(self): + self.dump_to(self._path) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.flush() diff --git a/tracegnn/utils/latency_codec.py b/tracegnn/utils/latency_codec.py new file mode 100644 index 0000000..21ab9b1 --- /dev/null +++ b/tracegnn/utils/latency_codec.py @@ -0,0 +1,135 @@ +from typing import * + +import numpy as np + +from tracegnn.constants import * + + +if not USE_MULTI_DIM_LATENCY_CODEC: + __all__ = [] + +else: + __all__ = [ + 'encode_multi_latency', + 'decode_multi_latency', + 'encode_latency', + 'decode_latency', + ] + + EPS = 1e-6 + + + def encode_multi_latency(latencies: Sequence[np.ndarray], + max_latency_dims: int + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Encode multiple latencies into (codec, onehot) feature vectors. + + If `max_latency_dims` is sufficient: + + >>> latencies = [np.array([0.0, 9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 0.0, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 3) + >>> codec + array([[-1. , -1. , -1. , -0.74, -0.8 , -1. ], + [ 0.92, -1. , -1. , -0.88, -1. , -1. ], + [-0.94, -0.8 , -1. , -1. , -1. , -1. ], + [ 0.74, 0. , -1. , 0.82, 0.8 , -1. ], + [-0.76, -1. , -0.8 , -1. , -1. , -0.8 ]]) + >>> onehot + array([[ True, False, False, False, True, False], + [ True, False, False, True, False, False], + [False, True, False, True, False, False], + [False, True, False, False, True, False], + [False, False, True, False, False, True]]) + >>> decode_multi_latency(codec, onehot, 3) + [array([ 0. , 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 0. , 99.1, 100. ])] + + If `max_latency_dims` is partially sufficient: + + >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 2) + >>> codec + array([[ 0.92, -1. , -0.74, -0.8 ], + [-0.94, -0.8 , -0.88, -1. ], + [ 0.74, 0. , 0.82, 0.8 ], + [-0.76, 1. , -1. , 1. ]]) + >>> onehot + array([[ True, False, False, True], + [False, True, True, False], + [False, True, False, True], + [False, True, False, True]]) + >>> decode_multi_latency(codec, onehot, 2) + [array([ 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 99.1, 100. ])] + + If `max_latency_dims` is insufficient: + + >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])] + >>> codec, onehot = encode_multi_latency(latencies, 1) + >>> codec + array([[ 0.92, 1.26], + [ 1.06, -0.88], + [10.74, 18.82], + [19.24, 19. ]]) + >>> onehot + array([[ True, True], + [ True, True], + [ True, True], + [ True, True]]) + >>> decode_multi_latency(codec, onehot, 1) + [array([ 9.6, 10.3, 58.7, 101.2]), array([ 11.3, 0.6, 99.1, 100. ])] + """ + codec, onehot = [], [] + for residual in latencies: + for i in range(max_latency_dims - 1): + if i == 0: + onehot.append(residual < 10) + else: + onehot.append(np.logical_and(EPS < residual, residual < 10)) + r = residual % 10 + codec.append(r) + residual = (residual - r) / 10 + onehot.append(EPS < residual) + codec.append(residual) + codec, onehot = np.stack(codec, axis=-1), np.stack(onehot, axis=-1) + codec = codec / 5. 
diff --git a/tracegnn/utils/latency_codec.py b/tracegnn/utils/latency_codec.py
new file mode 100644
index 0000000..21ab9b1
--- /dev/null
+++ b/tracegnn/utils/latency_codec.py
@@ -0,0 +1,135 @@
+from typing import *
+
+import numpy as np
+
+from tracegnn.constants import *
+
+
+if not USE_MULTI_DIM_LATENCY_CODEC:
+    __all__ = []
+
+else:
+    __all__ = [
+        'encode_multi_latency',
+        'decode_multi_latency',
+        'encode_latency',
+        'decode_latency',
+    ]
+
+    EPS = 1e-6
+
+
+    def encode_multi_latency(latencies: Sequence[np.ndarray],
+                             max_latency_dims: int
+                             ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Encode multiple latencies into (codec, onehot) feature vectors.
+
+        If `max_latency_dims` is sufficient:
+
+        >>> latencies = [np.array([0.0, 9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 0.0, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 3)
+        >>> codec
+        array([[-1.  , -1.  , -1.  , -0.74, -0.8 , -1.  ],
+               [ 0.92, -1.  , -1.  , -0.88, -1.  , -1.  ],
+               [-0.94, -0.8 , -1.  , -1.  , -1.  , -1.  ],
+               [ 0.74,  0.  , -1.  ,  0.82,  0.8 , -1.  ],
+               [-0.76, -1.  , -0.8 , -1.  , -1.  , -0.8 ]])
+        >>> onehot
+        array([[ True, False, False, False,  True, False],
+               [ True, False, False,  True, False, False],
+               [False,  True, False,  True, False, False],
+               [False,  True, False, False,  True, False],
+               [False, False,  True, False, False,  True]])
+        >>> decode_multi_latency(codec, onehot, 3)
+        [array([  0. ,   9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,   0. ,  99.1, 100. ])]
+
+        If `max_latency_dims` is partially sufficient:
+
+        >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 2)
+        >>> codec
+        array([[ 0.92, -1.  , -0.74, -0.8 ],
+               [-0.94, -0.8 , -0.88, -1.  ],
+               [ 0.74,  0.  ,  0.82,  0.8 ],
+               [-0.76,  1.  , -1.  ,  1.  ]])
+        >>> onehot
+        array([[ True, False, False,  True],
+               [False,  True,  True, False],
+               [False,  True, False,  True],
+               [False,  True, False,  True]])
+        >>> decode_multi_latency(codec, onehot, 2)
+        [array([  9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,  99.1, 100. ])]
+
+        If `max_latency_dims` is insufficient:
+
+        >>> latencies = [np.array([9.6, 10.3, 58.7, 101.2]), np.array([11.3, 0.6, 99.1, 100.0])]
+        >>> codec, onehot = encode_multi_latency(latencies, 1)
+        >>> codec
+        array([[ 0.92,  1.26],
+               [ 1.06, -0.88],
+               [10.74, 18.82],
+               [19.24, 19.  ]])
+        >>> onehot
+        array([[ True,  True],
+               [ True,  True],
+               [ True,  True],
+               [ True,  True]])
+        >>> decode_multi_latency(codec, onehot, 1)
+        [array([  9.6,  10.3,  58.7, 101.2]), array([ 11.3,   0.6,  99.1, 100. ])]
+        """
+        codec, onehot = [], []
+        for residual in latencies:
+            for i in range(max_latency_dims - 1):
+                if i == 0:
+                    onehot.append(residual < 10)
+                else:
+                    onehot.append(np.logical_and(EPS < residual, residual < 10))
+                r = residual % 10
+                codec.append(r)
+                residual = (residual - r) / 10
+            onehot.append(EPS < residual)
+            codec.append(residual)
+        codec, onehot = np.stack(codec, axis=-1), np.stack(onehot, axis=-1)
+        codec = codec / 5. - 1  # scale to [-1, 1]
+        return codec, onehot
+
+
+    def decode_multi_latency(codec: np.ndarray,
+                             onehot: np.ndarray,
+                             max_latency_dims: int
+                             ) -> List[np.ndarray]:
+        if codec.shape[-1] % max_latency_dims != 0:
+            raise ValueError(
+                f'codec.shape[-1] % max_latency_dims != 0: '
+                f'codec.shape = {codec.shape!r}, where max_latency_dims = {max_latency_dims!r}'
+            )
+
+        ret = []
+        codec = (np.clip(codec, -1, 1) + 1) * 5  # scale back from [-1, 1]
+        for i in range(codec.shape[-1] // max_latency_dims):
+            left = i * max_latency_dims
+            right = left + max_latency_dims - 1
+            m = onehot[..., right]
+            r = codec[..., right] * m.astype(np.float32)
+            while right > left:
+                r = r * 10
+                right -= 1
+                m |= onehot[..., right]
+                r += codec[..., right]
+            ret.append(r)
+
+        return ret
+
+
+    def encode_latency(latency: np.ndarray,
+                       max_latency_dims: int
+                       ) -> Tuple[np.ndarray, np.ndarray]:
+        return encode_multi_latency([latency], max_latency_dims)
+
+
+    def decode_latency(codec: np.ndarray,
+                       onehot: np.ndarray,
+                       max_latency_dims: int
+                       ) -> np.ndarray:
+        return decode_multi_latency(codec, onehot, max_latency_dims)[0]
diff --git a/tracegnn/utils/latency_range_file.py b/tracegnn/utils/latency_range_file.py
new file mode 100644
index 0000000..620a918
--- /dev/null
+++ b/tracegnn/utils/latency_range_file.py
@@ -0,0 +1,80 @@
+import os
+from typing import *
+
+import yaml
+
+__all__ = ['TraceGraphLatencyRangeFile']
+
+LATENCY_RANGE_FILE = 'latency_range.yml'
+
+
+class TraceGraphLatencyRangeFile(object):
+    __slots__ = ['root_dir', 'yaml_path', 'latency_data']
+
+    root_dir: str
+    yaml_path: str
+    latency_data: Dict[int, Dict[str, float]]
+
+    def __init__(self, root_dir: str, require_exists: bool = False):
+        self.root_dir = os.path.abspath(root_dir)
+        self.yaml_path = os.path.join(self.root_dir, LATENCY_RANGE_FILE)
+        self.latency_data = {}
+        if os.path.exists(self.yaml_path):
+            with open(self.yaml_path, 'r', encoding='utf-8') as f:
+                obj = yaml.safe_load(f.read())
+                self.latency_data = {
+                    int(op_id): v
+                    for op_id, v in obj.items()
+                }
+        elif require_exists:
+            raise IOError(f'LatencyRangeFile does not exist: {self.yaml_path}')
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.flush()
+
+    def __contains__(self, item):
+        return int(item) in self.latency_data
+
+    def __getitem__(self, operation_id: int) -> Tuple[float, float]:
+        v = self.latency_data[int(operation_id)]
+        return v['mean'], v['std']
+
+    def __setitem__(self,
+                    operation_id: int,
+                    value: Union[Tuple[float, float], Dict[str, float]]):
+        self.update_item(operation_id, value)
+
+    def get_item(self, operation_id: int):
+        return self.latency_data[int(operation_id)]
+
+    def update_item(self,
+                    operation_id: int,
+                    value: Union[Tuple[float, float], Dict[str, float]]
+                    ):
+        if isinstance(value, (tuple, list)) and len(value) == 2:
+            mean, std = value
+            value = {'mean': mean, 'std': std}
+
+        key = int(operation_id)
+        if key not in self.latency_data:
+            self.latency_data[key] = {}
+        self.latency_data[key].update({k: float(v) for k, v in value.items()})
+
+    def clear(self):
+        self.latency_data.clear()
+
+    def flush(self):
+        self.dump_to(self.root_dir)
+
+    def dump_to(self, output_dir: str):
+        payload = {
+            k: v
+            for k, v in self.latency_data.items()
+        }
+        cnt = yaml.safe_dump(payload)
+        path = os.path.join(output_dir, LATENCY_RANGE_FILE)
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(cnt)
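A minimal round-trip sketch for `TraceGraphLatencyRangeFile`; the directory name is a placeholder and must already exist:
```
from tracegnn.utils.latency_range_file import TraceGraphLatencyRangeFile

with TraceGraphLatencyRangeFile('some_dataset/id_manager') as latency_range:
    latency_range[3] = (12.5, 4.2)  # operation_id 3 -> (mean, std)
    mean, std = latency_range[3]
    assert 3 in latency_range
# on exit, flush() writes latency_range.yml under the directory
```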
diff --git a/tracegnn/utils/misc.py b/tracegnn/utils/misc.py
new file mode 100644
index 0000000..8d9837d
--- /dev/null
+++ b/tracegnn/utils/misc.py
@@ -0,0 +1,54 @@
+import re
+import sys
+from contextlib import contextmanager
+from tempfile import TemporaryDirectory
+from typing import *
+from urllib.request import urlretrieve
+
+import os
+
+__all__ = [
+    'abspath_relative_to_file',
+    'fake_tqdm',
+    'ensure_parent_exists',
+    'as_local_file',
+]
+
+
+def abspath_relative_to_file(path, file_path):
+    return os.path.join(
+        os.path.split(os.path.abspath(file_path))[0],
+        path
+    )
+
+
+def fake_tqdm(data, *args, **kwargs):
+    yield from data
+
+
+def ensure_parent_exists(path):
+    if path is not None:
+        path = os.path.abspath(path)
+        parent_dir = os.path.split(path)[0]
+        if not os.path.isdir(parent_dir):
+            os.makedirs(parent_dir, exist_ok=True)
+    return path
+
+
+@contextmanager
+def as_local_file(uri: str) -> Iterator[str]:
+    if re.match(r'^https?://', uri):
+        m = re.match(r'^(https?://[^/]+)/([a-z0-9]{24})/(.*)?$', uri)
+        if m:
+            uri = f'{m.group(1)}/v1/_getfile/{m.group(2)}'
+            if m.group(3):
+                uri += f'/{m.group(3)}'
+        with TemporaryDirectory() as temp_dir:
+            filename = os.path.join(temp_dir, uri.rstrip('/').rsplit('/', 1)[-1])
+            print(f'Download: {uri}', file=sys.stderr)
+            urlretrieve(uri, filename=filename)
+            yield filename
+    elif uri.startswith('file://'):
+        yield uri[7:]
+    else:
+        yield uri
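`as_local_file` yields a local path for a URI, downloading `http(s)` sources into a temporary directory first. A sketch with a placeholder `file://` URI, which is simply unwrapped without copying:
```
from tracegnn.utils.misc import as_local_file

with as_local_file('file:///tmp/checkpoint.pt') as path:
    print(path)  # -> /tmp/checkpoint.pt

# an https:// URI would instead be fetched via urlretrieve and the
# temporary file path yielded for the duration of the with-block
```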
diff --git a/tracegnn/utils/summary_callback.py b/tracegnn/utils/summary_callback.py
new file mode 100644
index 0000000..bb57b17
--- /dev/null
+++ b/tracegnn/utils/summary_callback.py
@@ -0,0 +1,75 @@
+from typing import *
+
+import numpy as np
+from mltk.callbacks import Callback, CallbackData, Stage
+from torch.utils.tensorboard import SummaryWriter
+
+try:
+    # problem: https://github.com/pytorch/pytorch/issues/30966
+    import tensorflow as tf
+    import tensorboard as tb
+
+    tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
+except ImportError:
+    pass
+
+__all__ = ['SummaryCallback']
+
+
+class SummaryCallback(Callback):
+    """Callback class that writes metrics to TensorBoard."""
+
+    writer: SummaryWriter
+    stage: Optional[Stage]
+    stage_stack: List[Stage]
+    global_step: int
+
+    def __init__(self, *, summary_dir=None, summary_writer=None, global_step: int = 0):
+        if (summary_dir is None) == (summary_writer is None):
+            raise ValueError('Exactly one of `summary_dir` and `summary_writer` '
+                             'should be specified.')
+
+        if summary_dir is not None:
+            summary_writer = SummaryWriter(summary_dir)
+        self.writer = summary_writer
+        self.stage = None
+        self.stage_stack = []
+        self.global_step = global_step
+
+    def add_embedding(self, *args, **kwargs):
+        kwargs.setdefault('global_step', self.global_step)
+        return self.writer.add_embedding(*args, **kwargs)
+
+    def update_metrics(self, metrics):
+        if metrics:
+            for key, val in metrics.items():
+                key = self.stage_stack[-1].type.add_metric_prefix(key)
+                if np.shape(val) != ():
+                    val = np.mean(val)
+                self.writer.add_scalar(key, val, self.global_step)
+
+    def set_global_step(self, step: int):
+        self.global_step = step
+
+    def on_stage_begin(self, data: CallbackData):
+        self.stage_stack.append(data.stage)
+
+    def on_stage_end(self, data: CallbackData):
+        self.stage_stack.pop()
+
+    def on_test_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
+
+    def on_validation_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
+
+    def on_batch_begin(self, data: CallbackData):
+        if len(self.stage_stack) == 1:
+            self.global_step += 1
+
+    def on_batch_end(self, data: CallbackData):
+        if len(self.stage_stack) == 1:
+            self.update_metrics(data.metrics)
+
+    def on_epoch_end(self, data: CallbackData):
+        self.update_metrics(data.metrics)
diff --git a/train.sh b/train.sh
new file mode 100644
index 0000000..8da1d10
--- /dev/null
+++ b/train.sh
@@ -0,0 +1,4 @@
+echo "Usage: bash train.sh [dataset_path]"
+echo "DATASET: $1"
+rm -rf results
+python3 -m tracegnn.models.trace_vae.train --device=cpu --dataset.root_dir="$1" --seed=1234 --model.struct.z_dim=10 --model.struct.decoder.use_prior_flow=true --train.z_unit_ball_reg=1 --model.latency.z2_dim=10 --model.latency.decoder.condition_on_z=true
\ No newline at end of file
-- 
GitLab