百度统计
一面之猿网
让这个世界,因为我,有一点点的不一样
关于 CGraph和 taskflow 性能对比测试的相关说明

之前写过一篇文章 炸裂!CGraph性能全面超越taskflow之后,作者却说他更想… ,介绍 色丶图和 taskflow 之间串行、并行和 dag的性能对比,并且记录了相关数据,并保存截图。

但后期重新自测,发现跟那次对比,差距较大。经多次反复重试,在纯并行和dag环境下,taskflow性能均好于 CGraph。在这里记录一下当时的测试代码,供大家对比测试。

希望大家可以根据自己的场景和环境,做出自己的性能对比。也欢迎大家给我们提供性能优化的意见和建议,随时期待您的指教。

再次申明一下,压测情况和实际使用的情况,有较大的区别。实际使用的情况下,更多性能损耗在算子内部,压测环境下损耗主要在调度上。请大家根据实际情况,对比分析。

最后,为之前自测的不谨慎抱歉,我对着当时的代码截图,在已经屏蔽了cpu占用、release/debug版本,git版本等因素的情况下,完全跑不复现当时的耗时了。
需要澄清,个人和团队小伙伴 绝非学术造假,也无任何商业利益。希望大家理性看待。我们接受拍砖,同时更期待您的指导和帮助。


以下代码,均推荐用当前最新版本测试:

CGraph

#include "MyGAspect/MyTimerAspect.h"

using namespace CGraph;

/**
 * Benchmark node whose run() performs no work: it only hands back a
 * default-constructed CStatus. The benchmarks below register many of these
 * nodes so that the time measured is spent outside the node body.
 */
class MyEmptyNode : public GNode {
public:
    CStatus run() override {
        CStatus result;    // default-constructed status, nothing else to do
        return result;
    }
};


void tutorial_concurrent_32() {
    // 并行的执行32次,对应第1个例子,8thread,32并发,50w次
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr arr[32];
    UThreadPoolConfig config;
    config.default_thread_size_ = 8;    // 我的笔记本,是8核心的 macbook pro m1
    config.max_thread_size_ = config.default_thread_size_;
    config.monitor_enable_ = false;    // 关闭扩缩容机制
    pipeline->setUniqueThreadPoolConfig(config);
    for (int i = 0; i < 32; i++) {
        pipeline->registerGElement<MyEmptyNode>(&arr[i]);
    }
    status += pipeline->init();

    /** 其中流程进行计时 **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 500000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // 这里会输出时间信息
    /*******************/
    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}

void tutorial_serial_32() {
    // 串行执行32次,对应第二个例子,1thread,32串行,1000w次
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr arr[32];
    UThreadPoolConfig config;
    config.default_thread_size_ = 1;    // 我的笔记本,是8核心的 macbook pro m1
    config.max_thread_size_ = config.default_thread_size_;
    config.monitor_enable_ = false;    // 关闭扩缩容机制
    pipeline->setUniqueThreadPoolConfig(config);

    pipeline->setGEngineType(GEngineType::DYNAMIC);

    pipeline->registerGElement<MyEmptyNode>(&arr[0]);
    for (int i = 1; i < 32; i++) {
        pipeline->registerGElement<MyEmptyNode>(&arr[i], {arr[i-1]});
    }
    status += pipeline->init();

    /** 其中流程进行计时 **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 10000000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // 这里会输出时间信息
    /*******************/
    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}

void tutorial_dag() {
    // 简单dag场景,对应第三个例子,2thread,dag,100w次
    GPipelinePtr pipeline = GPipelineFactory::create();
    CStatus status;
    GElementPtr a,b1,b2,c1,c2,d;
    UThreadPoolConfig config;
    config.default_thread_size_ = 2;    // 我的笔记本,是8核心的 macbook pro m1
    config.max_thread_size_ = config.default_thread_size_;
    config.monitor_enable_ = false;    // 关闭扩缩容机制
    pipeline->setUniqueThreadPoolConfig(config);

    pipeline->registerGElement<MyEmptyNode>(&a);
    pipeline->registerGElement<MyEmptyNode>(&b1, {a});
    pipeline->registerGElement<MyEmptyNode>(&b2, {b1});
    pipeline->registerGElement<MyEmptyNode>(&c1, {a});
    pipeline->registerGElement<MyEmptyNode>(&c2, {c1});
    pipeline->registerGElement<MyEmptyNode>(&d, {b2, c2});

    pipeline->setGEngineType(GEngineType::DYNAMIC);
    status += pipeline->init();

    /** 其中流程进行计时 **/
    MyTimerAspect asp;
    asp.beginRun();
    for (int t = 0; t < 1000000; t++) {
        pipeline->run();
    }
    asp.finishRun(status);    // 这里会输出时间信息
    /*******************/
    status += pipeline->destroy();
    GPipelineFactory::remove(pipeline);
}


/** Entry point of the CGraph benchmark: runs the parallel, serial and DAG cases in that order. */
int main() {
    void (*const benchmarks[])() = {
        tutorial_concurrent_32,
        tutorial_serial_32,
        tutorial_dag,
    };
    for (auto benchmark : benchmarks) {
        benchmark();
    }
    return 0;
}

taskflow

// A simple example to capture the following task dependencies.
//
//           +---+
//     +---->| B |-----+
//     |     +---+     |
//   +---+           +-v-+
//   | A |           | D |
//   +---+           +-^-+
//     |     +---+     |
//     +---->| C |-----+
//           +---+
//
#include <taskflow/taskflow.hpp>  // the only include you need

// Local status type returned by every task lambda in this taskflow benchmark.
// It holds an integer code and a message string, both private and unused here.
// NOTE(review): presumably mirrors CGraph's CStatus so both benchmarks return
// a comparable object from each task -- confirm.
class CStatus {
    int code{0};
    std::string info{};
};

/**
 * taskflow benchmark 1 (parallel): 32 tasks with no dependencies between
 * them, executed 500,000 times on an executor created with 8 workers.
 * Prints the total elapsed wall time in milliseconds.
 */
void demo1() {
    tf::Executor executor(8);
    tf::Taskflow taskflow("simple");

    // 32 independent tasks; the returned task handles are not needed.
    for (int i = 0; i < 32; i++) {
        taskflow.emplace([] {
            return CStatus();
        });
    }

    // steady_clock is monotonic; high_resolution_clock may alias the
    // adjustable system clock on some standard libraries.
    const auto start_ts_ = std::chrono::steady_clock::now();
    for (int i = 0; i < 500000; i++) {
        executor.run(taskflow).wait();
    }

    const std::chrono::duration<double, std::milli> span = std::chrono::steady_clock::now() - start_ts_;
    printf("----> [MyTimerAspect] time cost is : [%0.2lf] ms \n", span.count());
}

void demo2() {
    // 串行32个
    tf::Executor executor(1);
    tf::Taskflow taskflow;
    auto task1 = taskflow.emplace([]() { return CStatus(); });
    auto task2 = taskflow.emplace([]() { return CStatus(); });
    auto task3 = taskflow.emplace([]() { return CStatus();  });
    auto task4 = taskflow.emplace([]() { return CStatus();  });
    auto task5 = taskflow.emplace([]() { return CStatus();  });
    auto task6 = taskflow.emplace([]() { return CStatus();  });
    auto task7 = taskflow.emplace([]() { return CStatus();  });
    auto task8 = taskflow.emplace([]() { return CStatus();  });
    auto task9 = taskflow.emplace([]() { return CStatus();  });
    auto task10 = taskflow.emplace([]() { return CStatus();  });
    auto task11 = taskflow.emplace([]() { return CStatus();  });
    auto task12 = taskflow.emplace([]() { return CStatus();  });
    auto task13 = taskflow.emplace([]() { return CStatus();  });
    auto task14 = taskflow.emplace([]() { return CStatus();  });
    auto task15 = taskflow.emplace([]() { return CStatus();  });
    auto task16 = taskflow.emplace([]() { return CStatus();  });
    auto task17 = taskflow.emplace([]() { return CStatus();  });
    auto task18 = taskflow.emplace([]() { return CStatus();  });
    auto task19 = taskflow.emplace([]() { return CStatus();  });
    auto task20 = taskflow.emplace([]() { return CStatus();  });
    auto task21 = taskflow.emplace([]() { return CStatus();  });
    auto task22 = taskflow.emplace([]() { return CStatus();  });
    auto task23 = taskflow.emplace([]() { return CStatus();  });
    auto task24 = taskflow.emplace([]() { return CStatus();  });
    auto task25 = taskflow.emplace([]() { return CStatus();  });
    auto task26 = taskflow.emplace([]() { return CStatus();  });
    auto task27 = taskflow.emplace([]() { return CStatus(); });
    auto task28 = taskflow.emplace([]() { return CStatus();  });
    auto task29 = taskflow.emplace([]() { return CStatus();  });
    auto task30 = taskflow.emplace([]() { return CStatus();  });
    auto task31 = taskflow.emplace([]() { return CStatus();  });
    auto task32 = taskflow.emplace([]() { return CStatus();  });

    task1.precede(task2);
    task2.precede(task3);
    task3.precede(task4);
    task4.precede(task5);
    task5.precede(task6);
    task6.precede(task7);
    task7.precede(task8);
    task8.precede(task9);
    task9.precede(task10);
    task10.precede(task11);
    task11.precede(task12);
    task12.precede(task13);
    task13.precede(task14);
    task14.precede(task15);
    task15.precede(task16);
    task16.precede(task17);
    task17.precede(task18);
    task18.precede(task19);
    task19.precede(task20);
    task20.precede(task21);
    task21.precede(task22);
    task22.precede(task23);
    task23.precede(task24);
    task24.precede(task25);
    task25.precede(task26);
    task26.precede(task27);
    task27.precede(task28);
    task28.precede(task29);
    task29.precede(task30);
    task30.precede(task31);
    task31.precede(task32);

    auto start_ts_ = std::chrono::high_resolution_clock::now();
    for(int i = 0; i < 10000000; i++) {
        executor.run(taskflow).wait();
    }

    std::chrono::duration<double, std::milli> span = std::chrono::high_resolution_clock::now() - start_ts_;
    printf("----> [MyTimerAspect] time cost is : [%0.2lf] ms \n",
           span.count());
}

/**
 * taskflow benchmark 3 (simple DAG): builds the graph
 *
 *     A -> B1 -> B2 -> D
 *     A -> C1 -> C2 -> D
 *
 * and executes it 1,000,000 times on an executor created with 2 workers.
 * Prints the total elapsed wall time in milliseconds.
 */
void demo3() {
    tf::Taskflow taskflow;

    auto [A, B1, B2, C1, C2, D] = taskflow.emplace(
            // []() { return std::this_thread::sleep_for(std::chrono::milliseconds(1)); },
            []() { return CStatus(); },
            []() { return CStatus(); },
            []() { return CStatus(); },
            []() { return CStatus(); },
            []() { return CStatus(); },
            []() { return CStatus(); }
    );

    A.precede(B1, C1);    // A fans out into the two branches
    B1.precede(B2);
    C1.precede(C2);
    D.succeed(B2, C2);    // D joins the two branches

    // execute the workflow
    tf::Executor executor(2);

    // steady_clock is monotonic; high_resolution_clock may alias the
    // adjustable system clock on some standard libraries.
    const auto start_ts_ = std::chrono::steady_clock::now();
    for (int i = 0; i < 1000000; i++) {
        executor.run(taskflow).wait();
    }

    const std::chrono::duration<double, std::milli> span = std::chrono::steady_clock::now() - start_ts_;
    printf("----> [MyTimerAspect] time cost is : [%0.2lf] ms \n",
           span.count());

}

/** Entry point of the taskflow benchmark: runs the parallel, serial and DAG cases in that order. */
int main() {
    void (*const benchmarks[])() = {demo1, demo2, demo3};
    for (auto benchmark : benchmarks) {
        benchmark();
    }

    return 0;
}