publications | Zhenyu Bai

2026

ASPLOS26

A data-driven dynamic execution orchestration architecture

Zhenyu Bai, Pranav Dangi, Rohan Juneja, and 4 more authors

In Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1, 2026

Bib

% Conference paper, ASPLOS 2026 (31st edition, Volume 1).
@inproceedings{bai2026data,
  author    = {Bai, Zhenyu and Dangi, Pranav and Juneja, Rohan and Li, Zhaoying and Yan, Zhanglu and Lan, Huiying and Mitra, Tulika},
  title     = {A data-driven dynamic execution orchestration architecture},
  booktitle = {Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1},
  year      = {2026},
  pages     = {1--19},
}

2025

arXiv25

TL: Automatic End-to-End Compiler of Tile-Based Languages for Spatial Dataflow Architectures

Wei Li, Zhenyu Bai, Heru Wang, and 6 more authors

arXiv preprint arXiv:2512.22168, 2025

Bib

% arXiv preprint. The acronym in the title is brace-protected so that
% sentence-casing styles do not render it as "Tl". `journal` is kept so that
% classic BibTeX styles still print the identifier; `eprint`/`archiveprefix`
% expose the same identifier in machine-readable form.
@article{li2025tl,
  author        = {Li, Wei and Bai, Zhenyu and Wang, Heru and Dangi, Pranav and Zhang, Zhiqiang and Tan, Cheng and Lan, Huiying and Wong, Weng-Fai and Mitra, Tulika},
  title         = {{TL}: Automatic End-to-End Compiler of Tile-Based Languages for Spatial Dataflow Architectures},
  journal       = {arXiv preprint arXiv:2512.22168},
  year          = {2025},
  eprint        = {2512.22168},
  archiveprefix = {arXiv},
}

ASPLOS25

Enhancing CGRA efficiency through aligned compute and communication provisioning

Zhaoying Li, Pranav Dangi, Chenyang Yin, and 5 more authors

In Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1, 2025

arXiv25

Data-aware Dynamic Execution of Irregular Workloads on Heterogeneous Systems

Zhenyu Bai, Dan Wu, Pranav Dangi, and 3 more authors

arXiv preprint arXiv:2502.06304, 2025

arXiv25

TerEffic: Highly Efficient Ternary LLM Inference on FPGA

Chenyang Yin, Zhenyu Bai, Pranav Venkatram, and 3 more authors

arXiv preprint arXiv:2502.16473, 2025

2024

HPCA24

Asadi: Accelerating sparse attention using diagonal-based in-situ computing

Huize Li, Zhaoying Li, Zhenyu Bai, and 1 more author

In 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA), 2024

Bib

% Conference paper, HPCA 2024. IEEE is recorded in `publisher` (the
% `organization` field is meant for a sponsoring body), and the acronyms in
% the booktitle are brace-protected against style recasing.
@inproceedings{li2024asadi,
  author    = {Li, Huize and Li, Zhaoying and Bai, Zhenyu and Mitra, Tulika},
  title     = {Asadi: Accelerating sparse attention using diagonal-based in-situ computing},
  booktitle = {2024 {IEEE} International Symposium on High-Performance Computer Architecture ({HPCA})},
  pages     = {774--787},
  year      = {2024},
  publisher = {IEEE},
}

DAC24

Swat: Scalable and efficient window attention-based transformers acceleration on FPGAs

Zhenyu Bai, Pranav Dangi, Huize Li, and 1 more author

In Proceedings of the 61st ACM/IEEE Design Automation Conference, 2024

Bib

% Conference paper, DAC 2024 (61st edition). The acronym "FPGAs" is restored
% from the lowercased "fpgas" and brace-protected so styles cannot downcase it.
@inproceedings{bai2024swat,
  author    = {Bai, Zhenyu and Dangi, Pranav and Li, Huize and Mitra, Tulika},
  title     = {Swat: Scalable and efficient window attention-based transformers acceleration on {FPGAs}},
  booktitle = {Proceedings of the 61st {ACM/IEEE} Design Automation Conference},
  pages     = {1--6},
  year      = {2024},
}

arXiv24

SparrowSNN: A Hardware/software Co-design for Energy Efficient ECG Classification

Zhanglu Yan, Zhenyu Bai, Tulika Mitra, and 1 more author

arXiv preprint arXiv:2406.06543, 2024

Bib

% arXiv preprint. "SparrowSNN" and "ECG" are brace-protected so that
% sentence-casing styles keep their capitalisation. `journal` is kept for
% classic BibTeX styles; `eprint`/`archiveprefix` carry the identifier in
% machine-readable form.
@article{yan2024sparrowsnn,
  author        = {Yan, Zhanglu and Bai, Zhenyu and Mitra, Tulika and Wong, Weng-Fai},
  title         = {{SparrowSNN}: A Hardware/software Co-design for Energy Efficient {ECG} Classification},
  journal       = {arXiv preprint arXiv:2406.06543},
  year          = {2024},
  eprint        = {2406.06543},
  archiveprefix = {arXiv},
}

PACT24

Zed: A generalized accelerator for variably sparse matrix computations in ML

Pranav Dangi, Zhenyu Bai, Rohan Juneja, and 2 more authors

In Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques, 2024

Bib

% Conference paper, PACT 2024. The acronym "ML" is restored from the
% lowercased "ml" and brace-protected so styles cannot downcase it.
@inproceedings{dangi2024zed,
  author    = {Dangi, Pranav and Bai, Zhenyu and Juneja, Rohan and Wijerathne, Dhananjaya and Mitra, Tulika},
  title     = {Zed: A generalized accelerator for variably sparse matrix computations in {ML}},
  booktitle = {Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques},
  pages     = {246--257},
  year      = {2024},
}

arXiv24

Reconsidering the energy efficiency of spiking neural networks

Zhanglu Yan, Zhenyu Bai, and Weng-Fai Wong

arXiv preprint arXiv:2409.08290, 2024

Bib

% arXiv preprint. `journal` is kept so that classic BibTeX styles still print
% the identifier; `eprint`/`archiveprefix` expose it in machine-readable form.
@article{yan2024reconsidering,
  author        = {Yan, Zhanglu and Bai, Zhenyu and Wong, Weng-Fai},
  title         = {Reconsidering the energy efficiency of spiking neural networks},
  journal       = {arXiv preprint arXiv:2409.08290},
  year          = {2024},
  eprint        = {2409.08290},
  archiveprefix = {arXiv},
}

2023

TCAD23

Computing execution times with execution decision diagrams in the presence of out-of-order resources

Zhenyu Bai, Hugues Cassé, Thomas Carle, and 1 more author

IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2023

PHD23

Modélisation du comportement temporel du pipeline pour le calcul de WCET

Zhenyu Bai

Université Paul Sabatier-Toulouse III, 2023

2022

TECS22

A framework for calculating WCET based on execution decision diagrams

Zhenyu Bai, Hugues Cassé, Marianne De Michiel, and 2 more authors

ACM Transactions on Embedded Computing Systems (TECS), 2022

2021

COMPAS21

Déterminer le WCET d’applications temps-réel en présence de latences d’exécution variables

Zhenyu Bai, Hugues Cassé, Marianne De Michiel, and 2 more authors

In Conférence francophone d’informatique en Parallélisme, Architecture et Système (COMPAS 2021), 2021

2020

LCTES20

Improving the performance of WCET analysis in the presence of variable latencies

Zhenyu Bai, Hugues Cassé, Marianne De Michiel, and 2 more authors

In The 21st ACM SIGPLAN/SIGBED Conference on Languages, Compilers, and Tools for Embedded Systems, 2020

2019

JRWRTC19

PLRU cache analysis

Zhenyu Bai, David Monniaux, and C. Maïza

In 13th Junior Researcher Workshop on Real-Time Computing (JRWRTC19), 2019