{"payload":{"pageCount":2,"repositories":[{"type":"Public","name":"qserve","owner":"mit-han-lab","isFork":false,"description":"QServe: W4A8KV4 Quantization and System Co-design for Efficient LLM Serving","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":25,"starsCount":401,"forksCount":19,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,2,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-05T03:10:38.261Z"}},{"type":"Public","name":"proxylessnas","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware","allTopics":["acceleration","automl","specialization","efficient-model","on-device-ai","hardware-aware"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":2,"issueCount":0,"starsCount":1419,"forksCount":284,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-30T23:51:21.468Z"}},{"type":"Public","name":"spatten","owner":"mit-han-lab","isFork":false,"description":"[HPCA'21] SpAtten: Efficient Sparse Attention Architecture with Cascade Token and Head Pruning","allTopics":["rtl","attention","hardware-acceleration","spinalhdl","llm-inference"],"primaryLanguage":{"name":"Scala","color":"#c22d40"},"pullRequestCount":0,"issueCount":1,"starsCount":66,"forksCount":7,"license":"MIT License","participation":[0,0,0,0,0,8,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,5,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-27T19:21:28.876Z"}},{"type":"Public","name":"fastcomposer","owner":"mit-han-lab","isFork":false,"description":"[IJCV] FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":16,"starsCount":648,"forksCount":36,"license":"MIT License","participation":[0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-21T18:59:24.846Z"}},{"type":"Public","name":"distrifuser","owner":"mit-han-lab","isFork":false,"description":"[CVPR 2024 Highlight] DistriFusion: Distributed Parallel Inference for High-Resolution Diffusion Models","allTopics":["acceleration","parallelism","generative-model","diffusion-models","generative-ai"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":556,"forksCount":21,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,3,0,0,0,2,0,0,3,2,2,0,0,0,1,0,0,2,1,3,0,0,0,0,0,2,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-17T08:59:26.304Z"}},{"type":"Public","name":"efficientvit","owner":"mit-han-lab","isFork":false,"description":"EfficientViT is a new family of vision models for efficient high-resolution vision.","allTopics":["imagenet","segmentation","high-resolution","vision-transformer","efficientvit","segment-anything"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":88,"starsCount":1770,"forksCount":162,"license":"Apache License 
2.0","participation":[10,2,0,0,1,2,0,0,2,0,0,0,0,1,0,0,1,0,0,3,5,2,0,2,7,5,0,3,2,0,1,4,3,1,2,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-09T09:15:07.777Z"}},{"type":"Public","name":"torchsparse","owner":"mit-han-lab","isFork":false,"description":"[MICRO'23, MLSys'22] TorchSparse: Efficient Training and Inference Framework for Sparse Convolution on GPUs.","allTopics":["acceleration","pytorch"],"primaryLanguage":{"name":"Cuda","color":"#3A4E3A"},"pullRequestCount":1,"issueCount":21,"starsCount":1189,"forksCount":137,"license":"MIT License","participation":[0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-31T15:15:14.561Z"}},{"type":"Public archive","name":"bevfusion","owner":"mit-han-lab","isFork":false,"description":"[ICRA'23] BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird's-Eye View Representation","allTopics":["camera","pytorch","lidar","object-detection","sensor-fusion","semantic-segmentation","3d-perception"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2243,"forksCount":407,"license":"Apache License 2.0","participation":[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,1,1,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-31T04:04:52.791Z"}},{"type":"Public","name":"torchquantum","owner":"mit-han-lab","isFork":false,"description":"A PyTorch-based framework for Quantum Classical Simulation, Quantum Machine Learning, Quantum Neural Networks, Parameterized Quantum Circuits with support for easy deployments on real quantum computers.","allTopics":["machine-learning","system","deep-learning","neural-network","quantum","pytorch","quantum-computing","quantum-machine-learning","quantum-simulation","ml-for-systems","pytorch-quantum","quantum-neural-network","parameterized-quantum-circuit"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":7,"issueCount":57,"starsCount":1288,"forksCount":193,"license":"MIT License","participation":[0,1,0,12,6,1,9,3,4,21,9,2,0,13,2,1,1,14,6,1,2,24,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-21T06:34:36.432Z"}},{"type":"Public","name":"llm-awq","owner":"mit-han-lab","isFork":false,"description":"[MLSys 2024 Best Paper Award] AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":120,"starsCount":2340,"forksCount":177,"license":"MIT License","participation":[3,1,0,1,6,9,2,0,0,5,0,0,0,0,0,0,0,0,0,0,0,2,9,1,2,0,0,1,0,0,0,2,2,1,0,0,1,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-16T00:50:32.032Z"}},{"type":"Public","name":"hardware-aware-transformers","owner":"mit-han-lab","isFork":false,"description":"[ACL'20] HAT: Hardware-Aware Transformers for Efficient Natural Language 
Processing","allTopics":["natural-language-processing","machine-translation","transformer","specialization","efficient-model","hardware-aware"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":3,"starsCount":325,"forksCount":50,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-14T04:10:59.889Z"}},{"type":"Public","name":"smoothquant","owner":"mit-han-lab","isFork":false,"description":"[ICML 2023] SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":62,"starsCount":1183,"forksCount":135,"license":"MIT License","participation":[0,0,0,3,1,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,1,6,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-12T03:11:08.771Z"}},{"type":"Public archive","name":"spvnas","owner":"mit-han-lab","isFork":false,"description":"[ECCV 2020] Searching Efficient 3D Architectures with Sparse Point-Voxel Convolution","allTopics":["computer-vision","deep-learning","efficiency","pytorch","lidar","architecture-search","point-cloud","3d-deep-learning","semantickitti"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":582,"forksCount":109,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-11T21:13:56.527Z"}},{"type":"Public archive","name":"lite-transformer","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2020] Lite Transformer with Long-Short Range Attention","allTopics":["nlp","pytorch","transformer"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":596,"forksCount":81,"license":"Other","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-11T20:50:46.616Z"}},{"type":"Public","name":"temporal-shift-module","owner":"mit-han-lab","isFork":false,"description":"[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding","allTopics":["acceleration","low-latency","video-understanding","efficient-model","temporal-modeling","tsm","nvidia-jetson-nano"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":93,"starsCount":2051,"forksCount":418,"license":"MIT License","participation":[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-11T18:54:08.654Z"}},{"type":"Public","name":"streaming-llm","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2024] Efficient Streaming Language Models with Attention Sinks","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":39,"starsCount":6550,"forksCount":364,"license":"MIT 
License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-11T08:14:43.421Z"}},{"type":"Public","name":"tinychat-tutorial","owner":"mit-han-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":3,"starsCount":37,"forksCount":13,"license":null,"participation":[0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-10T00:26:37.441Z"}},{"type":"Public","name":"tinyengine","owner":"mit-han-lab","isFork":false,"description":"[NeurIPS 2020] MCUNet: Tiny Deep Learning on IoT Devices; [NeurIPS 2021] MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning; [NeurIPS 2022] MCUNetV3: On-Device Training Under 256KB Memory","allTopics":["c","microcontroller","cpp","pytorch","codegenerator","tinyml","deep-learning","quantization","edge-computing","neural-architecture-search"],"primaryLanguage":{"name":"C","color":"#555555"},"pullRequestCount":1,"issueCount":33,"starsCount":786,"forksCount":130,"license":"MIT License","participation":[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-08T08:38:03.452Z"}},{"type":"Public","name":"TinyChatEngine","owner":"mit-han-lab","isFork":false,"description":"TinyChatEngine: On-Device LLM Inference Library","allTopics":["c","arm","deep-learning","cpp","x86-64","quantization","edge-computing","cuda-programming","on-device-ai","large-language-models"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":3,"issueCount":31,"starsCount":701,"forksCount":68,"license":"MIT License","participation":[0,3,3,0,5,1,1,0,1,1,0,0,0,0,0,1,0,3,0,3,0,1,3,3,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-04T04:20:45.384Z"}},{"type":"Public","name":"Quest","owner":"mit-han-lab","isFork":false,"description":"[ICML 2024] Quest: Query-Aware Sparsity for Efficient Long-Context LLM Inference","allTopics":[],"primaryLanguage":{"name":"Cuda","color":"#3A4E3A"},"pullRequestCount":0,"issueCount":4,"starsCount":162,"forksCount":9,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,3,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-03T06:13:54.496Z"}},{"type":"Public","name":"lmquant","owner":"mit-han-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":11,"starsCount":102,"forksCount":6,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-12T00:29:07.090Z"}},{"type":"Public","name":"litepose","owner":"mit-han-lab","isFork":false,"description":"[CVPR'22] Lite Pose: Efficient Architecture Design for 2D Human Pose Estimation","allTopics":["pose-estimation","efficient-models","litepose"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":19,"starsCount":304,"forksCount":37,"license":"MIT 
License","participation":[0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-05T22:04:06.770Z"}},{"type":"Public","name":"gan-compression","owner":"mit-han-lab","isFork":false,"description":"[CVPR 2020] GAN Compression: Efficient Architectures for Interactive Conditional GANs","allTopics":["compression","pytorch","gans","pix2pix","cyclegan","image-to-image-translation","conditional-gans","gaugan"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":3,"starsCount":1103,"forksCount":148,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-05T21:59:17.248Z"}},{"type":"Public","name":"patch_conv","owner":"mit-han-lab","isFork":false,"description":"Patch convolution to avoid large GPU memory usage of Conv2D","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":1,"starsCount":73,"forksCount":5,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-26T05:14:52.126Z"}},{"type":"Public","name":"sparsevit","owner":"mit-han-lab","isFork":false,"description":"[CVPR'23] SparseViT: Revisiting Activation Sparsity for Efficient High-Resolution Vision Transformer","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":59,"forksCount":5,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-24T01:30:59.362Z"}},{"type":"Public","name":"mcunet","owner":"mit-han-lab","isFork":false,"description":"[NeurIPS 2020] MCUNet: Tiny Deep Learning on IoT Devices; [NeurIPS 2021] MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning","allTopics":["deep-learning","pytorch","neural-architecture-search","tinyml","microncontroller"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":21,"starsCount":454,"forksCount":81,"license":"MIT License","participation":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T18:17:37.080Z"}},{"type":"Public","name":"tiny-training","owner":"mit-han-lab","isFork":false,"description":"On-Device Training Under 256KB Memory [NeurIPS'22]","allTopics":["edge-ai","on-device-training","learning-on-the-edge"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":7,"starsCount":428,"forksCount":59,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T18:15:19.456Z"}},{"type":"Public","name":"once-for-all","owner":"mit-han-lab","isFork":false,"description":"[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment","allTopics":["acceleration","nas","automl","edge-ai","efficient-model","tinyml"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":53,"starsCount":1864,"forksCount":334,"license":"MIT 
License","participation":[3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-14T04:44:52.937Z"}},{"type":"Public","name":"tinyml","owner":"mit-han-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":6,"starsCount":747,"forksCount":136,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-29T04:22:13.211Z"}},{"type":"Public","name":"offsite-tuning","owner":"mit-han-lab","isFork":false,"description":"Offsite-Tuning: Transfer Learning without Full Model","allTopics":["deep-learning","transfer-learning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":5,"starsCount":365,"forksCount":37,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-27T17:39:38.113Z"}}],"repositoryCount":51,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"mit-han-lab repositories"}