-
Notifications
You must be signed in to change notification settings - Fork 81
Expand file tree
/
Copy pathlru_cache.cpp
More file actions
140 lines (120 loc) · 3.71 KB
/
lru_cache.cpp
File metadata and controls
140 lines (120 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// clang-format off
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES.
* All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#include <bindings.h>
#include <lru_cache.h>
namespace nvfuser::python {
// Look up `unique_fusion` in the LRU cache and return the FusionExecutorCache
// associated with it, creating a new entry (and evicting the least recently
// used one if the cache is full) on a miss.
//
// Parameters:
//   unique_fusion - the fusion to look up. On a miss, ownership is moved into
//     a newly constructed FusionExecutorCache. On a hit it is not stored and
//     is released when this function returns.
//
// Returns:
//   A non-owning pointer to the FusionExecutorCache held by the cache. The
//   cache keeps ownership, so the pointer stops being valid once that entry
//   is evicted by a later call.
//
// The whole operation runs while holding lru_mutex_. Every call counts as one
// lookup; hits additionally bump the hit counter and the entry's visit count.
FusionExecutorCache* LRUCache::cacheCompile(
    std::unique_ptr<Fusion> unique_fusion) {
  std::lock_guard<std::mutex> guard(lru_mutex_);
  Fusion* fusion = unique_fusion.get();
  auto it = items_map.find(fusion);
  num_cache_lookups_++;

  // short-circuit: Fusion already exists; Get FusionExecutorCache
  if (it != items_map.end()) {
    num_cache_hits_++;
    it->second->visits++;
    // Move the item to the front of the list (most recently used)
    items_list.splice(items_list.begin(), items_list, it->second);
    return it->second->executor_cache.get();
  }

  // The fusion is new, check for capacity and evict LRU if necessary.
  // The emptiness check keeps back()/pop_back() off an empty list, which
  // would otherwise happen when max_fusions_ is 0 (undefined behavior). In
  // that configuration the cache simply holds the single most recent entry,
  // so the pointer returned below is still backed by a live object.
  if (!items_list.empty() && items_map.size() >= max_fusions_) {
    // Evict the least recently used item (the one at the back).
    // Erase the map entry BEFORE destroying the list entry: the list entry
    // owns the executor cache that was handed the Fusion, so after pop_back()
    // the key pointer may dangle. NOTE(review): this matters if the map's
    // hasher/equality (declared in lru_cache.h) dereference the key.
    items_map.erase(items_list.back().fusion);
    items_list.pop_back();
  }

  // Insert the new item at the front of the list (most recently used),
  // moving the fusion into a new FusionExecutorCache. `fusion` was captured
  // before the move, so the raw key pointer stays usable.
  items_list.push_front(
      {fusion,
       std::make_unique<FusionExecutorCache>(
           std::move(unique_fusion),
           /*fusion_id=*/numFusionsCompiled()),
       /*visits=*/0});

  // Store the iterator to the new item in the map
  items_map.emplace(fusion, items_list.begin());
  return items_list.front().executor_cache.get();
}
// Produce a human-readable, multi-line report of the cache state.
//
// The report always starts with the configured capacity. For an empty cache
// it then only states that the cache is empty. Otherwise it lists the number
// of cached fusions, the number of fusions compiled, the visit count of every
// entry in list order (cacheCompile keeps the most recently used entry at the
// front), and the lookup / hit counters with the resulting hit rate in
// percent.
//
// Runs while holding lru_mutex_.
std::string LRUCache::stats() const {
  std::lock_guard<std::mutex> guard(lru_mutex_);

  std::stringstream report;
  report << "Max Fusions Allowed: " << max_fusions_ << "\n";

  // short-circuit: nothing else is worth printing for an empty cache.
  if (items_list.empty()) {
    report << "The fusion cache is empty.\n";
    return report.str();
  }

  report << "Total Fusions in Cache: " << items_list.size() << "\n"
         << "Total Unique Fusions Compiled: " << numFusionsCompiled() << "\n"
         << "Cache Hits by LRU ordering:\n";
  for (const auto&& [position, entry] : enumerate(items_list)) {
    report << "\t" << position << " -> " << entry.visits << " hits\n";
  }

  // Share of lookups answered from the cache, in percent. Stays at zero when
  // no lookup has been recorded so the division is never done with a zero
  // denominator.
  float hit_rate = 0.0f;
  if (num_cache_lookups_ != 0) {
    hit_rate = static_cast<float>(
        static_cast<float>(num_cache_hits_) /
        static_cast<float>(num_cache_lookups_) * 100.0);
  }

  report << "Cache Lookups: " << num_cache_lookups_ << "\n"
         << "Cache Hits: " << num_cache_hits_ << "\n"
         << "Hit Rate: " << hit_rate << "%" << "\n";
  return report.str();
}
// Register the LRUCache class on the given Python module.
//
// Exposes the constructor (taking `max_fusions`) and the methods
// `cache_compile`, `stats` and `num_fusions`. The raw-string docstrings are
// what Python users see; their section underlines are kept at least as long
// as the section title so docstring tooling recognizes the sections.
void bindLRUCache(py::module_& nvfuser) {
  py::class_<LRUCache>(nvfuser, "LRUCache")
      .def(
          py::init<size_t>(),
          py::arg("max_fusions"),
          R"(
Create a new LRUCache.
Parameters
----------
max_fusions : int
The maximum number of fusions to cache.
)")
      // The returned FusionExecutorCache is handed to Python as a non-owning
      // reference; the LRUCache keeps ownership of it.
      // NOTE(review): cacheCompile destroys an entry when it is evicted, so a
      // Python reference to an evicted executor cache would dangle - confirm
      // callers do not hold the result across later cache_compile calls.
      .def(
          "cache_compile",
          &LRUCache::cacheCompile,
          py::arg("fusion"),
          R"(
Compile a fusion and its executor cache into the cache.
If the fusion is already in the cache, it will be moved to the front of the
cache.
If the cache is full, the least recently used fusion will be evicted.
Parameters
----------
fusion : Fusion
The fusion to cache.
Returns
-------
FusionExecutorCache
The executor cache for the fusion.
)",
          py::return_value_policy::reference)
      .def(
          "stats",
          &LRUCache::stats,
          R"(
Get stats about the LRU cache.
Returns
-------
str
The stats about the LRU cache.
)")
      .def(
          "num_fusions",
          &LRUCache::numFusions,
          R"(
Get the number of fusions in the LRU cache.
Returns
-------
int
The number of fusions in the LRU cache.
)");
}
} // namespace nvfuser::python