Skip to content

Commit 2c19acc

Browse files
authored
Merge pull request #4803 from epage/osstr
fix(lex): Clarify unsafe safety
2 parents 5b101eb + 73e4025 commit 2c19acc

File tree

1 file changed

+30
-31
lines changed

1 file changed

+30
-31
lines changed

clap_lex/src/ext.rs

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,7 @@ pub trait OsStrExt: private::Sealed {
213213

214214
impl OsStrExt for OsStr {
215215
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
216-
// SAFETY: Only interacting with `OsStr` as `&str
217-
let bytes = unsafe { to_bytes(self) };
216+
let bytes = to_bytes(self);
218217
std::str::from_utf8(bytes)
219218
}
220219

@@ -223,23 +222,22 @@ impl OsStrExt for OsStr {
223222
}
224223

225224
fn find(&self, needle: &str) -> Option<usize> {
226-
// SAFETY: Only interacting with `OsStr` as `&str
227-
let bytes = unsafe { to_bytes(self) };
225+
let bytes = to_bytes(self);
228226
(0..=self.len().checked_sub(needle.len())?)
229227
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
230228
}
231229

232230
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
233-
// SAFETY: Only interacting with `OsStr` as `&str
234-
let bytes = unsafe { to_bytes(self) };
231+
let bytes = to_bytes(self);
235232
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
236-
// SAFETY: Only interacting with `OsStr` as `&str
237-
unsafe { to_os_str(s) }
233+
// SAFETY:
234+
// - This came from `to_bytes`
235+
// - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
236+
unsafe { to_os_str_unchecked(s) }
238237
})
239238
}
240239
fn starts_with(&self, prefix: &str) -> bool {
241-
// SAFETY: Only interacting with `OsStr` as `&str
242-
let bytes = unsafe { to_bytes(self) };
240+
let bytes = to_bytes(self);
243241
bytes.starts_with(prefix.as_bytes())
244242
}
245243

@@ -252,24 +250,24 @@ impl OsStrExt for OsStr {
252250
}
253251

254252
fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
255-
// BUG: This is unsafe and has been deprecated
253+
let bytes = to_bytes(self);
256254
unsafe {
257-
let bytes = to_bytes(self);
255+
// BUG: This is unsafe and has been deprecated
258256
let (first, second) = bytes.split_at(index);
259-
(to_os_str(first), to_os_str(second))
257+
(to_os_str_unchecked(first), to_os_str_unchecked(second))
260258
}
261259
}
262260

263261
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
264262
let start = self.find(needle)?;
265263
let end = start + needle.len();
266-
// SAFETY: Only interacting with `OsStr` as `&str
267-
unsafe {
268-
let haystack = to_bytes(self);
269-
let first = &haystack[0..start];
270-
let second = &haystack[end..];
271-
Some((to_os_str(first), to_os_str(second)))
272-
}
264+
let haystack = to_bytes(self);
265+
let first = &haystack[0..start];
266+
let second = &haystack[end..];
267+
// SAFETY:
268+
// - This came from `to_bytes`
269+
// - Since `needle` is `&str`, any split will be along UTF-8 boundarie
270+
unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
273271
}
274272
}
275273

@@ -281,12 +279,14 @@ mod private {
281279

282280
/// Allow access to raw bytes
283281
///
284-
/// # Safety
282+
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
283+
/// 7-bit ASCII or `&str`
285284
///
286-
/// - The bytes only make sense when compared with ASCII or `&str`
287-
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
288-
/// encoded, even within the same version of this crate (since its dependent on rustc version)
289-
unsafe fn to_bytes(s: &OsStr) -> &[u8] {
285+
/// # Compatibility
286+
///
287+
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
288+
/// (since its dependent on rustc)
289+
fn to_bytes(s: &OsStr) -> &[u8] {
290290
// SAFETY:
291291
// - Lifetimes are the same
292292
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
@@ -295,17 +295,16 @@ unsafe fn to_bytes(s: &OsStr) -> &[u8] {
295295
//
296296
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
297297
// but its in limbo
298-
std::mem::transmute(s)
298+
unsafe { std::mem::transmute(s) }
299299
}
300300

301301
/// Restore raw bytes as `OsStr`
302302
///
303303
/// # Safety
304304
///
305-
/// - The bytes only make sense when compared with ASCII or `&str`
306-
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
307-
/// encoded, even within the same version of this crate (since its dependent on rustc version)
308-
unsafe fn to_os_str(s: &[u8]) -> &OsStr {
305+
/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
306+
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
307+
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
309308
// SAFETY:
310309
// - Lifetimes are the same
311310
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
@@ -351,5 +350,5 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
351350
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
352351
let bytes = to_bytes(os);
353352
let (first, second) = bytes.split_at(index);
354-
(to_os_str(first), to_os_str(second))
353+
(to_os_str_unchecked(first), to_os_str_unchecked(second))
355354
}

0 commit comments

Comments
 (0)