@@ -213,8 +213,7 @@ pub trait OsStrExt: private::Sealed {
213213
214214impl OsStrExt for OsStr {
215215 fn try_str ( & self ) -> Result < & str , std:: str:: Utf8Error > {
216- // SAFETY: Only interacting with `OsStr` as `&str
217- let bytes = unsafe { to_bytes ( self ) } ;
216+ let bytes = to_bytes ( self ) ;
218217 std:: str:: from_utf8 ( bytes)
219218 }
220219
@@ -223,23 +222,22 @@ impl OsStrExt for OsStr {
223222 }
224223
225224 fn find ( & self , needle : & str ) -> Option < usize > {
226- // SAFETY: Only interacting with `OsStr` as `&str
227- let bytes = unsafe { to_bytes ( self ) } ;
225+ let bytes = to_bytes ( self ) ;
228226 ( 0 ..=self . len ( ) . checked_sub ( needle. len ( ) ) ?)
229227 . find ( |& x| bytes[ x..] . starts_with ( needle. as_bytes ( ) ) )
230228 }
231229
232230 fn strip_prefix ( & self , prefix : & str ) -> Option < & OsStr > {
233- // SAFETY: Only interacting with `OsStr` as `&str
234- let bytes = unsafe { to_bytes ( self ) } ;
231+ let bytes = to_bytes ( self ) ;
235232 bytes. strip_prefix ( prefix. as_bytes ( ) ) . map ( |s| {
236- // SAFETY: Only interacting with `OsStr` as `&str
237- unsafe { to_os_str ( s) }
233+ // SAFETY:
234+ // - This came from `to_bytes`
235+ // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
236+ unsafe { to_os_str_unchecked ( s) }
238237 } )
239238 }
240239 fn starts_with ( & self , prefix : & str ) -> bool {
241- // SAFETY: Only interacting with `OsStr` as `&str
242- let bytes = unsafe { to_bytes ( self ) } ;
240+ let bytes = to_bytes ( self ) ;
243241 bytes. starts_with ( prefix. as_bytes ( ) )
244242 }
245243
@@ -252,24 +250,24 @@ impl OsStrExt for OsStr {
252250 }
253251
254252 fn split_at ( & self , index : usize ) -> ( & OsStr , & OsStr ) {
255- // BUG: This is unsafe and has been deprecated
253+ let bytes = to_bytes ( self ) ;
256254 unsafe {
257- let bytes = to_bytes ( self ) ;
255+ // BUG: This is unsafe and has been deprecated
258256 let ( first, second) = bytes. split_at ( index) ;
259- ( to_os_str ( first) , to_os_str ( second) )
257+ ( to_os_str_unchecked ( first) , to_os_str_unchecked ( second) )
260258 }
261259 }
262260
263261 fn split_once ( & self , needle : & ' _ str ) -> Option < ( & OsStr , & OsStr ) > {
264262 let start = self . find ( needle) ?;
265263 let end = start + needle. len ( ) ;
266- // SAFETY: Only interacting with `OsStr` as `&str
267- unsafe {
268- let haystack = to_bytes ( self ) ;
269- let first = & haystack [ 0 ..start ] ;
270- let second = & haystack [ end.. ] ;
271- Some ( ( to_os_str ( first ) , to_os_str ( second ) ) )
272- }
264+ let haystack = to_bytes ( self ) ;
265+ let first = & haystack [ 0 ..start ] ;
266+ let second = & haystack [ end.. ] ;
267+ // SAFETY:
268+ // - This came from `to_bytes`
269+ // - Since `needle` is `&str`, any split will be along UTF-8 boundarie
270+ unsafe { Some ( ( to_os_str_unchecked ( first ) , to_os_str_unchecked ( second ) ) ) }
273271 }
274272}
275273
@@ -281,12 +279,14 @@ mod private {
281279
282280/// Allow access to raw bytes
283281///
284- /// # Safety
282+ /// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
283+ /// 7-bit ASCII or `&str`
285284///
286- /// - The bytes only make sense when compared with ASCII or `&str`
287- /// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
288- /// encoded, even within the same version of this crate (since its dependent on rustc version)
289- unsafe fn to_bytes ( s : & OsStr ) -> & [ u8 ] {
285+ /// # Compatibility
286+ ///
287+ /// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
288+ /// (since its dependent on rustc)
289+ fn to_bytes ( s : & OsStr ) -> & [ u8 ] {
290290 // SAFETY:
291291 // - Lifetimes are the same
292292 // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
@@ -295,17 +295,16 @@ unsafe fn to_bytes(s: &OsStr) -> &[u8] {
295295 //
296296 // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
297297 // but its in limbo
298- std:: mem:: transmute ( s)
298+ unsafe { std:: mem:: transmute ( s) }
299299}
300300
301301/// Restore raw bytes as `OsStr`
302302///
303303/// # Safety
304304///
305- /// - The bytes only make sense when compared with ASCII or `&str`
306- /// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
307- /// encoded, even within the same version of this crate (since its dependent on rustc version)
308- unsafe fn to_os_str ( s : & [ u8 ] ) -> & OsStr {
305+ /// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
306+ /// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
307+ unsafe fn to_os_str_unchecked ( s : & [ u8 ] ) -> & OsStr {
309308 // SAFETY:
310309 // - Lifetimes are the same
311310 // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
@@ -351,5 +350,5 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
351350pub ( crate ) unsafe fn split_at ( os : & OsStr , index : usize ) -> ( & OsStr , & OsStr ) {
352351 let bytes = to_bytes ( os) ;
353352 let ( first, second) = bytes. split_at ( index) ;
354- ( to_os_str ( first) , to_os_str ( second) )
353+ ( to_os_str_unchecked ( first) , to_os_str_unchecked ( second) )
355354}
0 commit comments