#thread local spontaneously clears itself in dylib (probably UB or FFI-related)

99 messages · Page 1 of 1 (latest)

still chasm
#

The following is a rough outline of my code on the rust side. These two functions are the only two that contain any references to CTX

#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct CtxPtr(NonNull<OxideGLContext>>);

// Per-thread slot holding the current context pointer, if any.
// Starts out as `None` (const-initialized) and lives in a `Cell`, so the
// `Copy` value is read with `CTX.get()` and replaced with `CTX.set(..)`.
thread_local! {
    pub static CTX: Cell<Option<CtxPtr>> = const {Cell::new(None)};
}
/// Stores `ctx` as the calling thread's current context (`None` clears it).
///
/// Called from C. Prints a backtrace before the store and the stored value
/// afterwards as debugging aids.
#[no_mangle]
extern "C" fn oxidegl_set_current_context(ctx: Option<CtxPtr>) {
    // `Backtrace` implements `Display`, so it can be formatted directly
    // without first allocating a `String`.
    println!(
        "stack trace in set_context: {}",
        std::backtrace::Backtrace::force_capture()
    );
    CTX.set(ctx);
    // Sample output: "set context Some(0xblahblah)"
    println!("set context {:?}", ctx);
}
/// Returns the context pointer stored for the calling thread.
///
/// Called from Rust entry points that are themselves invoked by C. Prints a
/// backtrace and the value read, then panics if no context is stored.
fn get_state() -> CtxPtr {
    let trace = std::backtrace::Backtrace::force_capture();
    println!("\n\nstack trace from get_state: {}", trace);

    let current = CTX.get();
    println!("got context {:?}", current);

    // Reported failure point: `current` is `None` here, so the unwrap panics.
    current.unwrap()
}
/// C-callable entry point that reads the current thread's context via
/// `get_state` and hands it to `do_something`.
#[no_mangle]
extern "C" fn thing_that_uses_state() {
    let state = get_state();
    do_something(state);
}

This library is linked to a C binary that calls the functions in this pattern. I've confirmed that all of this is happening on the same thread

// Call sequence on the C side, all on one thread.
// create_context returns valid CtxPtr
void* ptr = oxidegl_create_context([irrelevant args]);
// Hands the pointer to Rust, which stores it in the CTX thread local
oxidegl_set_current_context(ptr);
// Causes panic in get_state, CTX is None????
thing_that_uses_state();

Any insight into this would be appreciated, I'm completely stumped

#

related question: is it sound to represent a C void* T as Option<NonNull<T>>

paper axle
still chasm
#

the output looks like
[blah blah stacktrace]
set context Some(0xwhatever)
[blah blah stacktrace]
got context None

#

ill paste after I wake up my main machine

#

in bed on laptop

#

hmm this is kinda sus

#

when I use a rust bin target to process spawn the binary that has the issue theres a segfault in some glfw code

#

when I run it from terminal there's a panic on None

#

bruh

#

what

#

wait there is no way

#

segfault in makeContextCurrentOxideGL: (running via rust process::spawn() + cargo bin target)

Thread 0 Crashed::  Dispatch queue: com.apple.main-thread
0   libglfw.3.4.dylib                        0x101613870 makeContextCurrentOxideGL + 16
1   libglfw.3.4.dylib                        0x1015f4c02 glfwMakeContextCurrent + 194
2   libglfw.3.4.dylib                        0x1015f4af4 _glfwRefreshContextAttribs + 1892
3   libglfw.3.4.dylib                        0x10160f368 _glfwCreateWindowCocoa + 232
4   libglfw.3.4.dylib                        0x1015fef6a glfwCreateWindow + 1002
5   triangle-opengl                          0x10148b071 main + 129
6   dyld                                     0x107cb852e start + 462
#

rust panic (aborts because caller is C) -- running directly from terminal

Thread 0 Crashed::  Dispatch queue: com.apple.main-thread
0   libsystem_kernel.dylib                0x7ff81b5f0ffe __pthread_kill + 10
1   libsystem_pthread.dylib               0x7ff81b6271ff pthread_kill + 263
2   libsystem_c.dylib                     0x7ff81b572d14 abort + 123
3   liboxidegl.dylib                         0x115a9d079 std::sys::unix::abort_internal::h6cf38b02528bc80a + 9
4   liboxidegl.dylib                         0x115a9b41d rust_panic + 109
5   liboxidegl.dylib                         0x115a9b2e8 std::panicking::rust_panic_with_hook::h28420d44f043d3a5 + 632
6   liboxidegl.dylib                         0x115a9b00c std::panicking::begin_panic_handler::_$u7b$$u7b$closure$u7d$$u7d$::h6f886c0e89185cdc + 108
7   liboxidegl.dylib                         0x115a9a239 std::sys_common::backtrace::__rust_end_short_backtrace::h7ca2b2ff22d46410 + 9
8   liboxidegl.dylib                         0x115a9ad92 rust_begin_unwind + 66
9   liboxidegl.dylib                         0x115abc9f5 core::panicking::panic_fmt::h52dad7a658d9bf41 + 53
10  liboxidegl.dylib                         0x115abca87 core::panicking::panic::ha1c871ecc7d4b1cb + 71
11  liboxidegl.dylib                         0x115a53bae core::option::Option$LT$T$GT$::unwrap::h3d460fac59552063 + 62 (option.rs:931)
12  liboxidegl.dylib                         0x115a60add oxidegl::context::get_state::h02b3d65196d42592 + 317 (mod.rs:44)
13  liboxidegl.dylib                         0x115a45ae3 glGetString + 19 (gl_core.rs:209)
14  libglfw.3.4.dylib                        0x10adf64be _glfwRefreshContextAttribs + 302
15  libglfw.3.4.dylib                        0x10ae11368 _glfwCreateWindowCocoa + 232
16  libglfw.3.4.dylib                        0x10ae00f6a glfwCreateWindow + 1002
17  triangle-opengl                          0x10ac8d071 main + 129
18  dyld                                     0x117fc352e start + 462
#

wtf

#

hmm this is scuffed why are there two different crashes based on what I run it with lol

#

there could be UB/some weird FFI interaction afoot

#

wait hang on a second

#

lmfao am I doing a buffer overrun in my glGet code

#

the segfault is loading
0x00000000000002b8

#

lemme see if that's an output of any gets

#

or I guess get might not even be getting called

#

oh nevermind

still chasm
#

if I run it straight from the terminal it doesnt work??

#

huh

paper axle
#

yeah maybe you have some good old UB

still chasm
#

yeahh

#

it's probably UB if I can change the program behavior by running it with a different parent process lmao

#

the issue is, I don't even know where it's coming from

paper axle
#

not sure if there's anyway we can help without just browsing the full source code

still chasm
#

yeah

#

there are a few suspect places

#

CtxPtr implements DerefMut<Target = OxideGLContext> in a very unsound way, but hopefully multiple mutable references are never created

paper axle
#

you say that so confidently when your pointer is static and copy lol

still chasm
#

I meant to say

#

that I hoped there werent any aliasing mutable references

#

because there are lmao

still chasm
still chasm
still chasm
#

meh I give up for now

#

I understand why people dislike opengl's ergonomics

#

being on the providing end of an uber stateful api is just as painful as having to use one

still chasm
#

@paper axle if you feel like reading some really scuffed shit I set the repo to public. I really need a cleaner/safer/saner way to store the context state in a thread-local way, but none of the stuff I tried worked
link here

#

there's a lot of unsafe scattered around for various reasons. The spicy stuff is mostly in context/mod.rs and metal_view.rs

paper axle
#

GlEnum looks very sketch

#

or to be more specific, the blanket impl below it

still chasm
#

I really should just make a derive macro for it

#

or even just a macro_rules one would probably cut down most of the boilerplate

paper axle
#

yeah, just a macro_rules would be good

still chasm
#

just pushed

#
/// Implements `From<$e> for OxideGLItemSingle` for a fieldless enum `$e` by
/// converting the enum's discriminant to `u32` and then calling `.into()`.
macro_rules! impl_gl_enum {
    ($e:ident) => {
        impl From<$e> for OxideGLItemSingle {
            #[inline]
            fn from(val: $e) -> Self {
                // An `as` cast does no layout check (unlike `transmute`), so
                // these compile-time asserts keep rejecting enums that are not
                // laid out like a `u32`.
                const _: () = assert!(std::mem::align_of::<$e>() == std::mem::align_of::<u32>());
                const _: () = assert!(std::mem::size_of::<$e>() == std::mem::size_of::<u32>());
                // Enum -> integer is a safe cast; no `unsafe` needed.
                (val as u32).into()
            }
        }
    };
}
#

I think the constant asserts are overkill because transmute throws a compile error anyways

paper axle
#

you don't need unsafe btw, just use as casting

still chasm
#

you can as cast repr(u32) enums?

paper axle
#

ye

#

pretty sure

still chasm
#

huh, did not know that

paper axle
#

you only need transmute for references

still chasm
#

or going from int -> enum

#

in which case its unsafe because you could violate a niche

paper axle
#

and you're not checking against niches anyway

still chasm
#

ye

#

I only care about enum -> int

paper axle
#

then just as is fine

still chasm
#

nice

/// Implements `From<$e> for OxideGLItemSingle` for a fieldless enum `$e` by
/// casting the enum value to `u32` and then calling `.into()`.
macro_rules! impl_gl_enum {
    ($e:ident) => {
        impl From<$e> for OxideGLItemSingle {
            #[inline]
            fn from(val: $e) -> Self {
                // Cast the parameter `val`; there is no binding named `e` in
                // scope inside the generated function.
                (val as u32).into()
            }
        }
    };
}
#

I think theres still UB somewhere

#

this thread local is acting really weirdly

#

is it possible that GLFW and rust somehow alias a thread local storage entry

paper axle
#

btw I saw you basically wrote your own Mutex

#

maybe you should use existing synchronization primitives

still chasm
#

yeah that was jank

#

mostly just for testing

paper axle
#

ye i know

still chasm
#

once I fix stupid thread locals im gonna actually store the whole context in a thread local

#

not just a pointer to it

#

still not sure how multiple things are mutating it

still chasm
#

this is really odd

#

now there are no more raw pointers/jank

#

same bug

#

at this point this is the fourth separate attempt to make thread locals storing the context work

#

this feels like some weird FFI interaction

paper axle
#

is the C code you're testing with also somewhere?

still chasm
#

yes

#

its oxidegl-glfw/examples/opengl-triangle

#

here's the output when running from terminal

#

just realized I wasnt doing debug build so the c side didnt have symbols so fixed that at least

#

(the call to glGetIntegerv and the printf following I added for debugging purposes)

#
  previous = _glfwPlatformGetTls(&_glfw.contextSlot);

  glfwMakeContextCurrent((GLFWwindow *)window);
  uint flags;
  window->context.GetIntegerv =
      (PFNGLGETINTEGERVPROC)window->context.getProcAddress("glGetIntegerv");
  //Rust panics in this call
  window->context.GetIntegerv(GL_CONTEXT_FLAGS, &flags);
  printf("flags: %i", flags);

this is what C is doing around when rust says the TLS is empty
(github link to glfwRefreshContextAttribs)

#

note that glfwMakeContextCurrent sets the TLS

#

glfwMakeContextCurrent translates into this call

// Makes `window`'s OxideGL context current on the calling thread, or detaches
// the current context when `window` is NULL.
//
// GLFW invokes the makeCurrent callback with NULL to detach a context (for
// example when glfwMakeContextCurrent(NULL) is called), so `window` must not
// be dereferenced unconditionally.
static void makeContextCurrentOxideGL(_GLFWwindow *window)
{
  // Set rust current context TLS (NULL clears it on the Rust side)
  oxidegl_set_current_context(window ? window->context.oxidegl.ctx : NULL);
  // Set GLFW current context TLS
  _glfwPlatformSetTls(&_glfw.contextSlot, window);
}
#

btw, thanks for all the help lol

#

this isn't exactly your everyday "help my lifetimes arent compiling" issue

paper axle
#

all good, it's kinda fun. I'll have another look in a bit

still chasm
#

weird thread local behavior (probably UB or FFI-related)