src/runtime/mgc.go | 7 ++++--- src/runtime/mgcmark.go | 4 ++-- src/runtime/proc.go | 70 +++++++++++++++++++++++++++++++++-------------------- src/runtime/runtime2.go | 14 +++++++------- src/runtime/stack.go | 4 ++-- src/runtime/trace.go | 2 +- src/runtime/tracestatus.go | 9 +++++---- diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f72edc2afe95dab50ef4b4356965609db2309f3f..9cf0c901097b154d83f0c5d38c6d117afe483ade 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1019,7 +1019,7 @@ curgp := mp.curg // N.B. The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(curgp, _Grunning, waitReasonGarbageCollection) + casGToWaitingForSuspendG(curgp, _Grunning, waitReasonGarbageCollection) // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. Cuts @@ -1471,7 +1471,8 @@ } systemstack(func() { // Mark our goroutine preemptible so its stack - // can be scanned. This lets two mark workers + // can be scanned or observed by the execution + // tracer. This, for example, lets two mark workers // scan each other (otherwise, they would // deadlock). We must not modify anything on // the G stack. However, stack shrinking is @@ -1481,7 +1482,7 @@ // // N.B. The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(gp, _Grunning, waitReasonGCWorkerActive) + casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCWorkerActive) switch pp.gcMarkWorkerMode { default: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 61e917df41089b19f6cb805e09670091da77dd5b..2563580e30f2b17906ff8107bd1a05a479b83876 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -217,7 +217,7 @@ // worker or we're in mark termination. userG := getg().m.curg selfScan := gp == userG && readgstatus(userG) == _Grunning if selfScan { - casGToWaitingForGC(userG, _Grunning, waitReasonGarbageCollectionScan) + casGToWaitingForSuspendG(userG, _Grunning, waitReasonGarbageCollectionScan) } // TODO: suspendG blocks (and spins) until gp @@ -645,7 +645,7 @@ throw("nwait > work.nprocs") } // gcDrainN requires the caller to be preemptible. - casGToWaitingForGC(gp, _Grunning, waitReasonGCAssistMarking) + casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking) // drain own cached work first in the hopes that it // will be more cache friendly. diff --git a/src/runtime/proc.go b/src/runtime/proc.go index e3cdf71911be45d404ec04fd5c02408be4a94ecd..d922dd6193989f58950493f29f233571e4fe0be1 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1282,13 +1282,13 @@ gp.waitreason = reason casgstatus(gp, old, _Gwaiting) } -// casGToWaitingForGC transitions gp from old to _Gwaiting, and sets the wait reason. -// The wait reason must be a valid isWaitingForGC wait reason. +// casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason. +// The wait reason must be a valid isWaitingForSuspendG wait reason. // // Use this over casgstatus when possible to ensure that a waitreason is set. -func casGToWaitingForGC(gp *g, old uint32, reason waitReason) { - if !reason.isWaitingForGC() { - throw("casGToWaitingForGC with non-isWaitingForGC wait reason") +func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) { + if !reason.isWaitingForSuspendG() { + throw("casGToWaitingForSuspendG with non-isWaitingForSuspendG wait reason") } casGToWaiting(gp, old, reason) } @@ -1429,23 +1429,7 @@ semacquire(&worldsema) gp := getg() gp.m.preemptoff = reason.String() systemstack(func() { - // Mark the goroutine which called stopTheWorld preemptible so its - // stack may be scanned. - // This lets a mark worker scan us while we try to stop the world - // since otherwise we could get in a mutual preemption deadlock. - // We must not modify anything on the G stack because a stack shrink - // may occur. A stack shrink is otherwise OK though because in order - // to return from this function (and to leave the system stack) we - // must have preempted all goroutines, including any attempting - // to scan our stack, in which case, any stack shrinking will - // have already completed by the time we exit. - // - // N.B. The execution tracer is not aware of this status - // transition and handles it specially based on the - // wait reason. - casGToWaitingForGC(gp, _Grunning, waitReasonStoppingTheWorld) stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack - casgstatus(gp, _Gwaiting, _Grunning) }) return stopTheWorldContext } @@ -1534,7 +1518,30 @@ // stopTheWorld to block. // // Returns the STW context. When starting the world, this context must be // passed to startTheWorldWithSema. +// +//go:systemstack func stopTheWorldWithSema(reason stwReason) worldStop { + // Mark the goroutine which called stopTheWorld preemptible so its + // stack may be scanned by the GC or observed by the execution tracer. + // + // This lets a mark worker scan us or the execution tracer take our + // stack while we try to stop the world since otherwise we could get + // in a mutual preemption deadlock. + // + // We must not modify anything on the G stack because a stack shrink + // may occur, now that we switched to _Gwaiting, specifically if we're + // doing this during the mark phase (mark termination excepted, since + // we know that stack scanning is done by that point). A stack shrink + // is otherwise OK though because in order to return from this function + // (and to leave the system stack) we must have preempted all + // goroutines, including any attempting to scan our stack, in which + // case, any stack shrinking will have already completed by the time we + // exit. + // + // N.B. The execution tracer is not aware of this status transition and + // andles it specially based on the wait reason. + casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld) + trace := traceAcquire() if trace.ok() { trace.STWStart(reason) @@ -1641,6 +1648,9 @@ throw(bad) } worldStopped() + + // Switch back to _Grunning, now that the world is stopped. + casgstatus(getg().m.curg, _Gwaiting, _Grunning) return worldStop{ reason: reason, @@ -1999,15 +2009,23 @@ // part of the current goroutine's stack, since the GC may move it. func forEachP(reason waitReason, fn func(*p)) { systemstack(func() { gp := getg().m.curg - // Mark the user stack as preemptible so that it may be scanned. - // Otherwise, our attempt to force all P's to a safepoint could - // result in a deadlock as we attempt to preempt a worker that's - // trying to preempt us (e.g. for a stack scan). + // Mark the user stack as preemptible so that it may be scanned + // by the GC or observed by the execution tracer. Otherwise, our + // attempt to force all P's to a safepoint could result in a + // deadlock as we attempt to preempt a goroutine that's trying + // to preempt us (e.g. for a stack scan). + // + // We must not modify anything on the G stack because a stack shrink + // may occur. A stack shrink is otherwise OK though because in order + // to return from this function (and to leave the system stack) we + // must have preempted all goroutines, including any attempting + // to scan our stack, in which case, any stack shrinking will + // have already completed by the time we exit. // // N.B. The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(gp, _Grunning, reason) + casGToWaitingForSuspendG(gp, _Grunning, reason) forEachPInternal(fn) casgstatus(gp, _Gwaiting, _Grunning) }) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ca69719db0e2754a42213f3876812eb6052b20bd..c88f2b70585d2c80da8e6c90b5a837037013bdd7 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1153,17 +1153,17 @@ w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } -func (w waitReason) isWaitingForGC() bool { - return isWaitingForGC[w] +func (w waitReason) isWaitingForSuspendG() bool { + return isWaitingForSuspendG[w] } -// isWaitingForGC indicates that a goroutine is only entering _Gwaiting and -// setting a waitReason because it needs to be able to let the GC take ownership -// of its stack. The G is always actually executing on the system stack, in -// these cases. +// isWaitingForSuspendG indicates that a goroutine is only entering _Gwaiting and +// setting a waitReason because it needs to be able to let the suspendG +// (used by the GC and the execution tracer) take ownership of its stack. +// The G is always actually executing on the system stack in these cases. // // TODO(mknyszek): Consider replacing this with a new dedicated G status. -var isWaitingForGC = [len(waitReasonStrings)]bool{ +var isWaitingForSuspendG = [len(waitReasonStrings)]bool{ waitReasonStoppingTheWorld: true, waitReasonGCMarkTermination: true, waitReasonGarbageCollection: true, diff --git a/src/runtime/stack.go b/src/runtime/stack.go index d43c6ace4ffcf20e29eb7cbe1089493edd09b6b2..f0efb176b5e049abfeb0bcf8b2950b51275d3933 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1173,14 +1173,14 @@ if gp.parkingOnChan.Load() { return false } // We also can't copy the stack while tracing is enabled, and - // gp is in _Gwaiting solely to make itself available to the GC. + // gp is in _Gwaiting solely to make itself available to suspendG. // In these cases, the G is actually executing on the system // stack, and the execution tracer may want to take a stack trace // of the G's stack. Note: it's safe to access gp.waitreason here. // We're only checking if this is true if we took ownership of the // G with the _Gscan bit. This prevents the goroutine from transitioning, // which prevents gp.waitreason from changing. - if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForGC() { + if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() { return false } return true diff --git a/src/runtime/trace.go b/src/runtime/trace.go index adf7b0951dce4b414d4c8d8fb23eaf709a9ef111..d59501e80e891ff88191893093fb6d48b4ea1641 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -375,7 +375,7 @@ systemstack(func() { me := getg().m.curg // We don't have to handle this G status transition because we // already eliminated ourselves from consideration above. - casGToWaitingForGC(me, _Grunning, waitReasonTraceGoroutineStatus) + casGToWaitingForSuspendG(me, _Grunning, waitReasonTraceGoroutineStatus) // We need to suspend and take ownership of the G to safely read its // goid. Note that we can't actually emit the event at this point // because we might stop the G in a window where it's unsafe to write diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 77ccdd139841a71008ae9a8e4cbcc5be378585bd..5e109a9e346ac705372e86257c850acb6e6c4e6c 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -140,11 +140,12 @@ case _Gwaiting, _Gpreempted: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the - // garbage collector. In these cases, we're not going to - // emit an event, and we want these goroutines to appear in - // the final trace as if they're running, not blocked. + // garbage collector and traceAdvance (via suspendG). In + // these cases, we're not going to emit an event, and we + // want these goroutines to appear in the final trace as + // if they're running, not blocked. tgs = traceGoWaiting - if status == _Gwaiting && wr.isWaitingForGC() { + if status == _Gwaiting && wr.isWaitingForSuspendG() { tgs = traceGoRunning } case _Gdead: