src/runtime/mgc.go | 7 ++++--- src/runtime/mgcmark.go | 4 ++-- src/runtime/proc.go | 70 +++++++++++++++++++++++++++++++++-------------------- src/runtime/runtime2.go | 14 +++++++------- src/runtime/stack.go | 4 ++-- src/runtime/trace.go | 2 +- src/runtime/tracestatus.go | 9 +++++---- diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 48001cfdb9463182914fb63efb70106f504f60b0..44669711e97cd727021510401959af7a03bde263 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1030,7 +1030,7 @@ curgp := mp.curg // N.B. The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(curgp, _Grunning, waitReasonGarbageCollection) + casGToWaitingForSuspendG(curgp, _Grunning, waitReasonGarbageCollection) // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. Cuts @@ -1482,7 +1482,8 @@ } systemstack(func() { // Mark our goroutine preemptible so its stack - // can be scanned. This lets two mark workers + // can be scanned or observed by the execution + // tracer. This, for example, lets two mark workers // scan each other (otherwise, they would // deadlock). We must not modify anything on // the G stack. However, stack shrinking is @@ -1492,7 +1493,7 @@ // // N.B. The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(gp, _Grunning, waitReasonGCWorkerActive) + casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCWorkerActive) switch pp.gcMarkWorkerMode { default: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 823b2bd7df9a0474ebcfedac7a299e6042758460..6897fd979fa1127ec5ea8d2f9d968853a147bc72 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -219,7 +219,7 @@ // worker or we're in mark termination. 
userG := getg().m.curg selfScan := gp == userG && readgstatus(userG) == _Grunning if selfScan { - casGToWaitingForGC(userG, _Grunning, waitReasonGarbageCollectionScan) + casGToWaitingForSuspendG(userG, _Grunning, waitReasonGarbageCollectionScan) } // TODO: suspendG blocks (and spins) until gp @@ -662,7 +662,7 @@ throw("nwait > work.nprocs") } // gcDrainN requires the caller to be preemptible. - casGToWaitingForGC(gp, _Grunning, waitReasonGCAssistMarking) + casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking) // drain own cached work first in the hopes that it // will be more cache friendly. diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 21bee4df71a998956c793bbcee02e212dd6ddf03..42200dcb1ca1fed5ac9e9793cdd02010aded62a3 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1306,13 +1306,13 @@ gp.waitreason = reason casgstatus(gp, old, _Gwaiting) } -// casGToWaitingForGC transitions gp from old to _Gwaiting, and sets the wait reason. -// The wait reason must be a valid isWaitingForGC wait reason. +// casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason. +// The wait reason must be a valid isWaitingForSuspendG wait reason. // // Use this over casgstatus when possible to ensure that a waitreason is set. -func casGToWaitingForGC(gp *g, old uint32, reason waitReason) { - if !reason.isWaitingForGC() { - throw("casGToWaitingForGC with non-isWaitingForGC wait reason") +func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) { + if !reason.isWaitingForSuspendG() { + throw("casGToWaitingForSuspendG with non-isWaitingForSuspendG wait reason") } casGToWaiting(gp, old, reason) } @@ -1446,23 +1446,7 @@ semacquire(&worldsema) gp := getg() gp.m.preemptoff = reason.String() systemstack(func() { - // Mark the goroutine which called stopTheWorld preemptible so its - // stack may be scanned. 
- // This lets a mark worker scan us while we try to stop the world - // since otherwise we could get in a mutual preemption deadlock. - // We must not modify anything on the G stack because a stack shrink - // may occur. A stack shrink is otherwise OK though because in order - // to return from this function (and to leave the system stack) we - // must have preempted all goroutines, including any attempting - // to scan our stack, in which case, any stack shrinking will - // have already completed by the time we exit. - // - // N.B. The execution tracer is not aware of this status - // transition and handles it specially based on the - // wait reason. - casGToWaitingForGC(gp, _Grunning, waitReasonStoppingTheWorld) stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack - casgstatus(gp, _Gwaiting, _Grunning) }) return stopTheWorldContext } @@ -1551,7 +1535,30 @@ // stopTheWorld to block. // // Returns the STW context. When starting the world, this context must be // passed to startTheWorldWithSema. +// +//go:systemstack func stopTheWorldWithSema(reason stwReason) worldStop { + // Mark the goroutine which called stopTheWorld preemptible so its + // stack may be scanned by the GC or observed by the execution tracer. + // + // This lets a mark worker scan us or the execution tracer take our + // stack while we try to stop the world since otherwise we could get + // in a mutual preemption deadlock. + // + // We must not modify anything on the G stack because a stack shrink + // may occur, now that we switched to _Gwaiting, specifically if we're + // doing this during the mark phase (mark termination excepted, since + // we know that stack scanning is done by that point). 
A stack shrink + // is otherwise OK though because in order to return from this function + // (and to leave the system stack) we must have preempted all + // goroutines, including any attempting to scan our stack, in which + // case, any stack shrinking will have already completed by the time we + // exit. + // + // N.B. The execution tracer is not aware of this status transition and + // handles it specially based on the wait reason. + casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld) + trace := traceAcquire() if trace.ok() { trace.STWStart(reason) @@ -1658,6 +1665,9 @@ throw(bad) } worldStopped() + + // Switch back to _Grunning, now that the world is stopped. + casgstatus(getg().m.curg, _Gwaiting, _Grunning) return worldStop{ reason: reason, @@ -2023,15 +2033,23 @@ // part of the current goroutine's stack, since the GC may move it. func forEachP(reason waitReason, fn func(*p)) { systemstack(func() { gp := getg().m.curg - // Mark the user stack as preemptible so that it may be scanned. - // Otherwise, our attempt to force all P's to a safepoint could - // result in a deadlock as we attempt to preempt a worker that's - // trying to preempt us (e.g. for a stack scan). + // Mark the user stack as preemptible so that it may be scanned + // by the GC or observed by the execution tracer. Otherwise, our + // attempt to force all P's to a safepoint could result in a + // deadlock as we attempt to preempt a goroutine that's trying + // to preempt us (e.g. for a stack scan). + // + // We must not modify anything on the G stack because a stack shrink + // may occur. A stack shrink is otherwise OK though because in order + // to return from this function (and to leave the system stack) we + // must have preempted all goroutines, including any attempting + // to scan our stack, in which case, any stack shrinking will + // have already completed by the time we exit. // // N.B.
The execution tracer is not aware of this status // transition and handles it specially based on the // wait reason. - casGToWaitingForGC(gp, _Grunning, reason) + casGToWaitingForSuspendG(gp, _Grunning, reason) forEachPInternal(fn) casgstatus(gp, _Gwaiting, _Grunning) }) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 80e929bac4606794529357fafe09b385b3f22bf7..9c7065eeb183de47449214581dc47891cf754b8d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1147,17 +1147,17 @@ w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } -func (w waitReason) isWaitingForGC() bool { - return isWaitingForGC[w] +func (w waitReason) isWaitingForSuspendG() bool { + return isWaitingForSuspendG[w] } -// isWaitingForGC indicates that a goroutine is only entering _Gwaiting and -// setting a waitReason because it needs to be able to let the GC take ownership -// of its stack. The G is always actually executing on the system stack, in -// these cases. +// isWaitingForSuspendG indicates that a goroutine is only entering _Gwaiting and +// setting a waitReason because it needs to be able to let suspendG +// (used by the GC and the execution tracer) take ownership of its stack. +// The G is always actually executing on the system stack in these cases. // // TODO(mknyszek): Consider replacing this with a new dedicated G status. -var isWaitingForGC = [len(waitReasonStrings)]bool{ +var isWaitingForSuspendG = [len(waitReasonStrings)]bool{ waitReasonStoppingTheWorld: true, waitReasonGCMarkTermination: true, waitReasonGarbageCollection: true, diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 8f11f54ccefd1ddb26b5627c5b029337dadc46de..a7e90cb5e8de03ec09c154b01779948c6d37be79 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -1163,14 +1163,14 @@ if gp.parkingOnChan.Load() { return false } // We also can't copy the stack while tracing is enabled, and - // gp is in _Gwaiting solely to make itself available to the GC.
+ // gp is in _Gwaiting solely to make itself available to suspendG. // In these cases, the G is actually executing on the system // stack, and the execution tracer may want to take a stack trace // of the G's stack. Note: it's safe to access gp.waitreason here. // We're only checking if this is true if we took ownership of the // G with the _Gscan bit. This prevents the goroutine from transitioning, // which prevents gp.waitreason from changing. - if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForGC() { + if traceEnabled() && readgstatus(gp)&^_Gscan == _Gwaiting && gp.waitreason.isWaitingForSuspendG() { return false } return true diff --git a/src/runtime/trace.go b/src/runtime/trace.go index bc2978bb4bf6ef3f187a48b7e021fc14fe9f1047..23f181dc6d6cd1679d2729c95738e6319113b7ec 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -375,7 +375,7 @@ systemstack(func() { me := getg().m.curg // We don't have to handle this G status transition because we // already eliminated ourselves from consideration above. - casGToWaitingForGC(me, _Grunning, waitReasonTraceGoroutineStatus) + casGToWaitingForSuspendG(me, _Grunning, waitReasonTraceGoroutineStatus) // We need to suspend and take ownership of the G to safely read its // goid. Note that we can't actually emit the event at this point // because we might stop the G in a window where it's unsafe to write diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 425ac37ba04ee0a42c58e02056d70c2725045036..03579b4bfab8604a7d11e165971d824fd5695fb1 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -160,11 +160,12 @@ case _Gwaiting, _Gpreempted: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the - // garbage collector. 
In these cases, we're not going to - // emit an event, and we want these goroutines to appear in - // the final trace as if they're running, not blocked. + // garbage collector and traceAdvance (via suspendG). In + // these cases, we're not going to emit an event, and we + // want these goroutines to appear in the final trace as + // if they're running, not blocked. tgs = traceGoWaiting - if status == _Gwaiting && wr.isWaitingForGC() { + if status == _Gwaiting && wr.isWaitingForSuspendG() { tgs = traceGoRunning } case _Gdead: