src/cmd/compile/internal/arm64/ggen.go | 6 +++--- src/cmd/compile/internal/arm64/ssa.go | 2 +- src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 16 ++++++++-------- src/cmd/compile/internal/ssa/opGen.go | 10 +++++----- src/cmd/internal/obj/arm64/a.out.go | 4 ++-- src/runtime/duff_arm64.s | 384 +++++++++++++++++++++++++++--------------------------- src/runtime/mkduff.go | 18 +++++++++--------- diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go index 9d8fe53cfdc87eddbf4430517456ac2fb64b29fc..2f925656bc15d088883218fefcacf6c70b1220ff 100644 --- a/src/cmd/compile/internal/arm64/ggen.go +++ b/src/cmd/compile/internal/arm64/ggen.go @@ -36,9 +36,9 @@ p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off) off += int64(gc.Widthptr) cnt -= int64(gc.Widthptr) } - p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) - p = pp.Appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REGRT1, 0) - p.Reg = arm64.REGRT1 + p = pp.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REG_R20, 0) + p = pp.Appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REG_R20, 0) + p.Reg = arm64.REG_R20 p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Duffzero diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index fc7a60e63eef2225af74e973e4594816403620d9..f13bd71f7ae979ba90fef1c21aae6856dedff647 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -820,7 +820,7 @@ p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r1}) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64DUFFZERO: - // runtime.duffzero expects start address in R16 + // runtime.duffzero expects start address in R20 p := s.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN diff --git 
a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index a0c8b060c759315de4ef2cfe65e8dff7972685ca..d4de904a24b0f32f563cf5ed04baf9dc5ab317a4 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -495,14 +495,14 @@ // arg0 = address of memory to zero // arg1 = mem // auxint = offset into duffzero code to start executing // returns mem - // R16 aka arm64.REGRT1 changed as side effect + // R20 changed as side effect { name: "DUFFZERO", aux: "Int64", argLength: 2, reg: regInfo{ - inputs: []regMask{buildReg("R16")}, - clobbers: buildReg("R16 R30"), + inputs: []regMask{buildReg("R20")}, + clobbers: buildReg("R20 R30"), }, faultOnNilArg0: true, }, @@ -529,19 +529,19 @@ faultOnNilArg0: true, }, // duffcopy - // arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect) - // arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect) + // arg0 = address of dst memory (in R21, changed as side effect) + // arg1 = address of src memory (in R20, changed as side effect) // arg2 = mem // auxint = offset into duffcopy code to start executing // returns mem - // R16, R17 changed as side effect + // R20, R21 changed as side effect { name: "DUFFCOPY", aux: "Int64", argLength: 3, reg: regInfo{ - inputs: []regMask{buildReg("R17"), buildReg("R16")}, - clobbers: buildReg("R16 R17 R26 R30"), + inputs: []regMask{buildReg("R21"), buildReg("R20")}, + clobbers: buildReg("R20 R21 R26 R30"), }, faultOnNilArg0: true, faultOnNilArg1: true, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 8e701cdd9f35d833fb0a645fccfbf2e581f34c07..00e49c97b7ba6622d45715e3d20f39104790066d 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -18668,9 +18668,9 @@ argLen: 2, faultOnNilArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 65536}, // R16 + {0, 1048576}, // R20 }, - clobbers: 
536936448, // R16 R30 + clobbers: 537919488, // R20 R30 }, }, { @@ -18694,10 +18694,10 @@ faultOnNilArg0: true, faultOnNilArg1: true, reg: regInfo{ inputs: []inputInfo{ - {0, 131072}, // R17 - {1, 65536}, // R16 + {0, 2097152}, // R21 + {1, 1048576}, // R20 }, - clobbers: 604176384, // R16 R17 R26 R30 + clobbers: 607125504, // R20 R21 R26 R30 }, }, { diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 944eab19556ad92f1453693a0027a0179a544ede..4379f010ff33c03abc8d4c243149a2c9133dd6f7 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -241,8 +241,8 @@ // compiler allocates register variables F7-F26 // compiler allocates external registers F26 down const ( REGMIN = REG_R7 // register variables allocated from here to REGMAX - REGRT1 = REG_R16 // ARM64 IP0, for external linker, runtime, duffzero and duffcopy - REGRT2 = REG_R17 // ARM64 IP1, for external linker, runtime, duffcopy + REGRT1 = REG_R16 // ARM64 IP0, external linker may use as a scratch register in trampoline + REGRT2 = REG_R17 // ARM64 IP1, external linker may use as a scratch register in trampoline REGPR = REG_R18 // ARM64 platform register, unused in the Go toolchain REGMAX = REG_R25 diff --git a/src/runtime/duff_arm64.s b/src/runtime/duff_arm64.s index 3739c3945a8dd400d7cd06404bb12a0ffd2d5315..128b076af9db658b41847080c5fa72e35d7eecc6 100644 --- a/src/runtime/duff_arm64.s +++ b/src/runtime/duff_arm64.s @@ -5,263 +5,263 @@ #include "textflag.h" TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) 
- STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP.P (ZR, ZR), 16(R16) - STP (ZR, ZR), (R16) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + 
STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP.P (ZR, ZR), 16(R20) + STP (ZR, ZR), (R20) RET TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + 
LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, 
R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, 
R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) - 
LDP.P 16(R16), (R26, R27) - STP.P (R26, R27), 16(R17) + LDP.P 16(R20), (R26, R27) + STP.P (R26, R27), 16(R21) RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index b7c7e2689cfd73fa083414118551160214e33694..b6fe701497ad8f1e161c27563c93c16d0e07ba03 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -151,26 +151,26 @@ } func zeroARM64(w io.Writer) { // ZR: always zero - // R16 (aka REGRT1): ptr to memory to be zeroed - // On return, R16 points to the last zeroed dword. + // R20: ptr to memory to be zeroed + // On return, R20 points to the last zeroed dword. fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") for i := 0; i < 63; i++ { - fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)") + fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)") } - fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)") + fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R20)") fmt.Fprintln(w, "\tRET") } func copyARM64(w io.Writer) { - // R16 (aka REGRT1): ptr to source memory - // R17 (aka REGRT2): ptr to destination memory + // R20: ptr to source memory + // R21: ptr to destination memory // R26, R27 (aka REGTMP): scratch space - // R16 and R17 are updated as a side effect + // R20 and R21 are updated as a side effect fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") for i := 0; i < 64; i++ { - fmt.Fprintln(w, "\tLDP.P\t16(R16), (R26, R27)") - fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R17)") + fmt.Fprintln(w, "\tLDP.P\t16(R20), (R26, R27)") + fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R21)") fmt.Fprintln(w) } fmt.Fprintln(w, "\tRET")