diff --git a/Source/PascalScriptFPC.inc b/Source/PascalScriptFPC.inc index 269a9d28..cd97ada1 100644 --- a/Source/PascalScriptFPC.inc +++ b/Source/PascalScriptFPC.inc @@ -8,7 +8,7 @@ {$ENDIF} {$IF (defined(cpuaarch64) or defined(cpuarm))} - {$DEFINE USEINVOKECALL} + // {$DEFINE USEINVOKECALL} // InvokeCall.inc does not compile on FPC due to missing Rtti features {$IFEND} diff --git a/Source/arm64.inc b/Source/arm64.inc new file mode 100644 index 00000000..1c7f3db2 --- /dev/null +++ b/Source/arm64.inc @@ -0,0 +1,394 @@ +{ ARM64 runtime call bridge (FPC + Linux/macOS SysV only) + + This include contains the Pascal-side marshalling logic used by TPSExec.InnerfuseCall + on AArch64 targets. The low-level indirect call is delegated to arm64call in + arm64sysv.S. + + High-level flow: + 1) Convert TPSVariant arguments to ABI-ready integer/float register values. + 2) Spill overflow arguments to an aligned stack buffer. + 3) Prepare hidden-result arguments/registers for managed/indirect result types. + 4) Call arm64call, which performs the actual register+stack setup and BLR. + 5) Copy return values back into Pascal Script result storage. +} + +{$IFDEF CPUAARCH64} + {$IFDEF DARWIN} + {$L arm64sysv-macos.o} // compile with gcc -c arm64sysv.S -o arm64sysv-macos.o + {$ELSE} + {$L arm64sysv-linux.o} // compile with gcc -c arm64sysv.S -o arm64sysv-linux.o + {$ENDIF} +{$ENDIF} + +type + TARM64Registers = packed record + X1, X2, X3, X4, X5, X6, X7: IPointer; + X8: IPointer; // indirect result register for AArch64 ABI + D1, D2, D3, D4, D5, D6, D7: Double; + FloatBits: Integer; // bits 0..7 = single args in D0..D7, bit 8 = single result + end; + +const + EmptyPchar: array[0..0] of char = #0; + +{ arm64call performs the final ABI transition in arm64sysv.S: + - preserves callee-saved registers + - copies overflow argument slots to the real stack + - loads x0..x8 and d0..d7 from marshalling buffers + - performs BLR to Address + - writes x0/x1/d0 return values back to _X0/_X1/_D0 +} +procedure arm64call( + Address: Pointer; + var _X0: IPointer; // in/out: carries call-time x0 and receives return x0 + var _X1: IPointer; // out: secondary integer return register (used for 9..16 byte composites) + var _D0: Double; + constref Registers: TARM64Registers; + Stack: Pointer; + Items: NativeInt); external {$IFDEF DARWIN}name '_arm64call'{$ENDIF}; + +function TPSExec.InnerfuseCall(_Self, Address: Pointer; CallingConv: TPSCallingConvention; Params: TPSList; res: PPSVariantIFC): Boolean; +var + Stack: array of Byte; + _X0: IPointer; + _X1: IPointer; + _D0: Double; + Registers: TARM64Registers; + IntRegUsage: Byte; + FloatRegUsage: Byte; + CallData: TPSList; + I: Integer; + pp: ^Byte; +{$IFDEF FPC} + IsConstructor, IsVirtualCons: Boolean; +{$ENDIF} + + function rp(p: PPSVariantIFC): PPSVariantIFC; + begin + if p = nil then + Exit(nil); + if p.aType.BaseType = btPointer then + begin + p^.aType := Pointer(Pointer(IPointer(p^.dta) + PointerSize)^); + p^.Dta := Pointer(p^.dta^); + end; + Result := p; + end; + + procedure StoreIntReg(Data: IPointer); + var + p: Pointer; + begin + case IntRegUsage of + 0: begin Inc(IntRegUsage); _X0 := Data; end; + 1: begin Inc(IntRegUsage); Registers.X1 := Data; end; + 2: begin Inc(IntRegUsage); Registers.X2 := Data; end; + 3: begin Inc(IntRegUsage); Registers.X3 := Data; end; + 4: begin Inc(IntRegUsage); Registers.X4 := Data; end; + 5: begin Inc(IntRegUsage); Registers.X5 := Data; end; + 6: begin Inc(IntRegUsage); Registers.X6 := Data; end; + 7: begin Inc(IntRegUsage); Registers.X7 := Data; end; + else + begin + SetLength(Stack, Length(Stack) + 8); + p := @Stack[Length(Stack) - 8]; + IPointer(p^) := Data; + end; + end; + end; + + procedure StoreFloatReg(const Data: Double; IsSingle: Boolean); + var + p: Pointer; + Bit: Integer; + begin + if FloatRegUsage < 8 then + begin + Bit := 1 shl FloatRegUsage; + if IsSingle then + Registers.FloatBits := Registers.FloatBits or Bit; + case FloatRegUsage of + 0: _D0 := Data; + 1: Registers.D1 := Data; + 2: Registers.D2 := Data; + 3: Registers.D3 := Data; + 4: Registers.D4 := Data; + 5: Registers.D5 := Data; + 6: Registers.D6 := Data; + 7: Registers.D7 := Data; + end; + Inc(FloatRegUsage); + end else begin + SetLength(Stack, Length(Stack) + 8); + p := @Stack[Length(Stack) - 8]; + Double(p^) := Data; + end; + end; + + procedure StoreStack(const aData; Len: Integer); + var + p: Pointer; + begin + if (Len > 8) and ((Length(Stack) mod 16) <> 0) then + SetLength(Stack, Length(Stack) + (16 - (Length(Stack) mod 16))); + SetLength(Stack, Length(Stack) + Len); + p := @Stack[Length(Stack) - Len]; + Move(aData, p^, Len); + end; + + function GetPtr(fVar: PPSVariantIFC): Boolean; + var + VarPtr: Pointer; + p: Pointer; + begin + Result := False; + if FVar = nil then + Exit; + + if fVar.VarParam then + begin + case fvar.aType.BaseType of + btArray: + begin + if Copy(fvar.aType.ExportName, 1, 10) = '!OPENARRAY' then + begin + p := CreateOpenArray(True, Self, FVar); + if p = nil then + Exit; + CallData.Add(p); + StoreIntReg(IPointer(POpenArray(p)^.Data)); + StoreIntReg(IPointer(POpenArray(p)^.ItemCount - 1)); + Result := True; + Exit; + end; + VarPtr := fvar.Dta; + end; + btVariant, btSet, btStaticArray, btRecord, btInterface, btClass, + {$IFNDEF PS_NOWIDESTRING}btUnicodeString, btWideString, btWideChar,{$ENDIF} + btU8, btS8, btU16, btS16, btU32, btS32, btSingle, btDouble, + btExtended, btString, btPChar, btChar, btCurrency + {$IFNDEF PS_NOINT64}, btS64, btU64{$ENDIF}: + VarPtr := fvar.Dta; + else + Exit; + end; + StoreIntReg(IPointer(VarPtr)); + end else begin + case fVar^.aType.BaseType of + btSet: + case TPSTypeRec_Set(fvar.aType).aByteSize of + 1: StoreIntReg(IPointer(Byte(fvar.dta^))); + 2: StoreIntReg(IPointer(Word(fvar.dta^))); + 3, 4: StoreIntReg(IPointer(Cardinal(fvar.dta^))); + 5, 6, 7, 8: StoreIntReg(IPointer(fVar.Dta^)); + else + StoreIntReg(IPointer(fvar.Dta)); + end; + btArray: + begin + if Copy(fvar^.aType.ExportName, 1, 10) = '!OPENARRAY' then + begin + p := CreateOpenArray(False, Self, FVar); + if p = nil then + Exit; + CallData.Add(p); + StoreIntReg(IPointer(POpenArray(p)^.Data)); + StoreIntReg(IPointer(POpenArray(p)^.ItemCount - 1)); + Result := True; + Exit; + end; + {$IFDEF FPC} + StoreIntReg(IPointer(FVar.Dta)); + {$ELSE} + StoreIntReg(IPointer(FVar.Dta^)); + {$ENDIF} + end; + btRecord: + begin + if fvar^.aType.RealSize <= SizeOf(IPointer) then + StoreIntReg(IPointer(fvar.dta^)) + else if fvar^.aType.RealSize <= 16 then + StoreStack(fvar.Dta^, fvar^.aType.RealSize) + else + StoreIntReg(IPointer(fVar.Dta)); + end; + btVariant, btStaticArray: + StoreIntReg(IPointer(fVar.Dta)); + btExtended, btDouble: + StoreFloatReg(Double(fvar.dta^), False); + btCurrency: + StoreIntReg(IPointer(fvar.dta^)); + btSingle: + StoreFloatReg(Single(fvar.dta^), True); + btChar, btU8, btS8: + StoreIntReg(IPointer(Byte(fVar^.dta^))); + {$IFNDEF PS_NOWIDESTRING} + btWideChar, + {$ENDIF} + btU16, btS16: + StoreIntReg(IPointer(Word(FVar^.dta^))); + btU32, btS32: + StoreIntReg(IPointer(Cardinal(FVar^.dta^))); + btPChar: + if Pointer(fvar^.dta^) = nil then + StoreIntReg(IPointer(@EmptyPChar)) + else + StoreIntReg(IPointer(fvar^.dta^)); + btClass, btInterface, btString{$IFNDEF PS_NOWIDESTRING}, btWideString, btUnicodeString{$ENDIF}: + StoreIntReg(IPointer(fvar^.dta^)); + btProcPtr: + begin + GetMem(p, PointerSize2); + TMethod(p^) := MKMethod(Self, Longint(FVar.Dta^)); + StoreStack(p^, PointerSize2); + FreeMem(p); + end; + {$IFNDEF PS_NOINT64} + btS64: + StoreIntReg(IPointer(Int64(fvar^.dta^))); + btU64: + StoreIntReg(IPointer(UInt64(fvar^.dta^))); + {$ENDIF} + else + Exit; + end; + end; + Result := True; + end; +begin +{$IFDEF FPC} + if (Integer(CallingConv) and 128) <> 0 then + begin + IsVirtualCons := True; + CallingConv := TPSCallingConvention(Integer(CallingConv) and not 128); + end else + IsVirtualCons := False; + if (Integer(CallingConv) and 64) <> 0 then + begin + IsConstructor := True; + CallingConv := TPSCallingConvention(Integer(CallingConv) and not 64); + end else + IsConstructor := False; +{$ENDIF} + + InnerfuseCall := False; + if Address = nil then + Exit; + + SetLength(Stack, 0); + CallData := TPSList.Create; + res := rp(res); + if res <> nil then + res.VarParam := True; + try + FillChar(Registers, SizeOf(Registers), 0); + _X0 := 0; + _X1 := 0; + _D0 := 0; + IntRegUsage := 0; + FloatRegUsage := 0; + + {$IF DEFINED(FPC) and (FPC_VERSION >= 3)} + if IsConstructor then + begin + if not GetPtr(rp(Params[0])) then + Exit; + DisposePPSVariantIFC(Params[0]); + Params.Delete(0); + end; + {$ENDIF} + + if Assigned(_Self) then + StoreIntReg(IPointer(_Self)); + + if Assigned(res) then + begin + case res^.aType.BaseType of + btRecord, btSet: + if res.aType.RealSize > 16 then + begin + Registers.X8 := IPointer(res.Dta); + {$IFNDEF MSWINDOWS} + // FPC AArch64 managed-result ABI uses a hidden first argument in x0. + StoreIntReg(IPointer(res.Dta)); + {$ENDIF} + end; + btString, {$IFNDEF PS_NOWIDESTRING}btWideString, btUnicodeString,{$ENDIF} + btInterface, btArray, btVariant, btStaticArray: + begin + Registers.X8 := IPointer(res.Dta); + {$IFNDEF MSWINDOWS} + // Keep x8 for C-style ABI while also passing hidden result in x0 for Pascal ABI. + StoreIntReg(IPointer(res.Dta)); + {$ENDIF} + end; + end; + if res^.aType.BaseType = btSingle then + Registers.FloatBits := Registers.FloatBits or 256; + end; + + for I := 0 to Params.Count - 1 do + if not GetPtr(rp(Params[I])) then + Exit; + + if (Length(Stack) mod 16) <> 0 then + SetLength(Stack, Length(Stack) + (16 - (Length(Stack) mod 16))); + + if Length(Stack) = 0 then + pp := nil + else + pp := @Stack[0]; + + arm64call(Address, _X0, _X1, _D0, Registers, pp, Length(Stack) div 8); + + if Assigned(res) then + begin + case res^.aType.BaseType of + btRecord, btSet: + if res.aType.RealSize <= 16 then + case res.aType.RealSize of + 1: Byte(res.Dta^) := _X0; + 2: Word(res.Dta^) := _X0; + 3, 4: Cardinal(res.Dta^) := _X0; + 5, 6, 7, 8: IPointer(res.Dta^) := _X0; + 9..16: + begin + Move(_X0, res.Dta^, 8); + Move(_X1, Pointer(IPointer(res.Dta) + 8)^, res.aType.RealSize - 8); + end; + end; + btSingle: tbtsingle(res.Dta^) := _D0; + btDouble: tbtdouble(res.Dta^) := _D0; + btExtended: tbtextended(res.Dta^) := _D0; + btChar, btU8, btS8: tbtu8(res.dta^) := _X0; + {$IFNDEF PS_NOWIDESTRING} + btWideChar, + {$ENDIF} + btU16, btS16: tbtu16(res.dta^) := _X0; + btClass: IPointer(res.dta^) := _X0; + btU32, btS32: tbtu32(res.dta^) := _X0; + btPChar: pansichar(res.dta^) := Pansichar(_X0); + {$IFNDEF PS_NOINT64} + btS64: tbts64(res.dta^) := Int64(_X0); + btU64: tbtu64(res.dta^) := UInt64(_X0); + {$ENDIF} + btCurrency: tbts64(res.Dta^) := Int64(_X0); + btInterface, btVariant, btStaticArray, btArray, + btString{$IFNDEF PS_NOWIDESTRING}, btWideString, btUnicodeString{$ENDIF}: ; + else + Exit; + end; + end; + + Result := True; + finally + for I := CallData.Count - 1 downto 0 do + begin + pp := CallData[I]; + case pp^ of + 0: DestroyOpenArray(Self, Pointer(pp)); + end; + end; + CallData.Free; + end; +end; diff --git a/Source/arm64sysv-linux.o b/Source/arm64sysv-linux.o new file mode 100644 index 00000000..8ca39f14 Binary files /dev/null and b/Source/arm64sysv-linux.o differ diff --git a/Source/arm64sysv-macos.o b/Source/arm64sysv-macos.o new file mode 100644 index 00000000..1a51d0b2 Binary files /dev/null and b/Source/arm64sysv-macos.o differ diff --git a/Source/arm64sysv.S b/Source/arm64sysv.S new file mode 100644 index 00000000..af9a2087 --- /dev/null +++ b/Source/arm64sysv.S @@ -0,0 +1,137 @@ +/* + arm64sysv.S - AArch64 SysV indirect call shim for Pascal Script + + Inputs (from arm64.inc arm64call declaration): + x0 = Address (target function pointer) + x1 = &_X0 (integer return storage) + x2 = &_X1 (secondary integer return storage) + x3 = &_D0 (floating return storage) + x4 = &TARM64Registers (pre-marshalled integer/float register block) + x5 = Stack (pointer to overflow arguments, 8-byte slots) + x6 = Items (number of stack slots) + + Steps performed: + 1) Preserve callee-saved registers and keep stable frame pointers. + 2) Copy overflow slots from x4 into the real call stack (maintaining ABI alignment). + 3) Load x0..x8 and d0..d7 from marshalling buffers (including single/double conversion bits). + 4) BLR to the target function pointer. + 5) Store x0/x1/d0 back via output pointers, restoring single->double when needed. +*/ +.text +.align 2 +#ifdef __APPLE__ +.globl _arm64call +.p2align 2 +_arm64call: +#else +.globl arm64call +.type arm64call, %function +arm64call: +#endif + stp x29, x30, [sp, #-64]! + mov x29, sp + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + + mov x24, x0 + mov x19, x1 + mov x22, x2 + mov x20, x3 + mov x21, x4 + mov x4, x5 + mov x23, x6 + + cbz x23, 1f + lsl x9, x23, #3 + sub sp, sp, x9 + mov x10, sp +0: + ldr x11, [x4], #8 + str x11, [x10], #8 + subs x23, x23, #1 + b.ne 0b +1: + ldr w9, [x21, #120] + + tbnz w9, #0, 2f + ldr d0, [x20] + b 3f +2: + ldr d16, [x20] + fcvt s0, d16 +3: + ldr x0, [x19] + ldr x1, [x21, #0] + ldr x2, [x21, #8] + ldr x3, [x21, #16] + ldr x4, [x21, #24] + ldr x5, [x21, #32] + ldr x6, [x21, #40] + ldr x7, [x21, #48] + ldr x8, [x21, #56] + + tbnz w9, #1, 4f + ldr d1, [x21, #64] + b 5f +4: ldr d16, [x21, #64] + fcvt s1, d16 +5: + tbnz w9, #2, 6f + ldr d2, [x21, #72] + b 7f +6: ldr d16, [x21, #72] + fcvt s2, d16 +7: + tbnz w9, #3, 8f + ldr d3, [x21, #80] + b 9f +8: ldr d16, [x21, #80] + fcvt s3, d16 +9: + tbnz w9, #4, 10f + ldr d4, [x21, #88] + b 11f +10: ldr d16, [x21, #88] + fcvt s4, d16 +11: + tbnz w9, #5, 12f + ldr d5, [x21, #96] + b 13f +12: ldr d16, [x21, #96] + fcvt s5, d16 +13: + tbnz w9, #6, 14f + ldr d6, [x21, #104] + b 15f +14: ldr d16, [x21, #104] + fcvt s6, d16 +15: + tbnz w9, #7, 16f + ldr d7, [x21, #112] + b 17f +16: ldr d16, [x21, #112] + fcvt s7, d16 +17: + blr x24 + + str x0, [x19] + str x1, [x22] + ldr w9, [x21, #120] /* x9/w9 is caller-saved, reload FloatBits after BLR */ + tbnz w9, #8, 18f + str d0, [x20] + b 19f +18: + fcvt d16, s0 + str d16, [x20] +19: + mov sp, x29 + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x29, x30, [sp], #64 + ret +#ifndef __APPLE__ +.size arm64call, .-arm64call +.section .note.GNU-stack,"",@progbits +#endif diff --git a/Source/uPSRuntime.pas b/Source/uPSRuntime.pas index 3f28be64..c3c8ef14 100644 --- a/Source/uPSRuntime.pas +++ b/Source/uPSRuntime.pas @@ -10556,6 +10556,8 @@ procedure DestroyOpenArray(Sender: TPSExec; V: POpenArray); {$include x86.inc} {$elseif defined(cpupowerpc)} {$include powerpc.inc} + {$elseif defined(cpuaarch64)} + {$include arm64.inc} {$elseif defined(cpuarm)} {$include arm.inc} {$elseif defined(CPUX86_64)} @@ -12348,10 +12350,33 @@ function BGRFW(var s: tbtString): tbtString; s := ''; end; +function AlwaysAsVariable(aType: TPSTypeRec): Boolean; +begin + case atype.BaseType of + btVariant: Result := true; + btSet: Result := atype.RealSize > PointerSize; + btRecord: Result := atype.RealSize > PointerSize; + btStaticArray: Result := atype.RealSize > PointerSize; + else + Result := false; + end; +end; + +function ParamAsVariable(const Modifier: tbtchar; aType: TPSTypeRec): Boolean; +begin + Result := (Modifier = '%') or (Modifier = '!') or AlwaysAsVariable(aType); +end; + + + {$ifdef fpc} {$if defined(cpupowerpc) or defined(cpuarm) or defined(cpu64)} {$define empty_methods_handler} {$ifend} +{$else} + {$if defined(cpuarm) or defined(cpuarm64)} + {$define empty_methods_handler} + {$ifend} {$endif} {$ifdef empty_methods_handler} @@ -12537,23 +12562,6 @@ function SupportsRegister(b: TPSTypeRec): Boolean; end; end; -function AlwaysAsVariable(aType: TPSTypeRec): Boolean; -begin - case atype.BaseType of - btVariant: Result := true; - btSet: Result := atype.RealSize > PointerSize; - btRecord: Result := atype.RealSize > PointerSize; - btStaticArray: Result := atype.RealSize > PointerSize; - else - Result := false; - end; -end; - -function ParamAsVariable(const Modifier: tbtchar; aType: TPSTypeRec): Boolean; -begin - Result := (Modifier = '%') or (Modifier = '!') or AlwaysAsVariable(aType); -end; - procedure PutOnFPUStackExtended(ft: extended); asm // fstp tbyte ptr [ft]