Eliot Miranda uploaded a new version of VMMaker to project VM Maker: http://source.squeak.org/VMMaker/VMMaker.oscog-eem.3205.mcz
==================== Summary ====================
Name: VMMaker.oscog-eem.3205 Author: eem Time: 3 July 2022, 10:54:24.223522 am UUID: 624993bc-bfc4-483f-b63b-a8b035905d5f Ancestors: VMMaker.oscog-eem.3204
ThreadedRiscV64FFIPlugin contributed by Ken Dickey; thank you!!
=============== Diff against VMMaker.oscog-eem.3204 ===============
Item was added: + ThreadedFFICalloutStateForARM64 subclass: #ThreadedFFICalloutStateForRiscV64 + instanceVariableNames: '' + classVariableNames: '' + poolDictionaries: '' + category: 'VMMaker-Plugins-FFI'! + + !ThreadedFFICalloutStateForRiscV64 commentStamp: 'KenD 6/27/2022 10:21' prior: 0! + A ThreadedFFICalloutStateForRiscV64 holds the per-callout marshalling state for the RV64 ABI. It reuses the ARM64 callout state unchanged, since both ABIs pass up to 8 integer and 8 floating-point arguments in registers. + !
Item was added: + ----- Method: ThreadedFFICalloutStateForRiscV64>>initialize (in category 'initialize-release') ----- + initialize + "Allocate the shadow arrays for register arguments and reset both register cursors. RV64 passes up to 8 integer and 8 float arguments in registers (see ThreadedRiscV64FFIPlugin class>>initialize). NOTE (review): numIntRegArgs and numFloatRegArgs are sent class-side here; the class-side accessors are presumably inherited - confirm they exist." + super initialize. + integerRegisterIndex := floatRegisterIndex := 0. + integerRegisters := CArrayAccessor on: (Array new: ThreadedRiscV64FFIPlugin numIntRegArgs). + floatRegisters := CArrayAccessor on: (Array new: ThreadedRiscV64FFIPlugin numFloatRegArgs)!
Item was changed: ----- Method: ThreadedFFIPlugin class>>preambleCCode (in category 'translation') ----- preambleCCode "For a source of builtin defines grep for builtin_define in a gcc release config directory. See platforms/Cross/vm/sqCogStackAlignment.h for per-platform definitions for STACK_ALIGN_BYTES MUST_ALIGN_STACK, getsp, et al." ^' #include "sqAssert.h" /* for assert */ #define ThreadedFFIPlugin 1 /* to filter-out unwanted declarations from sqFFI.h */ #include "sqFFI.h" /* for logging and surface functions */ #include "sqCogStackAlignment.h" /* for STACK_ALIGN_BYTES and getsp() */
#ifdef _MSC_VER # define alloca _alloca #endif #if !!defined(setsp) && defined(__GNUC__) # if defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__) # define setsp(spval) asm volatile ("movl %0,%%esp" : : "m"(spval)) # elif defined(__amd64__) || defined(__x86_64__) || defined(__amd64) || defined(__x86_64) # define setsp(spval) asm volatile ("movq %0,%%rsp" : : "m"(spval)) # elif defined(__arm64__) || defined(__aarch64__) || defined(ARM64) /* https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/index.h... */ # define setsp(spval) asm volatile ("mov sp, %0" : : "r"(spval)) # elif defined(__arm__) # define setsp(spval) asm volatile ("ldr %%sp, %0" : : "m"(spval)) + /* Standard RISC-V toolchains predefine __riscv and __riscv_xlen; the remaining macros are kept for non-standard toolchains. */ + # elif (defined(__riscv) && (__riscv_xlen == 64)) || defined(__riscv64__) || defined(__rv64g__) || defined(__rv64gcv__) + # define setsp(spval) asm volatile ("mv sp, %0" : : "r"(spval)) + /* getsp() must yield the stack pointer as a value (cf. the getsp() 0 fallback below); a bare asm writing a0 is not an expression and clobbers a0 behind the compiler. Use a GNU statement expression with an output operand. */ + # define getsp() ({ void *_getsp_result; asm volatile ("mv %0, sp" : "=r"(_getsp_result)); _getsp_result; }) # endif #endif #if !!defined(getsp) # define getsp() 0 #endif #if !!defined(setsp) # define setsp(ignored) 0 #endif
#if !!defined(STACK_ALIGN_BYTES) # define STACK_ALIGN_BYTES 0 #endif /* !!defined(STACK_ALIGN_BYTES) */
/* For ABI that require stack alignment greater than natural word size */ #define MUST_ALIGN_STACK (STACK_ALIGN_BYTES > sizeof(void*))
#if defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__) /* Both Mac OS X x86 and Win32 x86 return structs of a power of two in size * less than or equal to eight bytes in length in registers. Linux never does so. */ # if __linux__ # define WIN32_X86_STRUCT_RETURN 0 # else # define WIN32_X86_STRUCT_RETURN 1 # endif # if _WIN32 # define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 1 # endif #elif defined(__amd64__) || defined(__x86_64__) || defined(__amd64) || defined(__x86_64) # if _WIN32 | _WIN64 # define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 1 # endif #endif /* defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__) */
#if !!defined(ALLOCA_LIES_SO_SETSP_BEFORE_CALL) # if defined(__MINGW32__) && !!defined(__clang__) && (__GNUC__ >= 3) && (defined(_X86_) || defined(i386) || defined(__i386) || defined(__i386__)) /* * cygwin -mno-cygwin (MinGW) gcc 3.4.x''s alloca is a library routine that answers * %esp + xx, so the outgoing stack is offset by one or more word if uncorrected. * Grab the actual stack pointer to correct. */ # define ALLOCA_LIES_SO_SETSP_BEFORE_CALL 1 # else # define ALLOCA_LIES_SO_SETSP_BEFORE_CALL 0 # endif #endif /* !!defined(ALLOCA_LIES_SO_SETSP_BEFORE_CALL) */
#if !!defined(PLATFORM_API_USES_CALLEE_POPS_CONVENTION) # define PLATFORM_API_USES_CALLEE_POPS_CONVENTION 0 #endif
/* The dispatchOn:in:with:with: generates an unwanted call on error. Just squash it. */ #define error(foo) 0
/* sanitize */ #ifdef SQUEAK_BUILTIN_PLUGIN # define EXTERN #else # define EXTERN extern #endif '!
Item was added: + ThreadedARM64FFIPlugin subclass: #ThreadedRiscV64FFIPlugin + instanceVariableNames: '' + classVariableNames: '' + poolDictionaries: '' + category: 'VMMaker-Plugins-FFI'! + + !ThreadedRiscV64FFIPlugin commentStamp: 'KenD 6/27/2022 10:22' prior: 0! + A ThreadedRiscV64FFIPlugin is for the RV64g RISC-V ABI. [Leverage ARM64 code] + + Stack grows Down; 16 byte aligned + 32 integer and 32 double float registers + x0 - always Zero [Thread swap shadow uses slot for PC] + x1 - Return Address (RA) + x2 - Stack Pointer (SP) + x8 - Frame Pointer (FP) - aka S0 + Argument registers: 8 integer and 8 float (float args in float regs) + A0..A7 = x10..x17 ; FA0..FA7 = f10..f17 + In RV64 sizeof(long) = sizeof(long long) = 8 = sizeof(void *) + Args larger than (void *) pointer are passed by reference. + NOTE (review): the psABI passes scalars up to 2 x XLEN (16 bytes) in a register pair; confirm the by-reference summary above against the calling-convention document cited below. + Conceptually, args as fields of C struct with pointer alignment; + more args than # arg regs passed on stack with SP pointing + to 1st arg not passed in a register. + Values returned in A0/A1 or FA0/FA1 + Larger return values are placed in space alloc'ed by caller, + with pointer passed as implicit 1st parameter. + S1..S11 Save: CallEE Saved Registers (floats: FS0..FS11) + T0..T6 Temp: CalleR Saved Registers (floats: FT0..FT11) + + Bytes C type Description + 1 char Character value/byte + 2 short Short integer + 4 int Integer + 8 long Long integer + 8 long long Long long integer + 8 void* Pointer + 4 float Single-precision float + 8 double Double-precision float + 16 long double Extended-precision float + See + https://riscv.org/technical/specifications/ + https://riscv.org/wp-content/uploads/2015/01/riscv-calling.pdf + !
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin class>>calloutStateClass (in category 'translation') ----- + calloutStateClass + "Answer the class used to hold the marshalling state of a callout." + + ^ThreadedFFICalloutStateForRiscV64!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin class>>excludingPredefinedMacros (in category 'translation') ----- + excludingPredefinedMacros + "Answer the ARM64-identifying macros to exclude. This plugin subclasses ThreadedARM64FFIPlugin, so generated code must not also be selected on ARM64 targets." + + ^#('__ARM_ARCH_ISA_A64' '__aarch64__' '__arm64__' 'ARM64' '_M_ARM64')!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin class>>identifyingPredefinedMacros (in category 'translation') ----- + identifyingPredefinedMacros + "Answer the predefined macros that identify the RV64 target. NOTE (review): standard RISC-V GCC/Clang predefine __riscv plus __riscv_xlen, not __riscv64__; confirm the target toolchain defines __riscv64__, otherwise this plugin will never be selected." + + ^#('__riscv64__') !
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin class>>initialize (in category 'class initialization') ----- + initialize + "RV64 passes up to 8 integer arguments (A0..A7) and 8 floating-point arguments (FA0..FA7) in registers; see the class comment." + super initialize. + NumIntRegArgs := 8. + NumFloatRegArgs := 8!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin class>>moduleName (in category 'translation') ----- + moduleName + "Answer the name of the generated plugin module." + + ^'RiscV64FFIPlugin'!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin>>ffiCall:ArgArrayOrNil:NumArgs: (in category 'callout support') ----- + ffiCall: externalFunction ArgArrayOrNil: argArrayOrNil NumArgs: nArgs + "Generic callout. Does the actual work. If argArrayOrNil is nil it takes args from the stack + and the spec from the method. If argArrayOrNil is not nil takes args from argArrayOrNil + and the spec from the receiver." + | flags argTypeArray address argType oop argSpec argClass err theCalloutState calloutState requiredStackSize stackSize allocation result primNumArgs | + <inline: true> + <var: #theCalloutState type: #'CalloutState'> + <var: #calloutState type: #'CalloutState *'> + <var: #allocation type: #'char *'> + + primNumArgs := interpreterProxy methodArgumentCount. + (interpreterProxy is: externalFunction KindOfClass: interpreterProxy classExternalFunction) ifFalse: + [^self ffiFail: FFIErrorNotFunction]. + "Load and check the values in the externalFunction before we call out" + flags := interpreterProxy fetchInteger: ExternalFunctionFlagsIndex ofObject: externalFunction. + interpreterProxy failed ifTrue: + [^self ffiFail: FFIErrorBadArgs]. + + "This must come early for compatibility with the old FFIPlugin. Image-level code + may assume the function pointer is loaded eagerly. Thanks to Nicolas Cellier." + address := self ffiLoadCalloutAddress: externalFunction. + interpreterProxy failed ifTrue: + [^0 "error code already set by ffiLoadCalloutAddress:"]. + + argTypeArray := interpreterProxy fetchPointer: ExternalFunctionArgTypesIndex ofObject: externalFunction. + "must be array of arg types" + ((interpreterProxy isArray: argTypeArray) + and: [(interpreterProxy slotSizeOf: argTypeArray) = (nArgs + 1)]) ifFalse: + [^self ffiFail: FFIErrorBadArgs]. 
+ "check if the calling convention is supported" + self cppIf: COGMTVM + ifTrue: + [(self ffiSupportsCallingConvention: (flags bitAnd: FFICallTypesMask)) ifFalse: + [^self ffiFail: FFIErrorCallType]] + ifFalse: "not masking causes threaded calls to fail, which is as they should if the plugin is not threaded." + [(self ffiSupportsCallingConvention: flags) ifFalse: + [^self ffiFail: FFIErrorCallType]]. + + requiredStackSize := self externalFunctionHasStackSizeSlot + ifTrue: [interpreterProxy + fetchInteger: ExternalFunctionStackSizeIndex + ofObject: externalFunction] + ifFalse: [-1]. + interpreterProxy failed ifTrue: + [^interpreterProxy primitiveFailFor: (argArrayOrNil isNil + ifTrue: [PrimErrBadMethod] + ifFalse: [PrimErrBadReceiver])]. + stackSize := requiredStackSize < 0 ifTrue: [DefaultMaxStackSize] ifFalse: [requiredStackSize]. + self cCode: [] inSmalltalk: [theCalloutState := self class calloutStateClass new]. + calloutState := self addressOf: theCalloutState. + self cCode: [self memset: calloutState _: 0 _: (self sizeof: #CalloutState)]. + calloutState callFlags: flags. + "Fetch return type and args" + argType := interpreterProxy fetchPointer: 0 ofObject: argTypeArray. + argSpec := interpreterProxy fetchPointer: 0 ofObject: argType. + argClass := interpreterProxy fetchPointer: 1 ofObject: argType. + (err := self ffiCheckReturn: argSpec With: argClass in: calloutState) ~= 0 ifTrue: + [^self ffiFail: err]. "cannot return" + "alloca the outgoing stack frame, leaving room for marshalling args, and including space for the return struct, if any. + Additional space reserved for saving register args like mandated by Win64 X64 or PPC ABI, will be managed by the call itself" + allocation := self alloca: stackSize + calloutState structReturnSize + self cStackAlignment. + self mustAlignStack ifTrue: + [allocation := self cCoerce: (allocation asUnsignedIntegerPtr bitClear: self cStackAlignment - 1) to: #'char *']. 
+ calloutState + argVector: allocation; + currentArg: allocation; + limit: allocation + stackSize. + "This next bit overrides the ARM64 code to pass return struct pointer in A0" + (calloutState structReturnSize > 0 + and: [self nonRegisterStructReturnIsViaImplicitFirstArgument + and: [(self returnStructInRegisters: calloutState) not]]) ifTrue: + [err := self ffiPushPointer: calloutState limit in: calloutState. + err ~= 0 ifTrue: + [self cleanupCalloutState: calloutState. + self cppIf: COGMTVM ifTrue: + [err = PrimErrObjectMayMove negated ifTrue: + [^PrimErrObjectMayMove]]. "N.B. Do not fail if object may move because caller will GC and retry." + ^self ffiFail: err]]. + "Aside from the bit above, code identical with ARM64" + 1 to: nArgs do: + [:i| + argType := interpreterProxy fetchPointer: i ofObject: argTypeArray. + argSpec := interpreterProxy fetchPointer: 0 ofObject: argType. + argClass := interpreterProxy fetchPointer: 1 ofObject: argType. + oop := argArrayOrNil isNil + ifTrue: [interpreterProxy stackValue: nArgs - i] + ifFalse: [interpreterProxy fetchPointer: i - 1 ofObject: argArrayOrNil]. + err := self ffiArgument: oop Spec: argSpec Class: argClass in: calloutState. + err ~= 0 ifTrue: + [self cleanupCalloutState: calloutState. + self cppIf: COGMTVM ifTrue: + [err = PrimErrObjectMayMove negated ifTrue: + [^PrimErrObjectMayMove]]. "N.B. Do not fail if object may move because caller will GC and retry." + ^self ffiFail: err]]. "coercion failed or out of stack space" + "Failures must be reported back from ffiArgument:Spec:Class:in:. + Should not fail from here on in." + self assert: interpreterProxy failed not. + self ffiLogCallout: externalFunction. + (requiredStackSize < 0 + and: [self externalFunctionHasStackSizeSlot]) ifTrue: + [stackSize := calloutState currentArg - calloutState argVector. + interpreterProxy storeInteger: ExternalFunctionStackSizeIndex ofObject: externalFunction withValue: stackSize]. 
+ "Go out and call this guy" + result := self ffiCalloutTo: address SpecOnStack: argArrayOrNil notNil in: calloutState. + self cleanupCalloutState: calloutState. + "Can not safely use argumentCount (via e.g. methodReturnValue:) since it may have been changed by a callback." + interpreterProxy pop: primNumArgs + 1 thenPush: result. + ^result!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin>>nonRegisterStructReturnIsViaImplicitFirstArgument (in category 'marshalling') ----- + nonRegisterStructReturnIsViaImplicitFirstArgument + "Answer if a struct returned in memory is returned to the + referent of a pointer passed as an implicit first argument. + It almost always is. Subclasses can override if not." + ^true!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin>>numFloatRegArgs (in category 'accessing') ----- + numFloatRegArgs + "Answer the number of floating-point register arguments (FA0..FA7 on RV64; see class-side initialize)." + + ^NumFloatRegArgs!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin>>numIntRegArgs (in category 'accessing') ----- + numIntRegArgs + "Answer the number of integer register arguments (A0..A7 on RV64; see class-side initialize)." + + ^NumIntRegArgs!
Item was added: + ----- Method: ThreadedRiscV64FFIPlugin>>wordSize (in category 'simulation support') ----- + wordSize + "Answer the machine word size in bytes; RV64 is a 64-bit target." + + ^ 8 "RiscV64g"!
vm-dev@lists.squeakfoundation.org