//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Shared out-of-line helpers for the register save/restore sequences that
// appear in function prologues and epilogues.  Routing those sequences
// through these routines, instead of repeating them in every function,
// reduces code size.
//
// Every routine addresses the save slots relative to fp:
//     fp+#-8    r25:24
//     fp+#-16   r27:26


//-----------------------------------------------------------------------------
// FUNCTION_BEGIN name
//   Opens a routine: selects .text, exports \name as a global symbol of type
//   @function, applies .falign, and defines the label \name.
//-----------------------------------------------------------------------------
        .macro FUNCTION_BEGIN name
        .text
        .globl \name
        .type  \name, @function
        .falign
\name:
        .endm

//-----------------------------------------------------------------------------
// FUNCTION_END name
//   Closes a routine by recording its size: the distance from the label \name
//   to the current location.
//-----------------------------------------------------------------------------
        .macro FUNCTION_END name
        .size  \name, . - \name
        .endm

//-----------------------------------------------------------------------------
// FALLTHROUGH_TAIL_CALL name0 name1
//   Ends \name0 and starts \name1 at the same point in the instruction
//   stream.  The macro emits no branch, so control that enters at \name0
//   simply runs on into \name1.  The size of \name0 is recorded before the
//   .falign for \name1 is applied.
//-----------------------------------------------------------------------------
        .macro FALLTHROUGH_TAIL_CALL name0 name1
        .size  \name0, . - \name0
        .globl \name1
        .type  \name1, @function
        .falign
\name1:
        .endm


//-----------------------------------------------------------------------------
// __save_r24_through_r27
// __save_r24_through_r25
//
//   Store r27:26 at fp+#-16 (first entry point only) and r25:24 at fp+#-8,
//   then return through lr.
//
//   The compiler knows that the __save_* functions clobber LR.  No other
//   register may be used here without informing the compiler.
//
//   Only one store can be issued per packet, so entering this chain of
//   stores at the appropriate label costs nothing in performance compared
//   with a dedicated sequence per entry point.
//-----------------------------------------------------------------------------
FUNCTION_BEGIN __save_r24_through_r27
        memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
        {
                memd(fp+#-8) = r25:24
                jumpr lr
        }
FUNCTION_END __save_r24_through_r25


//-----------------------------------------------------------------------------
// __restore_r24_through_r27_and_deallocframe_before_tailcall
// __restore_r24_through_r25_and_deallocframe_before_tailcall
//
//   Reload r27:26 from fp+#-16 (first entry point only) and r25:24 from
//   fp+#-8, tear the frame down, and return to the caller so that it can
//   perform its tail call.
//
//   In each *_before_tailcall routine, jumpr lr sits in the same packet as
//   deallocframe.  The jump therefore uses the old value of lr, which is
//   where the routine has to return, while lr simultaneously receives the
//   value it needs going into the tail call.
//-----------------------------------------------------------------------------
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
        r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall


//-----------------------------------------------------------------------------
// __restore_r24_through_r27_and_deallocframe
//
//   Reload r27:26 and r25:24, tear the frame down, and return through the
//   LR value read from fp+#4.
//
//   The spare load slot in the first packet is used to restore LR early,
//   which lets the return in the second packet be issued in parallel with
//   deallocframe.
//-----------------------------------------------------------------------------
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
        {
                lr = memw(fp+#4)
                r27:26 = memd(fp+#-16)
        }
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r24_through_r27_and_deallocframe


//-----------------------------------------------------------------------------
// __restore_r24_through_r25_and_deallocframe
//
//   Reload r25:24 from fp+#-8 and tear the frame down in one packet, then
//   return through lr in the next.
//
//   Here the load bandwidth is maximized: the register reload and
//   deallocframe share a packet.
//-----------------------------------------------------------------------------
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
        {
                r25:24 = memd(fp+#-8)
                deallocframe
        }
        jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe