/*  GRT stack implementation for ia64.
    Copyright (C) 2002 - 2014 Tristan Gingold.

    GHDL is free software; you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 2, or (at your option) any later
    version.

    GHDL is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with GCC; see the file COPYING.  If not, write to the Free
    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
    02111-1307, USA.

    As a special exception, if other files instantiate generics from this
    unit, or you link this unit with other files to produce an executable,
    this unit does not by itself cause the resulting executable to be
    covered by the GNU General Public License. This exception does not
    however invalidate any other reasons why the executable file might be
    covered by the GNU Public License.
*/
	.file "ia64.S"
	.pred.safe_across_calls p1-p5,p16-p63

	.text
	.align 16

	// grt_stack_loop
	//
	// First code executed on a stack built by grt_stack_create: that
	// routine stores this address in the b0 slot of the initial frame,
	// the entry point of FUNC in the b1 slot and ARG in the r4 slot.
	// The loop calls the code addressed by b1 with r4 as its single
	// outgoing argument (r33), and calls it again if it ever returns.
	.proc grt_stack_loop
grt_stack_loop:
	alloc r32 = ar.pfs, 0, 1, 1, 0	// 0 in, 1 local (r32), 1 out (r33)
	.body
	;;
1:	mov r33 = r4			// out0 = arg
	br.call.sptk.many b0 = b1
	;;
	br 1b
	.endp

	// Size in bytes of the save area built by grt_stack_switch (and
	// pre-built by grt_stack_create).  Layout, as offsets from the
	// saved sp:
	//     0  ar.rsc      8  ar.bsp     16  ar.pfs     24  ar.lc
	//    32  ar.rnat    40  r1 (gp)    48  r4         56  r5
	//    64  r6         72  r7         80  (not written by this file)
	//    88  ar.unat    96  b0        104  b1        112  b2
	//   120  b3        128  b4        136  b5        144  pr
	//   152  (not written by this file)
	//   160  f2, f3, f4, f5, f16 ... f31  (16 bytes each, f31 at 464)
	frame_size = 480

	.global grt_stack_switch#
	.proc grt_stack_switch#
	/* r32:	 struct stack_context *TO, r33:	 struct stack_context *FROM.  */
	// Saves the current register context in a frame_size-byte area
	// pushed on the current stack, stores the resulting sp in *FROM,
	// reads the target sp from *TO, reloads every register from the
	// area found there and returns through the reloaded b0.
	//
	// Registers saved and restored:
	//  ar.rsc, ar.bsp, ar.pfs, ar.lc, ar.rnat, ar.unat
	//  gp (r1), r4-r7 (spilled, NaT bits collected in ar.unat)
	//  b0-b5
	//  pr
	//  f2-f5, f16-f31 (20 registers, 16 bytes each)
	//
	// r20 and r22 are the two store cursors while saving; r21 and r20
	// are the two load cursors while restoring.  The post-increment
	// values interleave the two cursors over the layout given above, so
	// the instruction order below must not be changed casually.
grt_stack_switch:
	.prologue 2, 2
	.vframe r2
	{
	alloc r31=ar.pfs, 2, 0, 0, 0
	mov r14 = ar.rsc
	adds r12 = -frame_size, r12	// Push the save area
	.body
	;;
	}
	// Save ar.rsc, ar.bsp, ar.pfs
	{
	st8 [r12] = r14			// sp + 0 <- ar.rsc
	mov r15 = ar.bsp
	adds r22 = (5*8), r12		// r22 = sp + 40
	;;
	}
	{
	st8.spill [r22] = r1, 8		// sp + 40 <- r1
	;;
	st8.spill [r22] = r4, 8		// sp + 48 <- r4
	adds r20 = 8, r12		// r20 = sp + 8
	;;
	}
	st8 [r20] = r15, 8		// sp + 8 <- ar.bsp
	st8.spill [r22] = r5, 8		// sp + 56 <- r5
	mov r15 = ar.lc
	;;
	{
	st8 [r20] = r31, 8		// sp + 16 <- ar.pfs
	// Flush dirty registers to the backing store
	flushrs
	mov r14 = b0
	;;
	}
	{
	st8 [r20] = r15, 8		// sp + 24 <- ar.lc
	// Set the RSE in enforced lazy mode.
	mov ar.rsc = 0
	;;
	}
	{
	// Save sp.
	st8 [r33] = r12			// *FROM <- sp
	mov r15 = ar.rnat
	mov r16 = b1
	;;
	}
	{
	st8.spill [r22] = r6, 8		// sp + 64 <- r6
	st8 [r20] = r15, 64		// sp + 32 <- ar.rnat
	;;
	}
	{
	st8.spill [r22] = r7, 16	// sp + 72 <- r7
	st8 [r20] = r14, 8		// sp + 96 <- b0
	mov r15 = b2
	;;
	}
	{
	// All st8.spill are done: ar.unat can now be read and saved.
	mov r17 = ar.unat
	;;
	st8 [r22] = r17, 24		// sp + 88 <- ar.unat
	mov r14 = b3
	;;
	}
	{
	st8 [r20] = r16, 16		// sp + 104 <- b1
	st8 [r22] = r15, 16		// sp + 112 <- b2
	mov r17 = b4
	;;
	}
	{
	st8 [r20] = r14, 16		// sp + 120 <- b3
	st8 [r22] = r17, 16		// sp + 128 <- b4
	mov r15 = b5
	;;
	}
	{
	// Read new sp.
	ld8 r21 = [r32]			// r21 <- *TO
	;;
	st8 [r20] = r15, 24		// sp + 136 <- b5
	mov r14 = pr
	;;
	}
	;;
	st8 [r22] = r14, 32		// sp + 144 <- pr
	stf.spill [r20] = f2, 32	// sp + 160 <- f2
	;;
	stf.spill [r22] = f3, 32	// sp + 176 <- f3
	stf.spill [r20] = f4, 32	// sp + 192 <- f4
	;;
	stf.spill [r22] = f5, 32	// sp + 208 <- f5
	stf.spill [r20] = f16, 32	// sp + 224 <- f16
	;;
	stf.spill [r22] = f17, 32	// sp + 240 <- f17
	stf.spill [r20] = f18, 32	// sp + 256 <- f18
	;;
	stf.spill [r22] = f19, 32	// sp + 272 <- f19
	stf.spill [r20] = f20, 32	// sp + 288 <- f20
	;;
	stf.spill [r22] = f21, 32	// sp + 304 <- f21
	stf.spill [r20] = f22, 32	// sp + 320 <- f22
	;;
	stf.spill [r22] = f23, 32	// sp + 336 <- f23
	stf.spill [r20] = f24, 32	// sp + 352 <- f24
	;;
	stf.spill [r22] = f25, 32	// sp + 368 <- f25
	stf.spill [r20] = f26, 32	// sp + 384 <- f26
	;;
	stf.spill [r22] = f27, 32	// sp + 400 <- f27
	stf.spill [r20] = f28, 32	// sp + 416 <- f28
	;;
	stf.spill [r22] = f29, 32	// sp + 432 <- f29
	stf.spill [r20] = f30, 32	// sp + 448 <- f30
	;;
	{
	stf.spill [r22] = f31, 32	// sp + 464 <- f31
	invala
	adds r20 = 8, r21		// r20 = new sp + 8
	;;
	}
	// From here on "sp" in the comments is the target sp read from *TO.
	ld8 r14 = [r21], 88		// sp + 0 (ar.rsc)
	ld8 r16 = [r20], 8		// sp + 8 (ar.bsp)
	;;
	ld8 r15 = [r21], -56		// sp + 88 (ar.unat)
	;;
	ld8 r18 = [r20], 8		// sp + 16 (ar.pfs)
	mov ar.unat = r15		// Must be set before the ld8.fill below
	ld8 r17 = [r21], 8		// sp + 32 (ar.rnat)
	;;
	ld8 r15 = [r20], 72		// sp + 24 (ar.lc)
	ld8.fill r1 = [r21], 8		// sp + 40 (r1)
	mov ar.bspstore = r16
	;;
	ld8.fill r4 = [r21], 8		// sp + 48 (r4)
	mov ar.pfs = r18
	mov ar.rnat = r17
	;;
	mov ar.rsc = r14
	mov ar.lc = r15
	ld8 r17 = [r20], 8		// sp + 96 (b0)
	;;
	{
	ld8.fill r5 = [r21], 8		// sp + 56 (r5)
	ld8 r14 = [r20], 8		// sp + 104 (b1)
	mov b0 = r17
	;;
	}
	{
	ld8.fill r6 = [r21], 8		// sp + 64 (r6)
	ld8 r15 = [r20], 8		// sp + 112 (b2)
	mov b1 = r14
	;;
	}
	ld8.fill r7 = [r21], 64		// sp + 72 (r7)
	ld8 r14 = [r20], 8		// sp + 120 (b3)
	mov b2 = r15
	;;
	ld8 r15 = [r20], 16		// sp + 128 (b4)
	ld8 r16 = [r21], 40		// sp + 136 (b5)
	mov b3 = r14
	;;
	{
	ld8 r14 = [r20], 16		// sp + 144 (pr)
	;;
	ldf.fill f2 = [r20], 32		// sp + 160 (f2)
	mov b4 = r15
	;;
	}
	ldf.fill f3 = [r21], 32		// sp + 176 (f3)
	ldf.fill f4 = [r20], 32		// sp + 192 (f4)
	mov b5 = r16
	;;
	ldf.fill f5 = [r21], 32		// sp + 208 (f5)
	ldf.fill f16 = [r20], 32	// sp + 224 (f16)
	mov pr = r14, -1
	;;
	ldf.fill f17 = [r21], 32	// sp + 240 (f17)
	ldf.fill f18 = [r20], 32	// sp + 256 (f18)
	;;
	ldf.fill f19 = [r21], 32	// sp + 272 (f19)
	ldf.fill f20 = [r20], 32	// sp + 288 (f20)
	;;
	ldf.fill f21 = [r21], 32	// sp + 304 (f21)
	ldf.fill f22 = [r20], 32	// sp + 320 (f22)
	;;
	ldf.fill f23 = [r21], 32	// sp + 336 (f23)
	ldf.fill f24 = [r20], 32	// sp + 352 (f24)
	;;
	ldf.fill f25 = [r21], 32	// sp + 368 (f25)
	ldf.fill f26 = [r20], 32	// sp + 384 (f26)
	;;
	ldf.fill f27 = [r21], 32	// sp + 400 (f27)
	ldf.fill f28 = [r20], 32	// sp + 416 (f28)
	;;
	ldf.fill f29 = [r21], 32	// sp + 432 (f29)
	ldf.fill f30 = [r20], 32	// sp + 448 (f30)
	;;
	ldf.fill f31 = [r21], 32	// sp + 464 (f31)
	mov r12 = r20			// r20 is sp + 480: pop the save area
	br.ret.sptk.many b0
	;;
	.endp grt_stack_switch#

	.align 16
	// r32: func, r33: arg
	//
	// Calls grt_stack_allocate# and, below the address it returns in
	// r8, builds an initial save area in the layout grt_stack_switch
	// restores from:
	//   ar.rsc = 0x0f, ar.bsp = r8 - stack_max_size + 16,
	//   ar.pfs = 1, ar.lc = 0, ar.rnat = 0, ar.unat = 0,
	//   r1 = second word at FUNC, r4 = ARG,
	//   b0 = grt_stack_loop, b1 = first word at FUNC.
	// The sp of that area (r8 - (frame_size + 16)) is stored at [r8].
	// r8 is left untouched after the call, so the value returned by
	// grt_stack_allocate# is also this routine's return value.
	//
	// The ar.unat slot has to be initialised: grt_stack_switch loads it
	// into ar.unat before doing ld8.fill on r1 and r4-r7, and ld8.fill
	// takes the NaT bit of its target from ar.unat.  Leaving the slot
	// with whatever the fresh stack memory contains could therefore mark
	// gp or the argument as NaT.
	.global grt_stack_create#
	.proc grt_stack_create#
grt_stack_create:
	.prologue 14, 34
	.save ar.pfs, r35
	alloc r35 = ar.pfs, 2, 3, 0, 0
	.save rp, r34
	// Compute backing store.
	movl r14 = stack_max_size
	;;
	.body
	{
	ld4 r36 = [r14]			// r36 = stack_max_size (4-byte load)
	mov r34 = b0			// Keep the return address over the call
	br.call.sptk.many b0 = grt_stack_allocate#
	;;
	}
	{
	ld8 r22 = [r32], 8		// read ip (-> b1)
	;;
	ld8 r23 = [r32]			// read r1 from func
	adds r21 = -(frame_size + 16) + 32, r8	// r21 = sp + 32
	;;
	}
	{
	st8 [r21] = r0, -32		// sp + 32 (ar.rnat = 0)
	;;
	st8 [r8] = r21			// Save cur_sp
	mov r18 = 0x0f			// ar.rsc: LE, PL=3, Eager
	;;
	}
	{
	st8 [r21] = r18, 40		// sp + 0 (ar.rsc)
	;;
	st8 [r21] = r23, 64		// sp + 40 (r1 = func.r1)
	mov b0 = r34			// Restore the return address
	;;
	}
	{
	st8 [r21] = r22, -96		// sp + 104 (b1 = func.ip)
	movl r15 = grt_stack_loop
	;;
	}
	sub r14 = r8, r36		// Backing store base
	;;
	adds r14 = 16, r14		// Add sizeof (stack_context)
	adds r20 = 80, r21		// r20 = sp + 88 (r21 is sp + 8 here)
	;;
	{
	st8 [r21] = r14, 88		// sp + 8 (ar.bsp)
	;;
	st8 [r21] = r15, -80		// sp + 96 (b0 = grt_stack_loop)
	mov r16 = (0 << 7) | 1		// CFM: sol=0, sof=1
	;;
	}
	{
	st8 [r21] = r16, 8		// sp + 16 (ar.pfs)
	;;
	st8 [r21] = r0, 24		// sp + 24 (ar.lc)
	mov ar.pfs = r35
	;;
	}
	{
	st8 [r20] = r0			// sp + 88 (ar.unat = 0)
	st8 [r21] = r33			// sp + 48 (r4 = arg)
	br.ret.sptk.many b0
	;;
	}
	.endp grt_stack_create#
	.ident	"GCC: (GNU) 4.0.2"