diff options
author | gingold | 2005-11-07 23:18:35 +0000 |
---|---|---|
committer | gingold | 2005-11-07 23:18:35 +0000 |
commit | 004bd818080a8090ea61bfb9cd656b01fe4541e0 (patch) | |
tree | a09472ff8de767ccd7f84d64ffc3c3fc4179bb75 /translate/grt/config/ia64.S | |
parent | d5888aa28f654fa58ec9f3914932885e36af3d5c (diff) | |
download | ghdl-004bd818080a8090ea61bfb9cd656b01fe4541e0.tar.gz ghdl-004bd818080a8090ea61bfb9cd656b01fe4541e0.tar.bz2 ghdl-004bd818080a8090ea61bfb9cd656b01fe4541e0.zip |
handle universal real div integer evaluation,
more optimizations added,
multi-thread ready grt,
bug fixes
Diffstat (limited to 'translate/grt/config/ia64.S')
-rw-r--r-- | translate/grt/config/ia64.S | 245 |
1 files changed, 179 insertions, 66 deletions
```diff
diff --git a/translate/grt/config/ia64.S b/translate/grt/config/ia64.S
index cd77d49..d7fb2d1 100644
--- a/translate/grt/config/ia64.S
+++ b/translate/grt/config/ia64.S
@@ -13,6 +13,8 @@ grt_stack_loop:
        ;;
        br 1b
        .endp
+
+       frame_size = 480
 
        .global grt_stack_switch#
        .proc grt_stack_switch#
@@ -23,118 +25,209 @@ grt_stack_loop:
        // f2-f5, f16-f31 [20]
        // p1-p5, p16-p63 [1] ???
        // b1-b5 [5]
+       // f2-f5, f16-f31 [20*16]
 grt_stack_switch:
        .prologue 2, 2
        .vframe r2
+       {
        alloc r31=ar.pfs, 2, 0, 0, 0
-       adds r12 = -160, r12
+       mov r14 = ar.rsc
+       adds r12 = -(frame_size + 16), r12
        .body
        ;;
+       }
        // Save ar.rsc, ar.bsp, ar.pfs
        {
-       mov r14 = ar.rsc
+       st8 [r12] = r14                 // sp + 0 <- ar.rsc
        mov r15 = ar.bsp
        adds r22 = (5*8), r12
-       }
        ;;
+       }
        {
-       st8 [r12] = r14                 // sp + 0 <- ar.rsc
        st8.spill [r22] = r1, 8         // sp + 40 <- r1
+       ;;
+       st8.spill [r22] = r4, 8         // sp + 48 <- r4
        adds r20 = 8, r12
-       }
        ;;
+       }
        st8 [r20] = r15, 8              // sp + 8 <- ar.bsp
-       st8.spill [r22] = r4, 8         // sp + 48 <- r4
-       ;;
-       mov r15 = ar.lc
-       st8 [r20] = r31, 8              // sp + 16 <- ar.pfs
        st8.spill [r22] = r5, 8         // sp + 56 <- r5
+       mov r15 = ar.lc
        ;;
-       mov r14 = b0
-       st8 [r20] = r15, 8              // sp + 24 <- ar.lc
+       {
+       st8 [r20] = r31, 8              // sp + 16 <- ar.pfs
        // Flush dirty registers to the backing store
        flushrs
+       mov r14 = b0
        ;;
+       }
+       {
+       st8 [r20] = r15, 8              // sp + 24 <- ar.lc
        // Set the RSE in enforced lazy mode.
        mov ar.rsc = 0
-       ;;
-       mov r15 = ar.rnat
-       st8.spill [r22] = r6, 8         // sp + 64 <- r6
        ;;
+       }
+       {
+       // Save sp.
+       st8 [r33] = r12
+       mov r15 = ar.rnat
        mov r16 = b1
-       st8.spill [r22] = r7, 16        // sp + 72 <- r7
+       ;;
+       }
+       {
+       st8.spill [r22] = r6, 8         // sp + 64 <- r6
        st8 [r20] = r15, 64             // sp + 32 <- ar.rnat
        ;;
+       }
+       {
+       st8.spill [r22] = r7, 16        // sp + 72 <- r7
+       st8 [r20] = r14, 8              // sp + 96 <- b0
        mov r15 = b2
+       ;;
+       }
+       {
        mov r17 = ar.unat
-       st8 [r20] = r14, 8              // sp + 96 <- b0
        ;;
-       mov r14 = b3
        st8 [r22] = r17, 24             // sp + 88 <- ar.unat
+       mov r14 = b3
+       ;;
+       }
+       {
        st8 [r20] = r16, 16             // sp + 104 <- b1
-       ;;
        st8 [r22] = r15, 16             // sp + 112 <- b2
-       st8 [r20] = r14, 16             // sp + 120 <- b3
-       mov r16 = b4
+       mov r17 = b4
        ;;
-       st8 [r22] = r16, 16             // sp + 128 <- b4
+       }
+       {
+       st8 [r20] = r14, 16             // sp + 120 <- b3
+       st8 [r22] = r17, 16             // sp + 128 <- b4
+       mov r15 = b5
+       ;;
+       }
+       {
        // Read new sp.
        ld8 r21 = [r32]
-       mov r15 = b5
        ;;
+       st8 [r20] = r15, 24             // sp + 136 <- b5
        mov r14 = pr
-       st8 [r20] = r15                 // sp + 136 <- b5
+       ;;
+       }
        ;;
-       st8 [r22] = r14                 // sp + 144 <- pr
-       adds r20 = 8, r21
+       st8 [r22] = r14, 32             // sp + 144 <- pr
+       stf.spill [r20] = f2, 32        // sp + 160 <- f2
+       ;;
+       stf.spill [r22] = f3, 32        // sp + 176 <- f3
+       stf.spill [r20] = f4, 32        // sp + 192 <- f4
+       ;;
+       stf.spill [r22] = f5, 32        // sp + 208 <- f5
+       stf.spill [r20] = f16, 32       // sp + 224 <- f16
+       ;;
+       stf.spill [r22] = f17, 32       // sp + 240 <- f17
+       stf.spill [r20] = f18, 32       // sp + 256 <- f18
        ;;
+       stf.spill [r22] = f19, 32       // sp + 272 <- f19
+       stf.spill [r20] = f20, 32       // sp + 288 <- f20
+       ;;
+       stf.spill [r22] = f21, 32       // sp + 304 <- f21
+       stf.spill [r20] = f22, 32       // sp + 320 <- f22
+       ;;
+       stf.spill [r22] = f23, 32       // sp + 336 <- f23
+       stf.spill [r20] = f24, 32       // sp + 352 <- f24
+       ;;
+       stf.spill [r22] = f25, 32       // sp + 368 <- f25
+       stf.spill [r20] = f26, 32       // sp + 384 <- f26
+       ;;
+       stf.spill [r22] = f27, 32       // sp + 400 <- f27
+       stf.spill [r20] = f28, 32       // sp + 416 <- f28
+       ;;
+       stf.spill [r22] = f29, 32       // sp + 432 <- f29
+       stf.spill [r20] = f30, 32       // sp + 448 <- f30
+       ;;
+       {
+       stf.spill [r22] = f31, 32       // sp + 464 <- f31
        invala
-       // Save sp.
-       st8 [r33] = r12
+       adds r20 = 8, r21
+       ;;
+       }
        ld8 r14 = [r21], 88             // sp + 0 (ar.rsc)
+       ld8 r16 = [r20], 8              // sp + 8 (ar.bsp)
        ;;
        ld8 r15 = [r21], -56            // sp + 88 (ar.unat)
-       ld8 r16 = [r20], 8              // sp + 8 (ar.bsp)
-       ;;
-       ld8 r17 = [r21], 8              // sp + 32 (ar.rnat)
+       ;;
        ld8 r18 = [r20], 8              // sp + 16 (ar.pfs)
        mov ar.unat = r15
+       ld8 r17 = [r21], 8              // sp + 32 (ar.rnat)
        ;;
        ld8 r15 = [r20], 72             // sp + 24 (ar.lc)
        ld8.fill r1 = [r21], 8          // sp + 40 (r1)
        mov ar.bspstore = r16
        ;;
-       mov ar.rnat = r17
-       mov ar.pfs = r18
        ld8.fill r4 = [r21], 8          // sp + 48 (r4)
+       mov ar.pfs = r18
+       mov ar.rnat = r17
        ;;
        mov ar.rsc = r14
        mov ar.lc = r15
        ld8 r17 = [r20], 8              // sp + 96 (b0)
        ;;
-       mov b0 = r17
+       {
        ld8.fill r5 = [r21], 8          // sp + 56 (r5)
        ld8 r14 = [r20], 8              // sp + 104 (b1)
+       mov b0 = r17
        ;;
-       mov b1 = r14
+       }
+       {
        ld8.fill r6 = [r21], 8          // sp + 64 (r6)
        ld8 r15 = [r20], 8              // sp + 112 (b2)
+       mov b1 = r14
        ;;
-       mov b2 = r15
-       ld8.fill r7 = [r21], 8          // sp + 72 (r7)
+       }
+       ld8.fill r7 = [r21], 64         // sp + 72 (r7)
        ld8 r14 = [r20], 8              // sp + 120 (b3)
+       mov b2 = r15
        ;;
+       ld8 r15 = [r20], 16             // sp + 128 (b4)
+       ld8 r16 = [r21], 40             // sp + 136 (b5)
        mov b3 = r14
-       ld8 r15 = [r20], 8              // sp + 128 (b4)
        ;;
+       {
+       ld8 r14 = [r20], 16             // sp + 144 (pr)
+       ;;
+       ldf.fill f2 = [r20], 32         // sp + 160 (f2)
        mov b4 = r15
-       ld8 r14 = [r20], 8              // sp + 136 (b5)
        ;;
-       mov b5 = r14
-       ld8 r15 = [r20], 8              // sp + 144 (pr)
-       mov r12 = r21
+       }
+       ldf.fill f3 = [r21], 32         // sp + 176 (f3)
+       ldf.fill f4 = [r20], 32         // sp + 192 (f4)
+       mov b5 = r16
+       ;;
+       ldf.fill f5 = [r21], 32         // sp + 208 (f5)
+       ldf.fill f16 = [r20], 32        // sp + 224 (f16)
+       mov pr = r14, -1
+       ;;
+       ldf.fill f17 = [r21], 32        // sp + 240 (f17)
+       ldf.fill f18 = [r20], 32        // sp + 256 (f18)
        ;;
-       mov pr = r15, -1
+       ldf.fill f19 = [r21], 32        // sp + 272 (f19)
+       ldf.fill f20 = [r20], 32        // sp + 288 (f20)
+       ;;
+       ldf.fill f21 = [r21], 32        // sp + 304 (f21)
+       ldf.fill f22 = [r20], 32        // sp + 320 (f22)
+       ;;
+       ldf.fill f23 = [r21], 32        // sp + 336 (f23)
+       ldf.fill f24 = [r20], 32        // sp + 352 (f24)
+       ;;
+       ldf.fill f25 = [r21], 32        // sp + 368 (f25)
+       ldf.fill f26 = [r20], 32        // sp + 384 (f26)
+       ;;
+       ldf.fill f27 = [r21], 32        // sp + 400 (f27)
+       ldf.fill f28 = [r20], 32        // sp + 416 (f28)
+       ;;
+       ldf.fill f29 = [r21], 32        // sp + 432 (f29)
+       ldf.fill f30 = [r20], 32        // sp + 448 (f30)
+       ;;
+       ldf.fill f31 = [r21], 32        // sp + 464 (f31)
+       adds r12 = 16, r20
        br.ret.sptk.many b0
        ;;
        .endp grt_stack_switch#
@@ -146,48 +239,68 @@ grt_stack_switch:
 grt_stack_create:
        .prologue 14, 34
        .save ar.pfs, r35
-       alloc r35 = ar.pfs, 2, 4, 0, 0
+       alloc r35 = ar.pfs, 2, 3, 0, 0
        .save rp, r34
-       mov r34 = b0
+       // Compute backing store.
+       movl r14 = stack_max_size
        ;;
        .body
+       {
+       ld4 r36 = [r14]                 // r14: bsp
+       mov r34 = b0
        br.call.sptk.many b0 = grt_stack_allocate#
        ;;
-       // Compute backing store.
-       movl r14=stack_max_size
+       }
+       {
        ld8 r22 = [r32], 8              // read ip (-> b1)
-       adds r20 = -(160 + 16), r8
-       adds r21 = -(160 + 16) + 32, r8
        ;;
-       mov r18 = 0x0f                  // ar.rsc: LE, PL=3, Eager
-       ld4 r14 = [r14]                 // r16: bsp
-       st8 [r21] = r0, 8               // sp + 32 (ar.rnat = 0)
        ld8 r23 = [r32]                 // read r1 from func
-       st8 [r8] = r20                  // Save cur_sp
+       adds r21 = -(frame_size + 16) + 32, r8
+       ;;
+       }
+       {
+       st8 [r21] = r0, -32             // sp + 32 (ar.rnat = 0)
+       ;;
+       st8 [r8] = r21                  // Save cur_sp
+       mov r18 = 0x0f                  // ar.rsc: LE, PL=3, Eager
+       ;;
+       }
+       {
+       st8 [r21] = r18, 40             // sp + 0 (ar.rsc)
        ;;
-       st8 [r20] = r18, 8              // sp + 0 (ar.rsc)
        st8 [r21] = r23, 64             // sp + 40 (r1 = func.r1)
-       sub r14 = r8, r14               // Backing store base
+       mov b0 = r34
        ;;
-       adds r14 = 16, r14              // Add sizeof (stack_context)
-       st8 [r21] = r22, -8             // sp + 104 (b1 = func.ip)
-       ;;
+       }
+       {
+       st8 [r21] = r22, -96            // sp + 104 (b1 = func.ip)
        movl r15 = grt_stack_loop
-       mov r16 = (0 << 7) | 1          // CFM: sol=0, sof=1
-       st8 [r20] = r14, 8              // sp + 8 (ar.bsp)
        ;;
-       st8 [r21] = r15, -48            // sp + 96 (b0 = grt_stack_loop)
-       st8 [r20] = r16, 8              // sp + 16 (ar.pfs)
+       }
+       sub r14 = r8, r36               // Backing store base
        ;;
-       st8 [r20] = r0, 8               // sp + 24 (ar.lc)
-       st8 [r21] = r33                 // sp + 48 (r4 = arg)
+       adds r14 = 16, r14              // Add sizeof (stack_context)
+       adds r20 = 40, r21
        ;;
-       st8 [r20] = r0, 8               // sp + 32 (ar.rnat)
+       {
+       st8 [r21] = r14, 88             // sp + 8 (ar.bsp)
        ;;
-
+       st8 [r21] = r15, -80            // sp + 96 (b0 = grt_stack_loop)
+       mov r16 = (0 << 7) | 1          // CFM: sol=0, sof=1
+       ;;
+       }
+       {
+       st8 [r21] = r16, 8              // sp + 16 (ar.pfs)
+       ;;
+       st8 [r21] = r0, 24              // sp + 24 (ar.lc)
        mov ar.pfs = r35
-       mov b0 = r34
+       ;;
+       }
+       {
+       st8 [r20] = r0, 8               // sp + 32 (ar.rnat)
+       st8 [r21] = r33                 // sp + 48 (r4 = arg)
        br.ret.sptk.many b0
        ;;
+       }
        .endp grt_stack_create#
        .ident "GCC: (GNU) 4.0.2"
```