summaryrefslogtreecommitdiff
path: root/translate/grt/config/ia64.S
blob: cd77d497ac212801158ad7f5b3184f84d1449864 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
	.file	"ia64.S"
	.pred.safe_across_calls p1-p5,p16-p63
	
	.text
	.align 16
	// grt_stack_loop: entry trampoline of a stack built by grt_stack_create.
	// grt_stack_create stores the address of this routine in the b0 slot of
	// the new context, the function entry point in the b1 slot and the
	// argument in the r4 slot; grt_stack_switch loads those slots and
	// returns through b0, which lands here.
	// The routine calls the function held in b1 with r4 as its only
	// argument, and calls it again every time it returns.
	.proc grt_stack_loop
grt_stack_loop:
	// Register frame: no input, 1 local (r32 receives ar.pfs), 1 output (r33).
	alloc r32 = ar.pfs, 0, 1, 1, 0
	.body
	;;
1:	mov r33 = r4	// outgoing argument <- r4
	br.call.sptk.many b0 = b1	// indirect call through b1
	;;
	br 1b	// the callee returned: call it again
	.endp
	
	.global grt_stack_switch#
	.proc grt_stack_switch#
	// grt_stack_switch: save the current context and resume another one.
	//   r32: struct stack_context *TO   (the new sp is read from [r32])
	//   r33: struct stack_context *FROM (the current sp is stored to [r33])
	//
	// A 160-byte frame is pushed on the current stack and filled as
	// follows (offsets are relative to the sp value stored in FROM):
	//     0 ar.rsc     8 ar.bsp    16 ar.pfs    24 ar.lc    32 ar.rnat
	//    40 r1 (gp)   48 r4        56 r5        64 r6       72 r7
	//    80 (unused)  88 ar.unat
	//    96 b0       104 b1       112 b2       120 b3      128 b4
	//   136 b5       144 pr       152 (unused)
	// grt_stack_create builds the same layout for a fresh stack.
	// On exit the frame of the TO context is popped, so r12 must be the
	// saved sp + 160.
	//
	//  Registers listed as to be saved:
	//  ar.rsc, ar.bsp, ar.pfs, ar.lc, ar.rnat [5]
	//  gp, r4-r7 (+ Nat)                      [6]
	//  f2-f5, f16-f31                         [20]
	//  p1-p5, p16-p63                         [1] ???
	//  b1-b5                                  [5]
	// NOTE(review): no floating-point register is saved or restored by
	// the code below although f2-f5 and f16-f31 are listed above --
	// confirm that no caller keeps live values in them across a switch.
	// NOTE(review): .vframe names r2 as the register holding the previous
	// sp, but r2 is never written in this routine -- confirm the unwind
	// information.
grt_stack_switch:
	.prologue 2, 2
	.vframe r2
	alloc r31=ar.pfs, 2, 0, 0, 0	// r31 <- ar.pfs (stored at sp + 16)
	adds r12 = -160, r12		// push the 160-byte save frame
	.body
	;;
	// Save ar.rsc, ar.bsp, ar.pfs
	{
	mov r14 = ar.rsc
	mov r15 = ar.bsp
	adds r22 = (5*8), r12		// r22 walks the r1, r4-r7 slots
	}
	;;
	{
	st8 [r12] = r14      		// sp + 0  <- ar.rsc
	st8.spill [r22] = r1, 8		// sp + 40 <- r1
	adds r20 = 8, r12		// r20 walks the ar.* slots
	}
	;;
	st8 [r20] = r15, 8		// sp + 8  <- ar.bsp
	st8.spill [r22] = r4, 8		// sp + 48 <- r4
	;;
	mov r15 = ar.lc
	st8 [r20] = r31, 8		// sp + 16 <- ar.pfs
	st8.spill [r22] = r5, 8		// sp + 56 <- r5
	;;
	mov r14 = b0
	st8 [r20] = r15, 8		// sp + 24 <- ar.lc
	//  Flush dirty registers to the backing store
	flushrs
	;;
	//  Set the RSE in enforced lazy mode.
	mov ar.rsc = 0
	;;
	mov r15 = ar.rnat
	st8.spill [r22] = r6, 8		// sp + 64 <- r6
	;;
	mov r16 = b1
	st8.spill [r22] = r7, 16	// sp + 72 <- r7; r22 -> sp + 88
	st8 [r20] = r15, 64		// sp + 32 <- ar.rnat; r20 -> sp + 96
	;;
	mov r15 = b2
	mov r17 = ar.unat		// NaT bits collected by the spills above
	st8 [r20] = r14, 8		// sp + 96 <- b0
	;;
	mov r14 = b3
	st8 [r22] = r17, 24		// sp + 88 <- ar.unat
	st8 [r20] = r16, 16		// sp + 104 <- b1
	;;
	st8 [r22] = r15, 16		// sp + 112 <- b2
	st8 [r20] = r14, 16		// sp + 120 <- b3
	mov r16 = b4
	;;
	st8 [r22] = r16, 16		// sp + 128 <- b4
	//  Read new sp.
	ld8 r21 = [r32]
	mov r15 = b5
	;;
	mov r14 = pr
	st8 [r20] = r15			// sp + 136 <- b5
	;;
	st8 [r22] = r14			// sp + 144 <- pr
	adds r20 = 8, r21		// r20 walks the ar.* and b* slots of TO
	;;
	invala
	//  Save sp.
	st8 [r33] = r12
	// From here on r20 and r21 point into the frame of the TO context.
	ld8 r14 = [r21], 88		// sp + 0 (ar.rsc)
	;;
	ld8 r15 = [r21], -56		// sp + 88 (ar.unat)
	ld8 r16 = [r20], 8		// sp + 8 (ar.bsp)
	;;
	ld8 r17 = [r21], 8		// sp + 32 (ar.rnat)
	ld8 r18 = [r20], 8		// sp + 16 (ar.pfs)
	mov ar.unat = r15		// must be set before the ld8.fill below
	;;
	ld8 r15 = [r20], 72		// sp + 24 (ar.lc)
	ld8.fill r1 = [r21], 8		// sp + 40 (r1)
	mov ar.bspstore = r16
	;;
	mov ar.rnat = r17
	mov ar.pfs = r18
	ld8.fill r4 = [r21], 8		// sp + 48 (r4)
	;;
	mov ar.rsc = r14
	mov ar.lc = r15
	ld8 r17 = [r20], 8		// sp + 96 (b0)
	;;
	mov b0 = r17
	ld8.fill r5 = [r21], 8		// sp + 56 (r5)
	ld8 r14 = [r20], 8		// sp + 104 (b1)
	;;
	mov b1 = r14
	ld8.fill r6 = [r21], 8		// sp + 64 (r6)
	ld8 r15 = [r20], 8		// sp + 112 (b2)
	;;
	mov b2 = r15
	// Last fill: step r21 over the rest of the frame (72 + 88 = 160) so
	// that it holds the sp value the TO context had on entry.
	ld8.fill r7 = [r21], 88		// sp + 72 (r7); r21 -> sp + 160
	ld8 r14 = [r20], 8		// sp + 120 (b3)
	;;
	mov b3 = r14
	ld8 r15 = [r20], 8		// sp + 128 (b4)
	;;
	mov b4 = r15
	ld8 r14 = [r20], 8		// sp + 136 (b5)
	;;
	mov b5 = r14
	ld8 r15 = [r20], 8		// sp + 144 (pr)
	mov r12 = r21			// pop the save frame: r12 = sp + 160
	;;
	mov pr = r15, -1
	br.ret.sptk.many b0
	;;
	.endp grt_stack_switch#
	
	.align 16
	// grt_stack_create: allocate a stack and build its initial context.
	//   r32: func, address of two 8-byte words: entry point, then r1 (gp)
	//   r33: arg
	// The stack is obtained from grt_stack_allocate; r8 is not modified
	// after that call, so the caller receives the value it returned.
	// A context frame is written at r8 - (160 + 16) with the layout that
	// grt_stack_switch restores:
	//     0 ar.rsc   8 ar.bsp   16 ar.pfs   24 ar.lc   32 ar.rnat
	//    40 r1      48 r4       88 ar.unat  96 b0      104 b1
	// and the address of that frame is stored at [r8] as the saved sp.
	// The slots of r5-r7, b2-b5 and pr are left as allocated.
	.global grt_stack_create#
	.proc grt_stack_create#
grt_stack_create:
	.prologue 14, 34
	.save ar.pfs, r35
	alloc r35 = ar.pfs, 2, 4, 0, 0
	.save rp, r34
	mov r34 = b0
	;;
	.body
	br.call.sptk.many b0 = grt_stack_allocate#
	;;
	//  Compute backing store.
	movl r14=stack_max_size
	ld8 r22 = [r32], 8	// read ip (-> b1)
	adds r20 = -(160 + 16), r8	// r20 = sp of the new context
	adds r21 = -(160 + 16) + 32, r8	// r21 = sp + 32
	;;
	mov r18 = 0x0f		// ar.rsc: LE, PL=3, Eager
	ld4 r14 = [r14]		// r14 = stack_max_size
	st8 [r21] = r0, 8	// sp + 32 (ar.rnat = 0)
	ld8 r23 = [r32]		// read r1 from func
	st8 [r8] = r20		// Save cur_sp
	;;
	st8 [r20] = r18, 8	// sp + 0 (ar.rsc)
	st8 [r21] = r23, 64	// sp + 40 (r1 = func.r1)
	sub r14 = r8, r14	// Backing store base
	;;
	adds r14 = 16, r14	// Add sizeof (stack_context)
	st8 [r21] = r22, -8	// sp + 104 (b1 = func.ip)
	;;
	movl r15 = grt_stack_loop
	mov r16 = (0 << 7) | 1	// CFM:	sol=0, sof=1
	st8 [r20] = r14, 8	// sp + 8 (ar.bsp)
	;;
	st8 [r21] = r15, -48	// sp + 96 (b0 = grt_stack_loop)
	st8 [r20] = r16, 8	// sp + 16 (ar.pfs)
	;;
	st8 [r20] = r0, 64	// sp + 24 (ar.lc = 0); r20 -> sp + 88
	st8 [r21] = r33		// sp + 48 (r4 = arg)
	;;
	// grt_stack_switch loads sp + 88 into ar.unat before its ld8.fill of
	// r1 and r4-r7, so the slot is cleared here: a stray bit would mark
	// gp or the argument as NaT.  ar.rnat was already cleared above.
	st8 [r20] = r0		// sp + 88 (ar.unat = 0)
	;;

	mov ar.pfs = r35
	mov b0 = r34
	br.ret.sptk.many b0
	;;
	.endp grt_stack_create#
	.ident	"GCC: (GNU) 4.0.2"