| @@ -1,2056 +1,2060 @@ | | | @@ -1,2056 +1,2060 @@ |
1 | /* $NetBSD: sljitNativeARM_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $ */ | | 1 | /* $NetBSD: sljitNativeARM_64.c,v 1.4.30.1 2024/04/18 15:21:55 martin Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Stack-less Just-In-Time compiler | | 4 | * Stack-less Just-In-Time compiler |
5 | * | | 5 | * |
6 | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. | | 6 | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. |
7 | * | | 7 | * |
8 | * Redistribution and use in source and binary forms, with or without modification, are | | 8 | * Redistribution and use in source and binary forms, with or without modification, are |
9 | * permitted provided that the following conditions are met: | | 9 | * permitted provided that the following conditions are met: |
10 | * | | 10 | * |
11 | * 1. Redistributions of source code must retain the above copyright notice, this list of | | 11 | * 1. Redistributions of source code must retain the above copyright notice, this list of |
12 | * conditions and the following disclaimer. | | 12 | * conditions and the following disclaimer. |
13 | * | | 13 | * |
14 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list | | 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list |
15 | * of conditions and the following disclaimer in the documentation and/or other materials | | 15 | * of conditions and the following disclaimer in the documentation and/or other materials |
16 | * provided with the distribution. | | 16 | * provided with the distribution. |
17 | * | | 17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY | | 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY |
19 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | | 19 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
20 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT | | 20 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT |
21 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | | 21 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | | 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
23 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | | 23 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
24 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 24 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN | | 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
26 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | | 26 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | */ | | 27 | */ |
28 | | | 28 | |
29 | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) | | 29 | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) |
30 | { | | 30 | { |
31 | return "ARM-64" SLJIT_CPUINFO; | | 31 | return "ARM-64" SLJIT_CPUINFO; |
32 | } | | 32 | } |
33 | | | 33 | |
34 | /* Length of an instruction word */ | | 34 | /* Length of an instruction word */ |
35 | typedef sljit_u32 sljit_ins; | | 35 | typedef sljit_u32 sljit_ins; |
36 | | | 36 | |
37 | #define TMP_ZERO (0) | | 37 | #define TMP_ZERO (0) |
38 | | | 38 | |
39 | #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) | | 39 | #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) |
40 | #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) | | 40 | #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) |
41 | #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) | | 41 | #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) |
42 | #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) | | 42 | #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) |
43 | #define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) | | 43 | #define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) |
44 | | | 44 | |
45 | #define TMP_FREG1 (0) | | 45 | #define TMP_FREG1 (0) |
46 | #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) | | 46 | #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) |
47 | | | 47 | |
48 | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { | | 48 | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { |
49 | 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 | | 49 | 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 |
50 | }; | | 50 | }; |
51 | | | 51 | |
/* Bit 31 of an instruction word. load_immediate() XORs it into MOVN / ORRI
   when the value fits in 32 bits. Defined as an unsigned constant:
   (1 << 31) would shift a signed int into its sign bit, which is undefined
   behaviour and yields a negative value when widened. */
#define W_OP (1u << 31)

/* General purpose register operands: the sljit register index is mapped
   through reg_map[] and moved to the bit position of the given field
   (bit 0 for RD/RT, bit 5 for RN, bit 10 for RT2, bit 16 for RM). */
#define RD(rd) (reg_map[rd])
#define RT(rt) (reg_map[rt])
#define RN(rn) (reg_map[rn] << 5)
#define RT2(rt2) (reg_map[rt2] << 10)
#define RM(rm) (reg_map[rm] << 16)

/* Floating point register operands: the number is used as is (no
   reg_map[] lookup) at bit 0 (VD/VT), bit 5 (VN) or bit 16 (VM). */
#define VD(vd) (vd)
#define VT(vt) (vt)
#define VN(vn) ((vn) << 5)
#define VM(vm) ((vm) << 16)
62 | | | 62 | |
63 | /* --------------------------------------------------------------------- */ | | 63 | /* --------------------------------------------------------------------- */ |
64 | /* Instruction forms */ | | 64 | /* Instruction forms */ |
65 | /* --------------------------------------------------------------------- */ | | 65 | /* --------------------------------------------------------------------- */ |
66 | | | 66 | |
67 | #define ADC 0x9a000000 | | 67 | #define ADC 0x9a000000 |
68 | #define ADD 0x8b000000 | | 68 | #define ADD 0x8b000000 |
69 | #define ADDI 0x91000000 | | 69 | #define ADDI 0x91000000 |
70 | #define AND 0x8a000000 | | 70 | #define AND 0x8a000000 |
71 | #define ANDI 0x92000000 | | 71 | #define ANDI 0x92000000 |
72 | #define ASRV 0x9ac02800 | | 72 | #define ASRV 0x9ac02800 |
73 | #define B 0x14000000 | | 73 | #define B 0x14000000 |
74 | #define B_CC 0x54000000 | | 74 | #define B_CC 0x54000000 |
75 | #define BL 0x94000000 | | 75 | #define BL 0x94000000 |
76 | #define BLR 0xd63f0000 | | 76 | #define BLR 0xd63f0000 |
77 | #define BR 0xd61f0000 | | 77 | #define BR 0xd61f0000 |
78 | #define BRK 0xd4200000 | | 78 | #define BRK 0xd4200000 |
79 | #define CBZ 0xb4000000 | | 79 | #define CBZ 0xb4000000 |
80 | #define CLZ 0xdac01000 | | 80 | #define CLZ 0xdac01000 |
81 | #define CSINC 0x9a800400 | | 81 | #define CSINC 0x9a800400 |
82 | #define EOR 0xca000000 | | 82 | #define EOR 0xca000000 |
83 | #define EORI 0xd2000000 | | 83 | #define EORI 0xd2000000 |
84 | #define FABS 0x1e60c000 | | 84 | #define FABS 0x1e60c000 |
85 | #define FADD 0x1e602800 | | 85 | #define FADD 0x1e602800 |
86 | #define FCMP 0x1e602000 | | 86 | #define FCMP 0x1e602000 |
87 | #define FCVT 0x1e224000 | | 87 | #define FCVT 0x1e224000 |
88 | #define FCVTZS 0x9e780000 | | 88 | #define FCVTZS 0x9e780000 |
89 | #define FDIV 0x1e601800 | | 89 | #define FDIV 0x1e601800 |
90 | #define FMOV 0x1e604000 | | 90 | #define FMOV 0x1e604000 |
91 | #define FMUL 0x1e600800 | | 91 | #define FMUL 0x1e600800 |
92 | #define FNEG 0x1e614000 | | 92 | #define FNEG 0x1e614000 |
93 | #define FSUB 0x1e603800 | | 93 | #define FSUB 0x1e603800 |
94 | #define LDRI 0xf9400000 | | 94 | #define LDRI 0xf9400000 |
95 | #define LDP 0xa9400000 | | 95 | #define LDP 0xa9400000 |
96 | #define LDP_PST 0xa8c00000 | | 96 | #define LDP_PST 0xa8c00000 |
97 | #define LSLV 0x9ac02000 | | 97 | #define LSLV 0x9ac02000 |
98 | #define LSRV 0x9ac02400 | | 98 | #define LSRV 0x9ac02400 |
99 | #define MADD 0x9b000000 | | 99 | #define MADD 0x9b000000 |
100 | #define MOVK 0xf2800000 | | 100 | #define MOVK 0xf2800000 |
101 | #define MOVN 0x92800000 | | 101 | #define MOVN 0x92800000 |
102 | #define MOVZ 0xd2800000 | | 102 | #define MOVZ 0xd2800000 |
103 | #define NOP 0xd503201f | | 103 | #define NOP 0xd503201f |
104 | #define ORN 0xaa200000 | | 104 | #define ORN 0xaa200000 |
105 | #define ORR 0xaa000000 | | 105 | #define ORR 0xaa000000 |
106 | #define ORRI 0xb2000000 | | 106 | #define ORRI 0xb2000000 |
107 | #define RET 0xd65f0000 | | 107 | #define RET 0xd65f0000 |
108 | #define SBC 0xda000000 | | 108 | #define SBC 0xda000000 |
109 | #define SBFM 0x93000000 | | 109 | #define SBFM 0x93000000 |
110 | #define SCVTF 0x9e620000 | | 110 | #define SCVTF 0x9e620000 |
111 | #define SDIV 0x9ac00c00 | | 111 | #define SDIV 0x9ac00c00 |
112 | #define SMADDL 0x9b200000 | | 112 | #define SMADDL 0x9b200000 |
113 | #define SMULH 0x9b403c00 | | 113 | #define SMULH 0x9b403c00 |
114 | #define STP 0xa9000000 | | 114 | #define STP 0xa9000000 |
115 | #define STP_PRE 0xa9800000 | | 115 | #define STP_PRE 0xa9800000 |
116 | #define STRI 0xf9000000 | | 116 | #define STRI 0xf9000000 |
117 | #define STR_FI 0x3d000000 | | 117 | #define STR_FI 0x3d000000 |
118 | #define STR_FR 0x3c206800 | | 118 | #define STR_FR 0x3c206800 |
119 | #define STUR_FI 0x3c000000 | | 119 | #define STUR_FI 0x3c000000 |
120 | #define SUB 0xcb000000 | | 120 | #define SUB 0xcb000000 |
121 | #define SUBI 0xd1000000 | | 121 | #define SUBI 0xd1000000 |
122 | #define SUBS 0xeb000000 | | 122 | #define SUBS 0xeb000000 |
123 | #define UBFM 0xd3000000 | | 123 | #define UBFM 0xd3000000 |
124 | #define UDIV 0x9ac00800 | | 124 | #define UDIV 0x9ac00800 |
125 | #define UMULH 0x9bc03c00 | | 125 | #define UMULH 0x9bc03c00 |
126 | | | 126 | |
127 | /* dest_reg is the absolute name of the register | | 127 | /* dest_reg is the absolute name of the register |
128 | Useful for reordering instructions in the delay slot. */ | | 128 | Useful for reordering instructions in the delay slot. */ |
129 | static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) | | 129 | static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) |
130 | { | | 130 | { |
131 | sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); | | 131 | sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); |
132 | FAIL_IF(!ptr); | | 132 | FAIL_IF(!ptr); |
133 | *ptr = ins; | | 133 | *ptr = ins; |
134 | compiler->size++; | | 134 | compiler->size++; |
135 | return SLJIT_SUCCESS; | | 135 | return SLJIT_SUCCESS; |
136 | } | | 136 | } |
137 | | | 137 | |
138 | static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) | | 138 | static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) |
139 | { | | 139 | { |
140 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); | | 140 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); |
141 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); | | 141 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); |
142 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); | | 142 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); |
143 | return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); | | 143 | return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); |
144 | } | | 144 | } |
145 | | | 145 | |
146 | static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) | | 146 | static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) |
147 | { | | 147 | { |
148 | sljit_s32 dst = inst[0] & 0x1f; | | 148 | sljit_s32 dst = inst[0] & 0x1f; |
149 | SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); | | 149 | SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); |
150 | inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5); | | 150 | inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5); |
151 | inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21); | | 151 | inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21); |
152 | inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21); | | 152 | inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21); |
153 | inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); | | 153 | inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); |
154 | } | | 154 | } |
155 | | | 155 | |
156 | static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) | | 156 | static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) |
157 | { | | 157 | { |
158 | sljit_sw diff; | | 158 | sljit_sw diff; |
159 | sljit_uw target_addr; | | 159 | sljit_uw target_addr; |
160 | | | 160 | |
161 | if (jump->flags & SLJIT_REWRITABLE_JUMP) { | | 161 | if (jump->flags & SLJIT_REWRITABLE_JUMP) { |
162 | jump->flags |= PATCH_ABS64; | | 162 | jump->flags |= PATCH_ABS64; |
163 | return 0; | | 163 | return 0; |
164 | } | | 164 | } |
165 | | | 165 | |
166 | if (jump->flags & JUMP_ADDR) | | 166 | if (jump->flags & JUMP_ADDR) |
167 | target_addr = jump->u.target; | | 167 | target_addr = jump->u.target; |
168 | else { | | 168 | else { |
169 | SLJIT_ASSERT(jump->flags & JUMP_LABEL); | | 169 | SLJIT_ASSERT(jump->flags & JUMP_LABEL); |
170 | target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; | | 170 | target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; |
171 | } | | 171 | } |
172 | | | 172 | |
173 | diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; | | 173 | diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; |
174 | | | 174 | |
175 | if (jump->flags & IS_COND) { | | 175 | if (jump->flags & IS_COND) { |
176 | diff += sizeof(sljit_ins); | | 176 | diff += sizeof(sljit_ins); |
177 | if (diff <= 0xfffff && diff >= -0x100000) { | | 177 | if (diff <= 0xfffff && diff >= -0x100000) { |
178 | code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; | | 178 | code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; |
179 | jump->addr -= sizeof(sljit_ins); | | 179 | jump->addr -= sizeof(sljit_ins); |
180 | jump->flags |= PATCH_COND; | | 180 | jump->flags |= PATCH_COND; |
181 | return 5; | | 181 | return 5; |
182 | } | | 182 | } |
183 | diff -= sizeof(sljit_ins); | | 183 | diff -= sizeof(sljit_ins); |
184 | } | | 184 | } |
185 | | | 185 | |
186 | if (diff <= 0x7ffffff && diff >= -0x8000000) { | | 186 | if (diff <= 0x7ffffff && diff >= -0x8000000) { |
187 | jump->flags |= PATCH_B; | | 187 | jump->flags |= PATCH_B; |
188 | return 4; | | 188 | return 4; |
189 | } | | 189 | } |
190 | | | 190 | |
191 | if (target_addr <= 0xffffffffl) { | | 191 | if (target_addr <= 0xffffffffl) { |
192 | if (jump->flags & IS_COND) | | 192 | if (jump->flags & IS_COND) |
193 | code_ptr[-5] -= (2 << 5); | | 193 | code_ptr[-5] -= (2 << 5); |
194 | code_ptr[-2] = code_ptr[0]; | | 194 | code_ptr[-2] = code_ptr[0]; |
195 | return 2; | | 195 | return 2; |
196 | } | | 196 | } |
197 | if (target_addr <= 0xffffffffffffl) { | | 197 | if (target_addr <= 0xffffffffffffl) { |
198 | if (jump->flags & IS_COND) | | 198 | if (jump->flags & IS_COND) |
199 | code_ptr[-5] -= (1 << 5); | | 199 | code_ptr[-5] -= (1 << 5); |
200 | jump->flags |= PATCH_ABS48; | | 200 | jump->flags |= PATCH_ABS48; |
201 | code_ptr[-1] = code_ptr[0]; | | 201 | code_ptr[-1] = code_ptr[0]; |
202 | return 1; | | 202 | return 1; |
203 | } | | 203 | } |
204 | | | 204 | |
205 | jump->flags |= PATCH_ABS64; | | 205 | jump->flags |= PATCH_ABS64; |
206 | return 0; | | 206 | return 0; |
207 | } | | 207 | } |
208 | | | 208 | |
209 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) | | 209 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) |
210 | { | | 210 | { |
211 | struct sljit_memory_fragment *buf; | | 211 | struct sljit_memory_fragment *buf; |
212 | sljit_ins *code; | | 212 | sljit_ins *code; |
213 | sljit_ins *code_ptr; | | 213 | sljit_ins *code_ptr; |
214 | sljit_ins *buf_ptr; | | 214 | sljit_ins *buf_ptr; |
215 | sljit_ins *buf_end; | | 215 | sljit_ins *buf_end; |
216 | sljit_uw word_count; | | 216 | sljit_uw word_count; |
217 | sljit_sw executable_offset; | | 217 | sljit_sw executable_offset; |
218 | sljit_uw addr; | | 218 | sljit_uw addr; |
219 | sljit_s32 dst; | | 219 | sljit_s32 dst; |
220 | | | 220 | |
221 | struct sljit_label *label; | | 221 | struct sljit_label *label; |
222 | struct sljit_jump *jump; | | 222 | struct sljit_jump *jump; |
223 | struct sljit_const *const_; | | 223 | struct sljit_const *const_; |
224 | | | 224 | |
225 | CHECK_ERROR_PTR(); | | 225 | CHECK_ERROR_PTR(); |
226 | CHECK_PTR(check_sljit_generate_code(compiler)); | | 226 | CHECK_PTR(check_sljit_generate_code(compiler)); |
227 | reverse_buf(compiler); | | 227 | reverse_buf(compiler); |
228 | | | 228 | |
229 | code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); | | 229 | code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); |
230 | PTR_FAIL_WITH_EXEC_IF(code); | | 230 | PTR_FAIL_WITH_EXEC_IF(code); |
231 | buf = compiler->buf; | | 231 | buf = compiler->buf; |
232 | | | 232 | |
233 | code_ptr = code; | | 233 | code_ptr = code; |
234 | word_count = 0; | | 234 | word_count = 0; |
235 | executable_offset = SLJIT_EXEC_OFFSET(code); | | 235 | executable_offset = SLJIT_EXEC_OFFSET(code); |
236 | | | 236 | |
237 | label = compiler->labels; | | 237 | label = compiler->labels; |
238 | jump = compiler->jumps; | | 238 | jump = compiler->jumps; |
239 | const_ = compiler->consts; | | 239 | const_ = compiler->consts; |
240 | | | 240 | |
241 | do { | | 241 | do { |
242 | buf_ptr = (sljit_ins*)buf->memory; | | 242 | buf_ptr = (sljit_ins*)buf->memory; |
243 | buf_end = buf_ptr + (buf->used_size >> 2); | | 243 | buf_end = buf_ptr + (buf->used_size >> 2); |
244 | do { | | 244 | do { |
245 | *code_ptr = *buf_ptr++; | | 245 | *code_ptr = *buf_ptr++; |
246 | /* These structures are ordered by their address. */ | | 246 | /* These structures are ordered by their address. */ |
247 | SLJIT_ASSERT(!label || label->size >= word_count); | | 247 | SLJIT_ASSERT(!label || label->size >= word_count); |
248 | SLJIT_ASSERT(!jump || jump->addr >= word_count); | | 248 | SLJIT_ASSERT(!jump || jump->addr >= word_count); |
249 | SLJIT_ASSERT(!const_ || const_->addr >= word_count); | | 249 | SLJIT_ASSERT(!const_ || const_->addr >= word_count); |
250 | if (label && label->size == word_count) { | | 250 | if (label && label->size == word_count) { |
251 | label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); | | 251 | label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); |
252 | label->size = code_ptr - code; | | 252 | label->size = code_ptr - code; |
253 | label = label->next; | | 253 | label = label->next; |
254 | } | | 254 | } |
255 | if (jump && jump->addr == word_count) { | | 255 | if (jump && jump->addr == word_count) { |
256 | jump->addr = (sljit_uw)(code_ptr - 4); | | 256 | jump->addr = (sljit_uw)(code_ptr - 4); |
257 | code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); | | 257 | code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); |
258 | jump = jump->next; | | 258 | jump = jump->next; |
259 | } | | 259 | } |
260 | if (const_ && const_->addr == word_count) { | | 260 | if (const_ && const_->addr == word_count) { |
261 | const_->addr = (sljit_uw)code_ptr; | | 261 | const_->addr = (sljit_uw)code_ptr; |
262 | const_ = const_->next; | | 262 | const_ = const_->next; |
263 | } | | 263 | } |
264 | code_ptr ++; | | 264 | code_ptr ++; |
265 | word_count ++; | | 265 | word_count ++; |
266 | } while (buf_ptr < buf_end); | | 266 | } while (buf_ptr < buf_end); |
267 | | | 267 | |
268 | buf = buf->next; | | 268 | buf = buf->next; |
269 | } while (buf); | | 269 | } while (buf); |
270 | | | 270 | |
271 | if (label && label->size == word_count) { | | 271 | if (label && label->size == word_count) { |
272 | label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); | | 272 | label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); |
273 | label->size = code_ptr - code; | | 273 | label->size = code_ptr - code; |
274 | label = label->next; | | 274 | label = label->next; |
275 | } | | 275 | } |
276 | | | 276 | |
277 | SLJIT_ASSERT(!label); | | 277 | SLJIT_ASSERT(!label); |
278 | SLJIT_ASSERT(!jump); | | 278 | SLJIT_ASSERT(!jump); |
279 | SLJIT_ASSERT(!const_); | | 279 | SLJIT_ASSERT(!const_); |
280 | SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); | | 280 | SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); |
281 | | | 281 | |
282 | jump = compiler->jumps; | | 282 | jump = compiler->jumps; |
283 | while (jump) { | | 283 | while (jump) { |
284 | do { | | 284 | do { |
285 | addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; | | 285 | addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; |
286 | buf_ptr = (sljit_ins *)jump->addr; | | 286 | buf_ptr = (sljit_ins *)jump->addr; |
287 | | | 287 | |
288 | if (jump->flags & PATCH_B) { | | 288 | if (jump->flags & PATCH_B) { |
289 | addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; | | 289 | addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; |
290 | SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); | | 290 | SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); |
291 | buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); | | 291 | buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); |
292 | if (jump->flags & IS_COND) | | 292 | if (jump->flags & IS_COND) |
293 | buf_ptr[-1] -= (4 << 5); | | 293 | buf_ptr[-1] -= (4 << 5); |
294 | break; | | 294 | break; |
295 | } | | 295 | } |
296 | if (jump->flags & PATCH_COND) { | | 296 | if (jump->flags & PATCH_COND) { |
297 | addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; | | 297 | addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; |
298 | SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); | | 298 | SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); |
299 | buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); | | 299 | buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); |
300 | break; | | 300 | break; |
301 | } | | 301 | } |
302 | | | 302 | |
303 | SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); | | 303 | SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); |
304 | SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); | | 304 | SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); |
305 | | | 305 | |
306 | dst = buf_ptr[0] & 0x1f; | | 306 | dst = buf_ptr[0] & 0x1f; |
307 | buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); | | 307 | buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); |
308 | buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); | | 308 | buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); |
309 | if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) | | 309 | if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) |
310 | buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); | | 310 | buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); |
311 | if (jump->flags & PATCH_ABS64) | | 311 | if (jump->flags & PATCH_ABS64) |
312 | buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); | | 312 | buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); |
313 | } while (0); | | 313 | } while (0); |
314 | jump = jump->next; | | 314 | jump = jump->next; |
315 | } | | 315 | } |
316 | | | 316 | |
317 | compiler->error = SLJIT_ERR_COMPILED; | | 317 | compiler->error = SLJIT_ERR_COMPILED; |
318 | compiler->executable_offset = executable_offset; | | 318 | compiler->executable_offset = executable_offset; |
319 | compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); | | 319 | compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); |
320 | | | 320 | |
321 | code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); | | 321 | code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); |
322 | code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); | | 322 | code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); |
323 | | | 323 | |
324 | SLJIT_CACHE_FLUSH(code, code_ptr); | | 324 | SLJIT_CACHE_FLUSH(code, code_ptr); |
325 | return code; | | 325 | return code; |
326 | } | | 326 | } |
327 | | | 327 | |
328 | /* --------------------------------------------------------------------- */ | | 328 | /* --------------------------------------------------------------------- */ |
329 | /* Core code generator functions. */ | | 329 | /* Core code generator functions. */ |
330 | /* --------------------------------------------------------------------- */ | | 330 | /* --------------------------------------------------------------------- */ |
331 | | | 331 | |
/* Stores the number of trailing zero bits of value (treated as 64 bits
   wide) in result and shifts those bits out of value, so value is modified.
   A zero value yields 63. When value is signed the shifts are signed too;
   logical_imm() depends on that for its second use.

   Both arguments are evaluated several times and must be plain lvalues.
   The body is wrapped in do { } while (0) so that the macro acts as a
   single statement, e.g. under an unbraced if. */
#define COUNT_TRAILING_ZERO(value, result) \
	do { \
		(result) = 0; \
		if (!((value) & 0xffffffff)) { \
			(result) += 32; \
			(value) >>= 32; \
		} \
		if (!((value) & 0xffff)) { \
			(result) += 16; \
			(value) >>= 16; \
		} \
		if (!((value) & 0xff)) { \
			(result) += 8; \
			(value) >>= 8; \
		} \
		if (!((value) & 0xf)) { \
			(result) += 4; \
			(value) >>= 4; \
		} \
		if (!((value) & 0x3)) { \
			(result) += 2; \
			(value) >>= 2; \
		} \
		if (!((value) & 0x1)) { \
			(result) += 1; \
			(value) >>= 1; \
		} \
	} while (0)
358 | | | 358 | |
359 | #define LOGICAL_IMM_CHECK 0x100 | | 359 | #define LOGICAL_IMM_CHECK 0x100 |
360 | | | 360 | |
361 | static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) | | 361 | static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) |
362 | { | | 362 | { |
363 | sljit_s32 negated, ones, right; | | 363 | sljit_s32 negated, ones, right; |
364 | sljit_uw mask, uimm; | | 364 | sljit_uw mask, uimm; |
365 | sljit_ins ins; | | 365 | sljit_ins ins; |
366 | | | 366 | |
367 | if (len & LOGICAL_IMM_CHECK) { | | 367 | if (len & LOGICAL_IMM_CHECK) { |
368 | len &= ~LOGICAL_IMM_CHECK; | | 368 | len &= ~LOGICAL_IMM_CHECK; |
369 | if (len == 32 && (imm == 0 || imm == -1)) | | 369 | if (len == 32 && (imm == 0 || imm == -1)) |
370 | return 0; | | 370 | return 0; |
371 | if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) | | 371 | if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) |
372 | return 0; | | 372 | return 0; |
373 | } | | 373 | } |
374 | | | 374 | |
375 | SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) | | 375 | SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) |
376 | || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); | | 376 | || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); |
377 | uimm = (sljit_uw)imm; | | 377 | uimm = (sljit_uw)imm; |
378 | while (1) { | | 378 | while (1) { |
379 | if (len <= 0) { | | 379 | if (len <= 0) { |
380 | SLJIT_UNREACHABLE(); | | 380 | SLJIT_UNREACHABLE(); |
381 | return 0; | | 381 | return 0; |
382 | } | | 382 | } |
383 | mask = ((sljit_uw)1 << len) - 1; | | 383 | mask = ((sljit_uw)1 << len) - 1; |
384 | if ((uimm & mask) != ((uimm >> len) & mask)) | | 384 | if ((uimm & mask) != ((uimm >> len) & mask)) |
385 | break; | | 385 | break; |
386 | len >>= 1; | | 386 | len >>= 1; |
387 | } | | 387 | } |
388 | | | 388 | |
389 | len <<= 1; | | 389 | len <<= 1; |
390 | | | 390 | |
391 | negated = 0; | | 391 | negated = 0; |
392 | if (uimm & 0x1) { | | 392 | if (uimm & 0x1) { |
393 | negated = 1; | | 393 | negated = 1; |
394 | uimm = ~uimm; | | 394 | uimm = ~uimm; |
395 | } | | 395 | } |
396 | | | 396 | |
397 | if (len < 64) | | 397 | if (len < 64) |
398 | uimm &= ((sljit_uw)1 << len) - 1; | | 398 | uimm &= ((sljit_uw)1 << len) - 1; |
399 | | | 399 | |
400 | /* Unsigned right shift. */ | | 400 | /* Unsigned right shift. */ |
401 | COUNT_TRAILING_ZERO(uimm, right); | | 401 | COUNT_TRAILING_ZERO(uimm, right); |
402 | | | 402 | |
403 | /* Signed shift. We also know that the highest bit is set. */ | | 403 | /* Signed shift. We also know that the highest bit is set. */ |
404 | imm = (sljit_sw)~uimm; | | 404 | imm = (sljit_sw)~uimm; |
405 | SLJIT_ASSERT(imm < 0); | | 405 | SLJIT_ASSERT(imm < 0); |
406 | | | 406 | |
407 | COUNT_TRAILING_ZERO(imm, ones); | | 407 | COUNT_TRAILING_ZERO(imm, ones); |
408 | | | 408 | |
409 | if (~imm) | | 409 | if (~imm) |
410 | return 0; | | 410 | return 0; |
411 | | | 411 | |
412 | if (len == 64) | | 412 | if (len == 64) |
413 | ins = 1 << 22; | | 413 | ins = 1 << 22; |
414 | else | | 414 | else |
415 | ins = (0x3f - ((len << 1) - 1)) << 10; | | 415 | ins = (0x3f - ((len << 1) - 1)) << 10; |
416 | | | 416 | |
417 | if (negated) | | 417 | if (negated) |
418 | return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); | | 418 | return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); |
419 | | | 419 | |
420 | return ins | ((ones - 1) << 10) | ((len - right) << 16); | | 420 | return ins | ((ones - 1) << 10) | ((len - right) << 16); |
421 | } | | 421 | } |
422 | | | 422 | |
423 | #undef COUNT_TRAILING_ZERO | | 423 | #undef COUNT_TRAILING_ZERO |
424 | | | 424 | |
425 | static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) | | 425 | static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) |
426 | { | | 426 | { |
427 | sljit_uw imm = (sljit_uw)simm; | | 427 | sljit_uw imm = (sljit_uw)simm; |
428 | sljit_s32 i, zeros, ones, first; | | 428 | sljit_s32 i, zeros, ones, first; |
429 | sljit_ins bitmask; | | 429 | sljit_ins bitmask; |
430 | | | 430 | |
431 | if (imm <= 0xffff) | | 431 | if (imm <= 0xffff) |
432 | return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); | | 432 | return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); |
433 | | | 433 | |
434 | if (simm >= -0x10000 && simm < 0) | | 434 | if (simm >= -0x10000 && simm < 0) |
435 | return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); | | 435 | return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); |
436 | | | 436 | |
437 | if (imm <= 0xffffffffl) { | | 437 | if (imm <= 0xffffffffl) { |
438 | if ((imm & 0xffff0000l) == 0xffff0000) | | 438 | if ((imm & 0xffff0000l) == 0xffff0000) |
439 | return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); | | 439 | return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); |
440 | if ((imm & 0xffff) == 0xffff) | | 440 | if ((imm & 0xffff) == 0xffff) |
441 | return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); | | 441 | return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); |
442 | bitmask = logical_imm(simm, 16); | | 442 | bitmask = logical_imm(simm, 16); |
443 | if (bitmask != 0) | | 443 | if (bitmask != 0) |
444 | return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); | | 444 | return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); |
445 | } | | 445 | } |
446 | else { | | 446 | else { |
447 | bitmask = logical_imm(simm, 32); | | 447 | bitmask = logical_imm(simm, 32); |
448 | if (bitmask != 0) | | 448 | if (bitmask != 0) |
449 | return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); | | 449 | return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); |
450 | } | | 450 | } |
451 | | | 451 | |
452 | if (imm <= 0xffffffffl) { | | 452 | if (imm <= 0xffffffffl) { |
453 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); | | 453 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); |
454 | return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); | | 454 | return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); |
455 | } | | 455 | } |
456 | | | 456 | |
457 | if (simm >= -0x100000000l && simm < 0) { | | 457 | if (simm >= -0x100000000l && simm < 0) { |
458 | FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); | | 458 | FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); |
459 | return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); | | 459 | return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); |
460 | } | | 460 | } |
461 | | | 461 | |
462 | /* A large amount of number can be constructed from ORR and MOVx, | | 462 | /* A large amount of number can be constructed from ORR and MOVx, |
463 | but computing them is costly. We don't */ | | 463 | but computing them is costly. We don't */ |
464 | | | 464 | |
465 | zeros = 0; | | 465 | zeros = 0; |
466 | ones = 0; | | 466 | ones = 0; |
467 | for (i = 4; i > 0; i--) { | | 467 | for (i = 4; i > 0; i--) { |
468 | if ((simm & 0xffff) == 0) | | 468 | if ((simm & 0xffff) == 0) |
469 | zeros++; | | 469 | zeros++; |
470 | if ((simm & 0xffff) == 0xffff) | | 470 | if ((simm & 0xffff) == 0xffff) |
471 | ones++; | | 471 | ones++; |
472 | simm >>= 16; | | 472 | simm >>= 16; |
473 | } | | 473 | } |
474 | | | 474 | |
475 | simm = (sljit_sw)imm; | | 475 | simm = (sljit_sw)imm; |
476 | first = 1; | | 476 | first = 1; |
477 | if (ones > zeros) { | | 477 | if (ones > zeros) { |
478 | simm = ~simm; | | 478 | simm = ~simm; |
479 | for (i = 0; i < 4; i++) { | | 479 | for (i = 0; i < 4; i++) { |
480 | if (!(simm & 0xffff)) { | | 480 | if (!(simm & 0xffff)) { |
481 | simm >>= 16; | | 481 | simm >>= 16; |
482 | continue; | | 482 | continue; |
483 | } | | 483 | } |
484 | if (first) { | | 484 | if (first) { |
485 | first = 0; | | 485 | first = 0; |
486 | FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); | | 486 | FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); |
487 | } | | 487 | } |
488 | else | | 488 | else |
489 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); | | 489 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); |
490 | simm >>= 16; | | 490 | simm >>= 16; |
491 | } | | 491 | } |
492 | return SLJIT_SUCCESS; | | 492 | return SLJIT_SUCCESS; |
493 | } | | 493 | } |
494 | | | 494 | |
495 | for (i = 0; i < 4; i++) { | | 495 | for (i = 0; i < 4; i++) { |
496 | if (!(simm & 0xffff)) { | | 496 | if (!(simm & 0xffff)) { |
497 | simm >>= 16; | | 497 | simm >>= 16; |
498 | continue; | | 498 | continue; |
499 | } | | 499 | } |
500 | if (first) { | | 500 | if (first) { |
501 | first = 0; | | 501 | first = 0; |
502 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); | | 502 | FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); |
503 | } | | 503 | } |
504 | else | | 504 | else |
505 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); | | 505 | FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); |
506 | simm >>= 16; | | 506 | simm >>= 16; |
507 | } | | 507 | } |
508 | return SLJIT_SUCCESS; | | 508 | return SLJIT_SUCCESS; |
509 | } | | 509 | } |
510 | | | 510 | |
/*
 * Modifier bits carried in the flags argument of emit_op_imm(), above the
 * low 16 bits that hold the SLJIT opcode.
 */
/* arg1 is an immediate value, not a register. */
#define ARG1_IMM	0x0010000
/* arg2 is an immediate value, not a register. */
#define ARG2_IMM	0x0020000
/* Toggle bit 31 of the emitted opcode (presumably the 32-bit operation form). */
#define INT_OP		0x0040000
/* The operation must also update the condition flags. */
#define SET_FLAGS	0x0080000
/* The result value is not needed; only the flags are (see CHECK_FLAGS). */
#define UNUSED_RETURN	0x0100000
/* NOTE(review): the SLOW_* bits are not used by emit_op_imm(); presumably they
   mark operands that need the slow memory-access path - confirm at callers. */
#define SLOW_DEST	0x0200000
#define SLOW_SRC1	0x0400000
#define SLOW_SRC2	0x0800000

/*
 * When SET_FLAGS is requested, add flag_bits to inv_bits (which is XORed into
 * the opcode) and, if the result is unused, redirect dst to TMP_ZERO.
 *
 * Expects `flags`, `inv_bits` and `dst` to be in scope at the point of use.
 * Wrapped in do { } while (0) so that it behaves as a single statement, e.g.
 * inside an unbraced if/else.
 */
#define CHECK_FLAGS(flag_bits) \
	do { \
		if (flags & SET_FLAGS) { \
			inv_bits |= (flag_bits); \
			if (flags & UNUSED_RETURN) \
				dst = TMP_ZERO; \
		} \
	} while (0)
526 | | | 526 | |
/*
 * Emit the machine code for a single SLJIT operation.
 *
 * flags      - low 16 bits: the SLJIT opcode; upper bits: ARG1_IMM, ARG2_IMM,
 *              INT_OP, SET_FLAGS and UNUSED_RETURN modifiers.
 * dst        - destination register.
 * arg1, arg2 - source operands. Each one is a register unless the matching
 *              ARGn_IMM flag is set, in which case it is the immediate value.
 *
 * Immediate operands are folded into the instruction when a suitable encoding
 * is found below; otherwise they are loaded into TMP_REG1 / TMP_REG2 (or
 * replaced by TMP_ZERO when they are 0) and the register-register form is used.
 *
 * Returns the result of the last push_inst() / load_immediate() call, or
 * SLJIT_SUCCESS. FAIL_IF is defined elsewhere; presumably it returns early
 * when the wrapped call reports an error.
 */
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
{
	/* dst must be register, TMP_REG1
	   arg1 must be register, TMP_REG1, imm
	   arg2 must be register, TMP_REG2, imm */
	/* inv_bits is XORed into every opcode emitted below.
	   NOTE(review): 1 << 31 shifts into the sign bit of a signed int. */
	sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
	sljit_ins inst_bits;
	sljit_s32 op = (flags & 0xffff);
	sljit_s32 reg;
	sljit_sw imm, nimm;

	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
		/* Both are immediates. */
		/* Turn arg1 into a register so only arg2 remains an immediate. */
		flags &= ~ARG1_IMM;
		if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
			arg1 = TMP_ZERO;
		else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
			arg1 = TMP_REG1;
		}
	}

	if (flags & (ARG1_IMM | ARG2_IMM)) {
		/* Exactly one operand is an immediate here: reg is the register
		   operand, imm the immediate one. */
		reg = (flags & ARG2_IMM) ? arg1 : arg2;
		imm = (flags & ARG2_IMM) ? arg2 : arg1;

		switch (op) {
		case SLJIT_MUL:
		case SLJIT_NEG:
		case SLJIT_CLZ:
		case SLJIT_ADDC:
		case SLJIT_SUBC:
			/* No form with immediate operand (except imm 0, which
			is represented by a ZERO register). */
			break;
		case SLJIT_MOV:
			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
			return load_immediate(compiler, dst, imm);
		case SLJIT_NOT:
			SLJIT_ASSERT(flags & ARG2_IMM);
			/* The complement is computed here; with INT_OP only the low
			   32 bits of the result are kept. */
			FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
			goto set_flags;
		case SLJIT_SUB:
			/* imm - reg has no immediate form; only reg - imm is handled. */
			if (flags & ARG1_IMM)
				break;
			/* reg - imm is treated as reg + (-imm). */
			imm = -imm;
			/* Fall through. */
		case SLJIT_ADD:
			if (imm == 0) {
				/* Zero immediate: keep the original operation (ADDI or SUBI). */
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
			}
			/* Small positive value: ADDI with the immediate placed at bit 10. */
			if (imm > 0 && imm <= 0xfff) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
			}
			/* Small negative value: SUBI with the negated immediate. */
			nimm = -imm;
			if (nimm > 0 && nimm <= 0xfff) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
			}
			/* Multiple of 0x1000 below 2^24: immediate stored as imm >> 12,
			   with bit 22 set. */
			if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
			}
			if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
			}
			/* Any value below 2^24: two instructions (upper 12 bits, then
			   lower 12 bits). Only used when flags are not requested. */
			if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
				FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
			}
			if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
				FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
			}
			break;
		case SLJIT_AND:
			/* logical_imm() returns 0 when imm has no bitmask encoding. */
			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
			if (!inst_bits)
				break;
			CHECK_FLAGS(3 << 29);
			return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
		case SLJIT_OR:
		case SLJIT_XOR:
			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
			if (!inst_bits)
				break;
			if (op == SLJIT_OR)
				inst_bits |= ORRI;
			else
				inst_bits |= EORI;
			FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
			goto set_flags;
		case SLJIT_SHL:
			/* Only a constant shift amount (arg2) has an immediate form. */
			if (flags & ARG1_IMM)
				break;
			/* The shift amount is reduced modulo the operand width and
			   encoded through the two UBFM bit-position fields. */
			if (flags & INT_OP) {
				imm &= 0x1f;
				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
			}
			else {
				imm &= 0x3f;
				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
			}
			goto set_flags;
		case SLJIT_LSHR:
		case SLJIT_ASHR:
			if (flags & ARG1_IMM)
				break;
			/* Bit 30 is toggled for the arithmetic shift; presumably this
			   selects the signed variant of UBFM. */
			if (op == SLJIT_ASHR)
				inv_bits |= 1 << 30;
			if (flags & INT_OP) {
				imm &= 0x1f;
				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
			}
			else {
				imm &= 0x3f;
				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
			}
			goto set_flags;
		default:
			SLJIT_UNREACHABLE();
			break;
		}

		/* No immediate encoding was usable: move the immediate into a
		   temporary register (or use TMP_ZERO for 0) and continue with the
		   register-register forms below. */
		if (flags & ARG2_IMM) {
			if (arg2 == 0)
				arg2 = TMP_ZERO;
			else {
				FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
				arg2 = TMP_REG2;
			}
		}
		else {
			if (arg1 == 0)
				arg1 = TMP_ZERO;
			else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
				arg1 = TMP_REG1;
			}
		}
	}

	/* Both arguments are registers. */
	/* The single-operand operations (moves, NOT, NEG, CLZ) take their source
	   from arg2 and assert that arg1 is TMP_REG1. */
	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		/* Moving a register onto itself needs no instruction. */
		if (dst == arg2)
			return SLJIT_SUCCESS;
		return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
	case SLJIT_MOV_U8:
	case SLJIT_MOVU_U8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
	case SLJIT_MOV_S8:
	case SLJIT_MOVU_S8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (!(flags & INT_OP))
			inv_bits |= 1 << 22;
		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
	case SLJIT_MOV_U16:
	case SLJIT_MOVU_U16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
	case SLJIT_MOV_S16:
	case SLJIT_MOVU_S16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (!(flags & INT_OP))
			inv_bits |= 1 << 22;
		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
	case SLJIT_MOV_U32:
	case SLJIT_MOVU_U32:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		/* With INT_OP a move onto the same register is skipped. */
		if ((flags & INT_OP) && dst == arg2)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
	case SLJIT_MOV_S32:
	case SLJIT_MOVU_S32:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if ((flags & INT_OP) && dst == arg2)
			return SLJIT_SUCCESS;
		return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
	case SLJIT_NOT:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
		goto set_flags;
	case SLJIT_NEG:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		/* Negation is a subtraction from the zero register; bit 29 is
		   toggled when flags are requested. */
		if (flags & SET_FLAGS)
			inv_bits |= 1 << 29;
		return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
	case SLJIT_CLZ:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
		goto set_flags;
	case SLJIT_ADD:
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_ADDC:
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_SUB:
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_SUBC:
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_MUL:
		/* Without flags a single multiply-add with TMP_ZERO as addend is enough. */
		if (!(flags & SET_FLAGS))
			return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
		/* With flags, a three-instruction sequence is emitted that uses
		   TMP_LR as scratch and ends in a SUBS writing TMP_ZERO.
		   NOTE(review): presumably an overflow check comparing the high
		   part of the product with the sign of the low part - confirm. */
		if (flags & INT_OP) {
			FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
			return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
		}
		FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
		FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
		return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
	case SLJIT_AND:
		CHECK_FLAGS(3 << 29);
		return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		goto set_flags;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		goto set_flags;
	case SLJIT_SHL:
		FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		goto set_flags;
	case SLJIT_LSHR:
		FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		goto set_flags;
	case SLJIT_ASHR:
		FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		goto set_flags;
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;

set_flags:
	/* Reached by operations whose own instruction does not set the flags:
	   when flags are requested, emit an extra SUBS of TMP_ZERO from dst whose
	   result is written to TMP_ZERO. */
	if (flags & SET_FLAGS)
		return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
	return SLJIT_SUCCESS;
}
778 | | | 778 | |
/*
 * Flags describing a memory access. The two lowest bits (STORE, SIGNED) form
 * the index into the sljit_mem_* opcode tables below.
 */
/* The access is a store (otherwise a load). */
#define STORE		0x01
/* The loaded value is sign extended. */
#define SIGNED		0x02

/* The access uses the pre-indexed form (the "[reg,imm]!" opcodes). */
#define UPDATE		0x04
/* Only test whether the fast path can handle the argument; emit nothing. */
#define ARG_TEST	0x08

/* Access size, stored in bits 8-9. */
#define BYTE_SIZE	0x000
#define HALF_SIZE	0x100
#define INT_SIZE	0x200
#define WORD_SIZE	0x300

/* Extracts the access size (0..3 for the *_SIZE values above) from flags. */
#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
791 | | | 791 | |
/*
 * Base opcodes for byte-sized memory accesses, one table per addressing form.
 * Row order is: unsigned load, unsigned store, signed load, signed store
 * (i.e. indexed by the STORE and SIGNED bits of the access flags). The signed
 * store rows repeat the unsigned store opcode.
 */

/* Form commented as "[reg,imm]".
   NOTE(review): presumably the unsigned scaled-offset form - confirm at the use site. */
static const sljit_ins sljit_mem_imm[4] = {
/* u l */ 0x39400000 /* ldrb [reg,imm] */,
/* u s */ 0x39000000 /* strb [reg,imm] */,
/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
/* s s */ 0x39000000 /* strb [reg,imm] */,
};

/* The ldur/stur family.
   NOTE(review): presumably the signed unscaled-offset form - confirm at the use site. */
static const sljit_ins sljit_mem_simm[4] = {
/* u l */ 0x38400000 /* ldurb [reg,imm] */,
/* u s */ 0x38000000 /* sturb [reg,imm] */,
/* s l */ 0x38800000 /* ldursb [reg,imm] */,
/* s s */ 0x38000000 /* sturb [reg,imm] */,
};

/* Pre-indexed form "[reg,imm]!", used by getput_arg_fast() for UPDATE accesses. */
static const sljit_ins sljit_mem_pre_simm[4] = {
/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
/* u s */ 0x38000c00 /* strb [reg,imm]! */,
/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
/* s s */ 0x38000c00 /* strb [reg,imm]! */,
};

/* Register-offset form "[reg,reg]", used by getput_arg_fast() when an offset
   register is present. */
static const sljit_ins sljit_mem_reg[4] = {
/* u l */ 0x38606800 /* ldrb [reg,reg] */,
/* u s */ 0x38206800 /* strb [reg,reg] */,
/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
/* s s */ 0x38206800 /* strb [reg,reg] */,
};
819 | | | 819 | |
820 | /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ | | 820 | /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ |
821 | static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) | | 821 | static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) |
822 | { | | 822 | { |
823 | if (value >= 0) { | | 823 | if (value >= 0) { |
824 | if (value <= 0xfff) | | 824 | if (value <= 0xfff) |
825 | return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10)); | | 825 | return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10)); |
826 | if (value <= 0xffffff && !(value & 0xfff)) | | 826 | if (value <= 0xffffff && !(value & 0xfff)) |
827 | return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); | | 827 | return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); |
828 | } | | 828 | } |
829 | else { | | 829 | else { |
830 | value = -value; | | 830 | value = -value; |
831 | if (value <= 0xfff) | | 831 | if (value <= 0xfff) |
832 | return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10)); | | 832 | return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10)); |
833 | if (value <= 0xffffff && !(value & 0xfff)) | | 833 | if (value <= 0xffffff && !(value & 0xfff)) |
834 | return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); | | 834 | return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); |
835 | } | | 835 | } |
836 | return SLJIT_ERR_UNSUPPORTED; | | 836 | return SLJIT_ERR_UNSUPPORTED; |
837 | } | | 837 | } |
838 | | | 838 | |
839 | /* Can perform an operation using at most 1 instruction. */ | | 839 | /* Can perform an operation using at most 1 instruction. */ |
840 | static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) | | 840 | static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) |
841 | { | | 841 | { |
842 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); | | 842 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); |
843 | | | 843 | |
844 | SLJIT_ASSERT(arg & SLJIT_MEM); | | 844 | SLJIT_ASSERT(arg & SLJIT_MEM); |
845 | | | 845 | |
846 | if (SLJIT_UNLIKELY(flags & UPDATE)) { | | 846 | if (SLJIT_UNLIKELY(flags & UPDATE)) { |
847 | if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) { | | 847 | if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) { |
848 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) | | 848 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) |
849 | return 1; | | 849 | return 1; |
850 | | | 850 | |
851 | arg &= REG_MASK; | | 851 | arg &= REG_MASK; |
852 | argw &= 0x1ff; | | 852 | argw &= 0x1ff; |
853 | FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3] | | 853 | FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3] |
854 | | (shift << 30) | RT(reg) | RN(arg) | (argw << 12))); | | 854 | | (shift << 30) | RT(reg) | RN(arg) | (argw << 12))); |
855 | return -1; | | 855 | return -1; |
856 | } | | 856 | } |
857 | return 0; | | 857 | return 0; |
858 | } | | 858 | } |
859 | | | 859 | |
860 | if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { | | 860 | if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { |
861 | argw &= 0x3; | | 861 | argw &= 0x3; |
862 | if (argw && argw != shift) | | 862 | if (argw && argw != shift) |
863 | return 0; | | 863 | return 0; |
864 | | | 864 | |
865 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) | | 865 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) |
866 | return 1; | | 866 | return 1; |
867 | | | 867 | |
868 | FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | | 868 | FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) |
869 | | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0))); | | 869 | | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0))); |
870 | return -1; | | 870 | return -1; |
871 | } | | 871 | } |
872 | | | 872 | |
873 | arg &= REG_MASK; | | 873 | arg &= REG_MASK; |
874 | if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { | | 874 | if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { |
875 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) | | 875 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) |
876 | return 1; | | 876 | return 1; |
877 | | | 877 | |
878 | FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | | 878 | FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) |
879 | | RT(reg) | RN(arg) | (argw << (10 - shift)))); | | 879 | | RT(reg) | RN(arg) | (argw << (10 - shift)))); |
880 | return -1; | | 880 | return -1; |
881 | } | | 881 | } |
882 | | | 882 | |
883 | if (argw > 255 || argw < -256) | | 883 | if (argw > 255 || argw < -256) |
884 | return 0; | | 884 | return 0; |
885 | | | 885 | |
886 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) | | 886 | if (SLJIT_UNLIKELY(flags & ARG_TEST)) |
887 | return 1; | | 887 | return 1; |
888 | | | 888 | |
889 | FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) | | 889 | FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) |
890 | | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12))); | | 890 | | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12))); |
891 | return -1; | | 891 | return -1; |
892 | } | | 892 | } |
893 | | | 893 | |
894 | /* see getput_arg below. | | 894 | /* see getput_arg below. |
895 | Note: can_cache is called only for binary operators. Those | | 895 | Note: can_cache is called only for binary operators. Those |
896 | operators always uses word arguments without write back. */ | | 896 | operators always uses word arguments without write back. */ |
897 | static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) | | 897 | static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) |
898 | { | | 898 | { |
899 | sljit_sw diff; | | 899 | sljit_sw diff; |
900 | if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) | | 900 | if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) |
901 | return 0; | | 901 | return 0; |
902 | | | 902 | |
903 | if (!(arg & REG_MASK)) { | | 903 | if (!(arg & REG_MASK)) { |
904 | diff = argw - next_argw; | | 904 | diff = argw - next_argw; |
905 | if (diff <= 0xfff && diff >= -0xfff) | | 905 | if (diff <= 0xfff && diff >= -0xfff) |
906 | return 1; | | 906 | return 1; |
907 | return 0; | | 907 | return 0; |
908 | } | | 908 | } |
909 | | | 909 | |
910 | if (argw == next_argw) | | 910 | if (argw == next_argw) |
911 | return 1; | | 911 | return 1; |
912 | | | 912 | |
913 | diff = argw - next_argw; | | 913 | diff = argw - next_argw; |
914 | if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) | | 914 | if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) |
915 | return 1; | | 915 | return 1; |
916 | | | 916 | |
917 | return 0; | | 917 | return 0; |
918 | } | | 918 | } |
919 | | | 919 | |
920 | /* Emit the necessary instructions. See can_cache above. */ | | 920 | /* Emit the necessary instructions. See can_cache above. */ |
921 | static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, | | 921 | static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, |
922 | sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) | | 922 | sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) |
923 | { | | 923 | { |
924 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); | | 924 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); |
925 | sljit_s32 tmp_r, other_r; | | 925 | sljit_s32 tmp_r, other_r; |
926 | sljit_sw diff; | | 926 | sljit_sw diff; |
927 | | | 927 | |
928 | SLJIT_ASSERT(arg & SLJIT_MEM); | | 928 | SLJIT_ASSERT(arg & SLJIT_MEM); |
929 | if (!(next_arg & SLJIT_MEM)) { | | 929 | if (!(next_arg & SLJIT_MEM)) { |
930 | next_arg = 0; | | 930 | next_arg = 0; |
931 | next_argw = 0; | | 931 | next_argw = 0; |
932 | } | | 932 | } |
933 | | | 933 | |
934 | tmp_r = (flags & STORE) ? TMP_REG3 : reg; | | 934 | tmp_r = (flags & STORE) ? TMP_REG3 : reg; |
935 | | | 935 | |
936 | if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { | | 936 | if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { |
937 | /* Update only applies if a base register exists. */ | | 937 | /* Update only applies if a base register exists. */ |
938 | other_r = OFFS_REG(arg); | | 938 | other_r = OFFS_REG(arg); |
939 | if (!other_r) { | | 939 | if (!other_r) { |
940 | other_r = arg & REG_MASK; | | 940 | other_r = arg & REG_MASK; |
941 | SLJIT_ASSERT(other_r != reg); | | 941 | SLJIT_ASSERT(other_r != reg); |
942 | | | 942 | |
943 | if (argw >= 0 && argw <= 0xffffff) { | | 943 | if (argw >= 0 && argw <= 0xffffff) { |
944 | if ((argw & 0xfff) != 0) | | 944 | if ((argw & 0xfff) != 0) |
945 | FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); | | 945 | FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); |
946 | if (argw >> 12) | | 946 | if (argw >> 12) |
947 | FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); | | 947 | FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); |
948 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); | | 948 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); |
949 | } | | 949 | } |
950 | else if (argw < 0 && argw >= -0xffffff) { | | 950 | else if (argw < 0 && argw >= -0xffffff) { |
951 | argw = -argw; | | 951 | argw = -argw; |
952 | if ((argw & 0xfff) != 0) | | 952 | if ((argw & 0xfff) != 0) |
953 | FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); | | 953 | FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); |
954 | if (argw >> 12) | | 954 | if (argw >> 12) |
955 | FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); | | 955 | FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); |
956 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); | | 956 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); |
957 | } | | 957 | } |
958 | | | 958 | |
959 | if (compiler->cache_arg == SLJIT_MEM) { | | 959 | if (compiler->cache_arg == SLJIT_MEM) { |
960 | if (argw == compiler->cache_argw) { | | 960 | if (argw == compiler->cache_argw) { |
961 | other_r = TMP_REG3; | | 961 | other_r = TMP_REG3; |
962 | argw = 0; | | 962 | argw = 0; |
963 | } | | 963 | } |
964 | else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { | | 964 | else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { |
965 | FAIL_IF(compiler->error); | | 965 | FAIL_IF(compiler->error); |
966 | compiler->cache_argw = argw; | | 966 | compiler->cache_argw = argw; |
967 | other_r = TMP_REG3; | | 967 | other_r = TMP_REG3; |
968 | argw = 0; | | 968 | argw = 0; |
969 | } | | 969 | } |
970 | } | | 970 | } |
971 | | | 971 | |
972 | if (argw) { | | 972 | if (argw) { |
973 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); | | 973 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); |
974 | compiler->cache_arg = SLJIT_MEM; | | 974 | compiler->cache_arg = SLJIT_MEM; |
975 | compiler->cache_argw = argw; | | 975 | compiler->cache_argw = argw; |
976 | other_r = TMP_REG3; | | 976 | other_r = TMP_REG3; |
977 | argw = 0; | | 977 | argw = 0; |
978 | } | | 978 | } |
979 | } | | 979 | } |
980 | | | 980 | |
981 | /* No caching here. */ | | 981 | /* No caching here. */ |
982 | arg &= REG_MASK; | | 982 | arg &= REG_MASK; |
983 | FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r))); | | 983 | FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r))); |
984 | return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r)); | | 984 | return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r)); |
985 | } | | 985 | } |
986 | | | 986 | |
987 | if (arg & OFFS_REG_MASK) { | | 987 | if (arg & OFFS_REG_MASK) { |
988 | other_r = OFFS_REG(arg); | | 988 | other_r = OFFS_REG(arg); |
989 | arg &= REG_MASK; | | 989 | arg &= REG_MASK; |
990 | FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10))); | | 990 | FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10))); |
991 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r)); | | 991 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r)); |
992 | } | | 992 | } |
993 | | | 993 | |
994 | if (compiler->cache_arg == arg) { | | 994 | if (compiler->cache_arg == arg) { |
995 | diff = argw - compiler->cache_argw; | | 995 | diff = argw - compiler->cache_argw; |
996 | if (diff <= 255 && diff >= -256) | | 996 | if (diff <= 255 && diff >= -256) |
997 | return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) | | 997 | return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) |
998 | | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); | | 998 | | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); |
999 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { | | 999 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { |
1000 | FAIL_IF(compiler->error); | | 1000 | FAIL_IF(compiler->error); |
1001 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); | | 1001 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); |
1002 | } | | 1002 | } |
1003 | } | | 1003 | } |
1004 | | | 1004 | |
1005 | if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { | | 1005 | if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { |
1006 | FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10))); | | 1006 | FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10))); |
1007 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | | 1007 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) |
1008 | | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); | | 1008 | | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); |
1009 | } | | 1009 | } |
1010 | | | 1010 | |
1011 | diff = argw - next_argw; | | 1011 | diff = argw - next_argw; |
1012 | next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0; | | 1012 | next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0; |
1013 | arg &= REG_MASK; | | 1013 | arg &= REG_MASK; |
1014 | | | 1014 | |
1015 | if (arg && compiler->cache_arg == SLJIT_MEM) { | | 1015 | if (arg && compiler->cache_arg == SLJIT_MEM) { |
1016 | if (compiler->cache_argw == argw) | | 1016 | if (compiler->cache_argw == argw) |
1017 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); | | 1017 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); |
1018 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { | | 1018 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { |
1019 | FAIL_IF(compiler->error); | | 1019 | FAIL_IF(compiler->error); |
1020 | compiler->cache_argw = argw; | | 1020 | compiler->cache_argw = argw; |
1021 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); | | 1021 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); |
1022 | } | | 1022 | } |
1023 | } | | 1023 | } |
1024 | | | 1024 | |
1025 | compiler->cache_argw = argw; | | 1025 | compiler->cache_argw = argw; |
1026 | if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { | | 1026 | if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { |
1027 | FAIL_IF(compiler->error); | | 1027 | FAIL_IF(compiler->error); |
1028 | compiler->cache_arg = SLJIT_MEM | arg; | | 1028 | compiler->cache_arg = SLJIT_MEM | arg; |
1029 | arg = 0; | | 1029 | arg = 0; |
1030 | } | | 1030 | } |
1031 | else { | | 1031 | else { |
1032 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); | | 1032 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); |
1033 | compiler->cache_arg = SLJIT_MEM; | | 1033 | compiler->cache_arg = SLJIT_MEM; |
1034 | | | 1034 | |
1035 | if (next_arg) { | | 1035 | if (next_arg) { |
1036 | FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg))); | | 1036 | FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg))); |
1037 | compiler->cache_arg = SLJIT_MEM | arg; | | 1037 | compiler->cache_arg = SLJIT_MEM | arg; |
1038 | arg = 0; | | 1038 | arg = 0; |
1039 | } | | 1039 | } |
1040 | } | | 1040 | } |
1041 | | | 1041 | |
1042 | if (arg) | | 1042 | if (arg) |
1043 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); | | 1043 | return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); |
1044 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3)); | | 1044 | return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3)); |
1045 | } | | 1045 | } |
1046 | | | 1046 | |
1047 | static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) | | 1047 | static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) |
1048 | { | | 1048 | { |
1049 | if (getput_arg_fast(compiler, flags, reg, arg, argw)) | | 1049 | if (getput_arg_fast(compiler, flags, reg, arg, argw)) |
1050 | return compiler->error; | | 1050 | return compiler->error; |
1051 | compiler->cache_arg = 0; | | 1051 | compiler->cache_arg = 0; |
1052 | compiler->cache_argw = 0; | | 1052 | compiler->cache_argw = 0; |
1053 | return getput_arg(compiler, flags, reg, arg, argw, 0, 0); | | 1053 | return getput_arg(compiler, flags, reg, arg, argw, 0, 0); |
1054 | } | | 1054 | } |
1055 | | | 1055 | |
1056 | static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) | | 1056 | static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) |
1057 | { | | 1057 | { |
1058 | if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) | | 1058 | if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) |
1059 | return compiler->error; | | 1059 | return compiler->error; |
1060 | return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); | | 1060 | return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); |
1061 | } | | 1061 | } |
1062 | | | 1062 | |
1063 | /* --------------------------------------------------------------------- */ | | 1063 | /* --------------------------------------------------------------------- */ |
1064 | /* Entry, exit */ | | 1064 | /* Entry, exit */ |
1065 | /* --------------------------------------------------------------------- */ | | 1065 | /* --------------------------------------------------------------------- */ |
1066 | | | 1066 | |
1067 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, | | 1067 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, |
1068 | sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, | | 1068 | sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, |
1069 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) | | 1069 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) |
1070 | { | | 1070 | { |
1071 | sljit_s32 i, tmp, offs, prev, saved_regs_size; | | 1071 | sljit_s32 i, tmp, offs, prev, saved_regs_size; |
1072 | | | 1072 | |
1073 | CHECK_ERROR(); | | 1073 | CHECK_ERROR(); |
1074 | CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); | | 1074 | CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); |
1075 | set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); | | 1075 | set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); |
1076 | | | 1076 | |
1077 | saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); | | 1077 | saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); |
1078 | local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; | | 1078 | local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; |
1079 | local_size = (local_size + 15) & ~0xf; | | 1079 | local_size = (local_size + 15) & ~0xf; |
1080 | compiler->local_size = local_size; | | 1080 | compiler->local_size = local_size; |
1081 | | | 1081 | |
1082 | if (local_size <= (63 * sizeof(sljit_sw))) { | | 1082 | SLJIT_ASSERT(local_size >= 0); |
| | | 1083 | if ((size_t)local_size <= (63 * sizeof(sljit_sw))) { |
1083 | FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | | 1084 | FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) |
1084 | | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); | | 1085 | | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); |
1085 | FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); | | 1086 | FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); |
1086 | offs = (local_size - saved_regs_size) << (15 - 3); | | 1087 | offs = (local_size - saved_regs_size) << (15 - 3); |
1087 | } else { | | 1088 | } else { |
1088 | offs = 0 << 15; | | 1089 | offs = 0 << 15; |
1089 | if (saved_regs_size & 0x8) { | | 1090 | if (saved_regs_size & 0x8) { |
1090 | offs = 1 << 15; | | 1091 | offs = 1 << 15; |
1091 | saved_regs_size += sizeof(sljit_sw); | | 1092 | saved_regs_size += sizeof(sljit_sw); |
1092 | } | | 1093 | } |
1093 | local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; | | 1094 | local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; |
1094 | if (saved_regs_size > 0) | | 1095 | if (saved_regs_size > 0) |
1095 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); | | 1096 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); |
1096 | } | | 1097 | } |
1097 | | | 1098 | |
1098 | tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; | | 1099 | tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; |
1099 | prev = -1; | | 1100 | prev = -1; |
1100 | for (i = SLJIT_S0; i >= tmp; i--) { | | 1101 | for (i = SLJIT_S0; i >= tmp; i--) { |
1101 | if (prev == -1) { | | 1102 | if (prev == -1) { |
1102 | if (!(offs & (1 << 15))) { | | 1103 | if (!(offs & (1 << 15))) { |
1103 | prev = i; | | 1104 | prev = i; |
1104 | continue; | | 1105 | continue; |
1105 | } | | 1106 | } |
1106 | FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); | | 1107 | FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); |
1107 | offs += 1 << 15; | | 1108 | offs += 1 << 15; |
1108 | continue; | | 1109 | continue; |
1109 | } | | 1110 | } |
1110 | FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); | | 1111 | FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); |
1111 | offs += 2 << 15; | | 1112 | offs += 2 << 15; |
1112 | prev = -1; | | 1113 | prev = -1; |
1113 | } | | 1114 | } |
1114 | | | 1115 | |
1115 | for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { | | 1116 | for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { |
1116 | if (prev == -1) { | | 1117 | if (prev == -1) { |
1117 | if (!(offs & (1 << 15))) { | | 1118 | if (!(offs & (1 << 15))) { |
1118 | prev = i; | | 1119 | prev = i; |
1119 | continue; | | 1120 | continue; |
1120 | } | | 1121 | } |
1121 | FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); | | 1122 | FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); |
1122 | offs += 1 << 15; | | 1123 | offs += 1 << 15; |
1123 | continue; | | 1124 | continue; |
1124 | } | | 1125 | } |
1125 | FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); | | 1126 | FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); |
1126 | offs += 2 << 15; | | 1127 | offs += 2 << 15; |
1127 | prev = -1; | | 1128 | prev = -1; |
1128 | } | | 1129 | } |
1129 | | | 1130 | |
1130 | SLJIT_ASSERT(prev == -1); | | 1131 | SLJIT_ASSERT(prev == -1); |
1131 | | | 1132 | |
1132 | if (compiler->local_size > (63 * sizeof(sljit_sw))) { | | 1133 | SLJIT_ASSERT(compiler->local_size >= 0); |
| | | 1134 | if ((size_t)compiler->local_size > (63 * sizeof(sljit_sw))) { |
1133 | /* The local_size is already adjusted by the saved registers. */ | | 1135 | /* The local_size is already adjusted by the saved registers. */ |
1134 | if (local_size > 0xfff) { | | 1136 | if (local_size > 0xfff) { |
1135 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); | | 1137 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); |
1136 | local_size &= 0xfff; | | 1138 | local_size &= 0xfff; |
1137 | } | | 1139 | } |
1138 | if (local_size) | | 1140 | if (local_size) |
1139 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); | | 1141 | FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); |
1140 | FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | | 1142 | FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) |
1141 | | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15))); | | 1143 | | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15))); |
1142 | FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); | | 1144 | FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); |
1143 | } | | 1145 | } |
1144 | | | 1146 | |
1145 | if (args >= 1) | | 1147 | if (args >= 1) |
1146 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); | | 1148 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); |
1147 | if (args >= 2) | | 1149 | if (args >= 2) |
1148 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); | | 1150 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); |
1149 | if (args >= 3) | | 1151 | if (args >= 3) |
1150 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); | | 1152 | FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); |
1151 | | | 1153 | |
1152 | return SLJIT_SUCCESS; | | 1154 | return SLJIT_SUCCESS; |
1153 | } | | 1155 | } |
1154 | | | 1156 | |
1155 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, | | 1157 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, |
1156 | sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, | | 1158 | sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, |
1157 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) | | 1159 | sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) |
1158 | { | | 1160 | { |
1159 | CHECK_ERROR(); | | 1161 | CHECK_ERROR(); |
1160 | CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); | | 1162 | CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); |
1161 | set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); | | 1163 | set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); |
1162 | | | 1164 | |
1163 | local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; | | 1165 | local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; |
1164 | local_size = (local_size + 15) & ~0xf; | | 1166 | local_size = (local_size + 15) & ~0xf; |
1165 | compiler->local_size = local_size; | | 1167 | compiler->local_size = local_size; |
1166 | return SLJIT_SUCCESS; | | 1168 | return SLJIT_SUCCESS; |
1167 | } | | 1169 | } |
1168 | | | 1170 | |
1169 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) | | 1171 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) |
1170 | { | | 1172 | { |
1171 | sljit_s32 local_size; | | 1173 | sljit_s32 local_size; |
1172 | sljit_s32 i, tmp, offs, prev, saved_regs_size; | | 1174 | sljit_s32 i, tmp, offs, prev, saved_regs_size; |
1173 | | | 1175 | |
1174 | CHECK_ERROR(); | | 1176 | CHECK_ERROR(); |
1175 | CHECK(check_sljit_emit_return(compiler, op, src, srcw)); | | 1177 | CHECK(check_sljit_emit_return(compiler, op, src, srcw)); |
1176 | | | 1178 | |
1177 | FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); | | 1179 | FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); |
1178 | | | 1180 | |
1179 | local_size = compiler->local_size; | | 1181 | local_size = compiler->local_size; |
1180 | | | 1182 | |
1181 | saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); | | 1183 | saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); |
1182 | if (local_size <= (63 * sizeof(sljit_sw))) | | 1184 | SLJIT_ASSERT(local_size >= 0); |
| | | 1185 | if ((size_t)local_size <= (63 * sizeof(sljit_sw))) |
1183 | offs = (local_size - saved_regs_size) << (15 - 3); | | 1186 | offs = (local_size - saved_regs_size) << (15 - 3); |
1184 | else { | | 1187 | else { |
1185 | FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | | 1188 | FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) |
1186 | | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15))); | | 1189 | | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15))); |
1187 | offs = 0 << 15; | | 1190 | offs = 0 << 15; |
1188 | if (saved_regs_size & 0x8) { | | 1191 | if (saved_regs_size & 0x8) { |
1189 | offs = 1 << 15; | | 1192 | offs = 1 << 15; |
1190 | saved_regs_size += sizeof(sljit_sw); | | 1193 | saved_regs_size += sizeof(sljit_sw); |
1191 | } | | 1194 | } |
1192 | local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; | | 1195 | local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; |
1193 | if (local_size > 0xfff) { | | 1196 | if (local_size > 0xfff) { |
1194 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); | | 1197 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); |
1195 | local_size &= 0xfff; | | 1198 | local_size &= 0xfff; |
1196 | } | | 1199 | } |
1197 | if (local_size) | | 1200 | if (local_size) |
1198 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); | | 1201 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); |
1199 | } | | 1202 | } |
1200 | | | 1203 | |
1201 | tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; | | 1204 | tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; |
1202 | prev = -1; | | 1205 | prev = -1; |
1203 | for (i = SLJIT_S0; i >= tmp; i--) { | | 1206 | for (i = SLJIT_S0; i >= tmp; i--) { |
1204 | if (prev == -1) { | | 1207 | if (prev == -1) { |
1205 | if (!(offs & (1 << 15))) { | | 1208 | if (!(offs & (1 << 15))) { |
1206 | prev = i; | | 1209 | prev = i; |
1207 | continue; | | 1210 | continue; |
1208 | } | | 1211 | } |
1209 | FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); | | 1212 | FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); |
1210 | offs += 1 << 15; | | 1213 | offs += 1 << 15; |
1211 | continue; | | 1214 | continue; |
1212 | } | | 1215 | } |
1213 | FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); | | 1216 | FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); |
1214 | offs += 2 << 15; | | 1217 | offs += 2 << 15; |
1215 | prev = -1; | | 1218 | prev = -1; |
1216 | } | | 1219 | } |
1217 | | | 1220 | |
1218 | for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { | | 1221 | for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { |
1219 | if (prev == -1) { | | 1222 | if (prev == -1) { |
1220 | if (!(offs & (1 << 15))) { | | 1223 | if (!(offs & (1 << 15))) { |
1221 | prev = i; | | 1224 | prev = i; |
1222 | continue; | | 1225 | continue; |
1223 | } | | 1226 | } |
1224 | FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); | | 1227 | FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); |
1225 | offs += 1 << 15; | | 1228 | offs += 1 << 15; |
1226 | continue; | | 1229 | continue; |
1227 | } | | 1230 | } |
1228 | FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); | | 1231 | FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); |
1229 | offs += 2 << 15; | | 1232 | offs += 2 << 15; |
1230 | prev = -1; | | 1233 | prev = -1; |
1231 | } | | 1234 | } |
1232 | | | 1235 | |
1233 | SLJIT_ASSERT(prev == -1); | | 1236 | SLJIT_ASSERT(prev == -1); |
1234 | | | 1237 | |
1235 | if (compiler->local_size <= (63 * sizeof(sljit_sw))) { | | 1238 | SLJIT_ASSERT(compiler->local_size >= 0); |
| | | 1239 | if ((size_t)compiler->local_size <= (63 * sizeof(sljit_sw))) { |
1236 | FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | | 1240 | FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) |
1237 | | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); | | 1241 | | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); |
1238 | } else if (saved_regs_size > 0) { | | 1242 | } else if (saved_regs_size > 0) { |
1239 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); | | 1243 | FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); |
1240 | } | | 1244 | } |
1241 | | | 1245 | |
1242 | FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); | | 1246 | FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); |
1243 | return SLJIT_SUCCESS; | | 1247 | return SLJIT_SUCCESS; |
1244 | } | | 1248 | } |
1245 | | | 1249 | |
1246 | /* --------------------------------------------------------------------- */ | | 1250 | /* --------------------------------------------------------------------- */ |
1247 | /* Operators */ | | 1251 | /* Operators */ |
1248 | /* --------------------------------------------------------------------- */ | | 1252 | /* --------------------------------------------------------------------- */ |
1249 | | | 1253 | |
1250 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) | | 1254 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) |
1251 | { | | 1255 | { |
1252 | sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0; | | 1256 | sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0; |
1253 | | | 1257 | |
1254 | CHECK_ERROR(); | | 1258 | CHECK_ERROR(); |
1255 | CHECK(check_sljit_emit_op0(compiler, op)); | | 1259 | CHECK(check_sljit_emit_op0(compiler, op)); |
1256 | | | 1260 | |
1257 | op = GET_OPCODE(op); | | 1261 | op = GET_OPCODE(op); |
1258 | switch (op) { | | 1262 | switch (op) { |
1259 | case SLJIT_BREAKPOINT: | | 1263 | case SLJIT_BREAKPOINT: |
1260 | return push_inst(compiler, BRK); | | 1264 | return push_inst(compiler, BRK); |
1261 | case SLJIT_NOP: | | 1265 | case SLJIT_NOP: |
1262 | return push_inst(compiler, NOP); | | 1266 | return push_inst(compiler, NOP); |
1263 | case SLJIT_LMUL_UW: | | 1267 | case SLJIT_LMUL_UW: |
1264 | case SLJIT_LMUL_SW: | | 1268 | case SLJIT_LMUL_SW: |
1265 | FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); | | 1269 | FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); |
1266 | FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); | | 1270 | FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); |
1267 | return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); | | 1271 | return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); |
1268 | case SLJIT_DIVMOD_UW: | | 1272 | case SLJIT_DIVMOD_UW: |
1269 | case SLJIT_DIVMOD_SW: | | 1273 | case SLJIT_DIVMOD_SW: |
1270 | FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); | | 1274 | FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); |
1271 | FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); | | 1275 | FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); |
1272 | FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); | | 1276 | FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); |
1273 | return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); | | 1277 | return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); |
1274 | case SLJIT_DIV_UW: | | 1278 | case SLJIT_DIV_UW: |
1275 | case SLJIT_DIV_SW: | | 1279 | case SLJIT_DIV_SW: |
1276 | return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); | | 1280 | return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); |
1277 | } | | 1281 | } |
1278 | | | 1282 | |
1279 | return SLJIT_SUCCESS; | | 1283 | return SLJIT_SUCCESS; |
1280 | } | | 1284 | } |
1281 | | | 1285 | |
1282 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, | | 1286 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, |
1283 | sljit_s32 dst, sljit_sw dstw, | | 1287 | sljit_s32 dst, sljit_sw dstw, |
1284 | sljit_s32 src, sljit_sw srcw) | | 1288 | sljit_s32 src, sljit_sw srcw) |
1285 | { | | 1289 | { |
1286 | sljit_s32 dst_r, flags, mem_flags; | | 1290 | sljit_s32 dst_r, flags, mem_flags; |
1287 | sljit_s32 op_flags = GET_ALL_FLAGS(op); | | 1291 | sljit_s32 op_flags = GET_ALL_FLAGS(op); |
1288 | | | 1292 | |
1289 | CHECK_ERROR(); | | 1293 | CHECK_ERROR(); |
1290 | CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); | | 1294 | CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); |
1291 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 1295 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1292 | ADJUST_LOCAL_OFFSET(src, srcw); | | 1296 | ADJUST_LOCAL_OFFSET(src, srcw); |
1293 | | | 1297 | |
1294 | compiler->cache_arg = 0; | | 1298 | compiler->cache_arg = 0; |
1295 | compiler->cache_argw = 0; | | 1299 | compiler->cache_argw = 0; |
1296 | | | 1300 | |
1297 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; | | 1301 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; |
1298 | | | 1302 | |
1299 | op = GET_OPCODE(op); | | 1303 | op = GET_OPCODE(op); |
1300 | if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { | | 1304 | if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { |
1301 | switch (op) { | | 1305 | switch (op) { |
1302 | case SLJIT_MOV: | | 1306 | case SLJIT_MOV: |
1303 | case SLJIT_MOV_P: | | 1307 | case SLJIT_MOV_P: |
1304 | flags = WORD_SIZE; | | 1308 | flags = WORD_SIZE; |
1305 | break; | | 1309 | break; |
1306 | case SLJIT_MOV_U8: | | 1310 | case SLJIT_MOV_U8: |
1307 | flags = BYTE_SIZE; | | 1311 | flags = BYTE_SIZE; |
1308 | if (src & SLJIT_IMM) | | 1312 | if (src & SLJIT_IMM) |
1309 | srcw = (sljit_u8)srcw; | | 1313 | srcw = (sljit_u8)srcw; |
1310 | break; | | 1314 | break; |
1311 | case SLJIT_MOV_S8: | | 1315 | case SLJIT_MOV_S8: |
1312 | flags = BYTE_SIZE | SIGNED; | | 1316 | flags = BYTE_SIZE | SIGNED; |
1313 | if (src & SLJIT_IMM) | | 1317 | if (src & SLJIT_IMM) |
1314 | srcw = (sljit_s8)srcw; | | 1318 | srcw = (sljit_s8)srcw; |
1315 | break; | | 1319 | break; |
1316 | case SLJIT_MOV_U16: | | 1320 | case SLJIT_MOV_U16: |
1317 | flags = HALF_SIZE; | | 1321 | flags = HALF_SIZE; |
1318 | if (src & SLJIT_IMM) | | 1322 | if (src & SLJIT_IMM) |
1319 | srcw = (sljit_u16)srcw; | | 1323 | srcw = (sljit_u16)srcw; |
1320 | break; | | 1324 | break; |
1321 | case SLJIT_MOV_S16: | | 1325 | case SLJIT_MOV_S16: |
1322 | flags = HALF_SIZE | SIGNED; | | 1326 | flags = HALF_SIZE | SIGNED; |
1323 | if (src & SLJIT_IMM) | | 1327 | if (src & SLJIT_IMM) |
1324 | srcw = (sljit_s16)srcw; | | 1328 | srcw = (sljit_s16)srcw; |
1325 | break; | | 1329 | break; |
1326 | case SLJIT_MOV_U32: | | 1330 | case SLJIT_MOV_U32: |
1327 | flags = INT_SIZE; | | 1331 | flags = INT_SIZE; |
1328 | if (src & SLJIT_IMM) | | 1332 | if (src & SLJIT_IMM) |
1329 | srcw = (sljit_u32)srcw; | | 1333 | srcw = (sljit_u32)srcw; |
1330 | break; | | 1334 | break; |
1331 | case SLJIT_MOV_S32: | | 1335 | case SLJIT_MOV_S32: |
1332 | flags = INT_SIZE | SIGNED; | | 1336 | flags = INT_SIZE | SIGNED; |
1333 | if (src & SLJIT_IMM) | | 1337 | if (src & SLJIT_IMM) |
1334 | srcw = (sljit_s32)srcw; | | 1338 | srcw = (sljit_s32)srcw; |
1335 | break; | | 1339 | break; |
1336 | case SLJIT_MOVU: | | 1340 | case SLJIT_MOVU: |
1337 | case SLJIT_MOVU_P: | | 1341 | case SLJIT_MOVU_P: |
1338 | flags = WORD_SIZE | UPDATE; | | 1342 | flags = WORD_SIZE | UPDATE; |
1339 | break; | | 1343 | break; |
1340 | case SLJIT_MOVU_U8: | | 1344 | case SLJIT_MOVU_U8: |
1341 | flags = BYTE_SIZE | UPDATE; | | 1345 | flags = BYTE_SIZE | UPDATE; |
1342 | if (src & SLJIT_IMM) | | 1346 | if (src & SLJIT_IMM) |
1343 | srcw = (sljit_u8)srcw; | | 1347 | srcw = (sljit_u8)srcw; |
1344 | break; | | 1348 | break; |
1345 | case SLJIT_MOVU_S8: | | 1349 | case SLJIT_MOVU_S8: |
1346 | flags = BYTE_SIZE | SIGNED | UPDATE; | | 1350 | flags = BYTE_SIZE | SIGNED | UPDATE; |
1347 | if (src & SLJIT_IMM) | | 1351 | if (src & SLJIT_IMM) |
1348 | srcw = (sljit_s8)srcw; | | 1352 | srcw = (sljit_s8)srcw; |
1349 | break; | | 1353 | break; |
1350 | case SLJIT_MOVU_U16: | | 1354 | case SLJIT_MOVU_U16: |
1351 | flags = HALF_SIZE | UPDATE; | | 1355 | flags = HALF_SIZE | UPDATE; |
1352 | if (src & SLJIT_IMM) | | 1356 | if (src & SLJIT_IMM) |
1353 | srcw = (sljit_u16)srcw; | | 1357 | srcw = (sljit_u16)srcw; |
1354 | break; | | 1358 | break; |
1355 | case SLJIT_MOVU_S16: | | 1359 | case SLJIT_MOVU_S16: |
1356 | flags = HALF_SIZE | SIGNED | UPDATE; | | 1360 | flags = HALF_SIZE | SIGNED | UPDATE; |
1357 | if (src & SLJIT_IMM) | | 1361 | if (src & SLJIT_IMM) |
1358 | srcw = (sljit_s16)srcw; | | 1362 | srcw = (sljit_s16)srcw; |
1359 | break; | | 1363 | break; |
1360 | case SLJIT_MOVU_U32: | | 1364 | case SLJIT_MOVU_U32: |
1361 | flags = INT_SIZE | UPDATE; | | 1365 | flags = INT_SIZE | UPDATE; |
1362 | if (src & SLJIT_IMM) | | 1366 | if (src & SLJIT_IMM) |
1363 | srcw = (sljit_u32)srcw; | | 1367 | srcw = (sljit_u32)srcw; |
1364 | break; | | 1368 | break; |
1365 | case SLJIT_MOVU_S32: | | 1369 | case SLJIT_MOVU_S32: |
1366 | flags = INT_SIZE | SIGNED | UPDATE; | | 1370 | flags = INT_SIZE | SIGNED | UPDATE; |
1367 | if (src & SLJIT_IMM) | | 1371 | if (src & SLJIT_IMM) |
1368 | srcw = (sljit_s32)srcw; | | 1372 | srcw = (sljit_s32)srcw; |
1369 | break; | | 1373 | break; |
1370 | default: | | 1374 | default: |
1371 | SLJIT_UNREACHABLE(); | | 1375 | SLJIT_UNREACHABLE(); |
1372 | flags = 0; | | 1376 | flags = 0; |
1373 | break; | | 1377 | break; |
1374 | } | | 1378 | } |
1375 | | | 1379 | |
1376 | if (src & SLJIT_IMM) | | 1380 | if (src & SLJIT_IMM) |
1377 | FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); | | 1381 | FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); |
1378 | else if (src & SLJIT_MEM) { | | 1382 | else if (src & SLJIT_MEM) { |
1379 | if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) | | 1383 | if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) |
1380 | FAIL_IF(compiler->error); | | 1384 | FAIL_IF(compiler->error); |
1381 | else | | 1385 | else |
1382 | FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); | | 1386 | FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); |
1383 | } else { | | 1387 | } else { |
1384 | if (dst_r != TMP_REG1) | | 1388 | if (dst_r != TMP_REG1) |
1385 | return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); | | 1389 | return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); |
1386 | dst_r = src; | | 1390 | dst_r = src; |
1387 | } | | 1391 | } |
1388 | | | 1392 | |
1389 | if (dst & SLJIT_MEM) { | | 1393 | if (dst & SLJIT_MEM) { |
1390 | if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) | | 1394 | if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) |
1391 | return compiler->error; | | 1395 | return compiler->error; |
1392 | else | | 1396 | else |
1393 | return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); | | 1397 | return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); |
1394 | } | | 1398 | } |
1395 | return SLJIT_SUCCESS; | | 1399 | return SLJIT_SUCCESS; |
1396 | } | | 1400 | } |
1397 | | | 1401 | |
1398 | flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; | | 1402 | flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; |
1399 | mem_flags = WORD_SIZE; | | 1403 | mem_flags = WORD_SIZE; |
1400 | if (op_flags & SLJIT_I32_OP) { | | 1404 | if (op_flags & SLJIT_I32_OP) { |
1401 | flags |= INT_OP; | | 1405 | flags |= INT_OP; |
1402 | mem_flags = INT_SIZE; | | 1406 | mem_flags = INT_SIZE; |
1403 | } | | 1407 | } |
1404 | | | 1408 | |
1405 | if (dst == SLJIT_UNUSED) | | 1409 | if (dst == SLJIT_UNUSED) |
1406 | flags |= UNUSED_RETURN; | | 1410 | flags |= UNUSED_RETURN; |
1407 | | | 1411 | |
1408 | if (src & SLJIT_MEM) { | | 1412 | if (src & SLJIT_MEM) { |
1409 | if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw)) | | 1413 | if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw)) |
1410 | FAIL_IF(compiler->error); | | 1414 | FAIL_IF(compiler->error); |
1411 | else | | 1415 | else |
1412 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw)); | | 1416 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw)); |
1413 | src = TMP_REG2; | | 1417 | src = TMP_REG2; |
1414 | } | | 1418 | } |
1415 | | | 1419 | |
1416 | if (src & SLJIT_IMM) { | | 1420 | if (src & SLJIT_IMM) { |
1417 | flags |= ARG2_IMM; | | 1421 | flags |= ARG2_IMM; |
1418 | if (op_flags & SLJIT_I32_OP) | | 1422 | if (op_flags & SLJIT_I32_OP) |
1419 | srcw = (sljit_s32)srcw; | | 1423 | srcw = (sljit_s32)srcw; |
1420 | } else | | 1424 | } else |
1421 | srcw = src; | | 1425 | srcw = src; |
1422 | | | 1426 | |
1423 | emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); | | 1427 | emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); |
1424 | | | 1428 | |
1425 | if (dst & SLJIT_MEM) { | | 1429 | if (dst & SLJIT_MEM) { |
1426 | if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw)) | | 1430 | if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw)) |
1427 | return compiler->error; | | 1431 | return compiler->error; |
1428 | else | | 1432 | else |
1429 | return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0); | | 1433 | return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0); |
1430 | } | | 1434 | } |
1431 | return SLJIT_SUCCESS; | | 1435 | return SLJIT_SUCCESS; |
1432 | } | | 1436 | } |
1433 | | | 1437 | |
1434 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, | | 1438 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, |
1435 | sljit_s32 dst, sljit_sw dstw, | | 1439 | sljit_s32 dst, sljit_sw dstw, |
1436 | sljit_s32 src1, sljit_sw src1w, | | 1440 | sljit_s32 src1, sljit_sw src1w, |
1437 | sljit_s32 src2, sljit_sw src2w) | | 1441 | sljit_s32 src2, sljit_sw src2w) |
1438 | { | | 1442 | { |
1439 | sljit_s32 dst_r, flags, mem_flags; | | 1443 | sljit_s32 dst_r, flags, mem_flags; |
1440 | | | 1444 | |
1441 | CHECK_ERROR(); | | 1445 | CHECK_ERROR(); |
1442 | CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); | | 1446 | CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); |
1443 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 1447 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1444 | ADJUST_LOCAL_OFFSET(src1, src1w); | | 1448 | ADJUST_LOCAL_OFFSET(src1, src1w); |
1445 | ADJUST_LOCAL_OFFSET(src2, src2w); | | 1449 | ADJUST_LOCAL_OFFSET(src2, src2w); |
1446 | | | 1450 | |
1447 | compiler->cache_arg = 0; | | 1451 | compiler->cache_arg = 0; |
1448 | compiler->cache_argw = 0; | | 1452 | compiler->cache_argw = 0; |
1449 | | | 1453 | |
1450 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; | | 1454 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; |
1451 | flags = HAS_FLAGS(op) ? SET_FLAGS : 0; | | 1455 | flags = HAS_FLAGS(op) ? SET_FLAGS : 0; |
1452 | mem_flags = WORD_SIZE; | | 1456 | mem_flags = WORD_SIZE; |
1453 | if (op & SLJIT_I32_OP) { | | 1457 | if (op & SLJIT_I32_OP) { |
1454 | flags |= INT_OP; | | 1458 | flags |= INT_OP; |
1455 | mem_flags = INT_SIZE; | | 1459 | mem_flags = INT_SIZE; |
1456 | } | | 1460 | } |
1457 | | | 1461 | |
1458 | if (dst == SLJIT_UNUSED) | | 1462 | if (dst == SLJIT_UNUSED) |
1459 | flags |= UNUSED_RETURN; | | 1463 | flags |= UNUSED_RETURN; |
1460 | | | 1464 | |
1461 | if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw)) | | 1465 | if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw)) |
1462 | flags |= SLOW_DEST; | | 1466 | flags |= SLOW_DEST; |
1463 | | | 1467 | |
1464 | if (src1 & SLJIT_MEM) { | | 1468 | if (src1 & SLJIT_MEM) { |
1465 | if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w)) | | 1469 | if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w)) |
1466 | FAIL_IF(compiler->error); | | 1470 | FAIL_IF(compiler->error); |
1467 | else | | 1471 | else |
1468 | flags |= SLOW_SRC1; | | 1472 | flags |= SLOW_SRC1; |
1469 | } | | 1473 | } |
1470 | if (src2 & SLJIT_MEM) { | | 1474 | if (src2 & SLJIT_MEM) { |
1471 | if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w)) | | 1475 | if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w)) |
1472 | FAIL_IF(compiler->error); | | 1476 | FAIL_IF(compiler->error); |
1473 | else | | 1477 | else |
1474 | flags |= SLOW_SRC2; | | 1478 | flags |= SLOW_SRC2; |
1475 | } | | 1479 | } |
1476 | | | 1480 | |
1477 | if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { | | 1481 | if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { |
1478 | if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { | | 1482 | if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { |
1479 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w)); | | 1483 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w)); |
1480 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); | | 1484 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); |
1481 | } | | 1485 | } |
1482 | else { | | 1486 | else { |
1483 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w)); | | 1487 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w)); |
1484 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); | | 1488 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); |
1485 | } | | 1489 | } |
1486 | } | | 1490 | } |
1487 | else if (flags & SLOW_SRC1) | | 1491 | else if (flags & SLOW_SRC1) |
1488 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); | | 1492 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); |
1489 | else if (flags & SLOW_SRC2) | | 1493 | else if (flags & SLOW_SRC2) |
1490 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); | | 1494 | FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); |
1491 | | | 1495 | |
1492 | if (src1 & SLJIT_MEM) | | 1496 | if (src1 & SLJIT_MEM) |
1493 | src1 = TMP_REG1; | | 1497 | src1 = TMP_REG1; |
1494 | if (src2 & SLJIT_MEM) | | 1498 | if (src2 & SLJIT_MEM) |
1495 | src2 = TMP_REG2; | | 1499 | src2 = TMP_REG2; |
1496 | | | 1500 | |
1497 | if (src1 & SLJIT_IMM) | | 1501 | if (src1 & SLJIT_IMM) |
1498 | flags |= ARG1_IMM; | | 1502 | flags |= ARG1_IMM; |
1499 | else | | 1503 | else |
1500 | src1w = src1; | | 1504 | src1w = src1; |
1501 | if (src2 & SLJIT_IMM) | | 1505 | if (src2 & SLJIT_IMM) |
1502 | flags |= ARG2_IMM; | | 1506 | flags |= ARG2_IMM; |
1503 | else | | 1507 | else |
1504 | src2w = src2; | | 1508 | src2w = src2; |
1505 | | | 1509 | |
1506 | emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); | | 1510 | emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); |
1507 | | | 1511 | |
1508 | if (dst & SLJIT_MEM) { | | 1512 | if (dst & SLJIT_MEM) { |
1509 | if (!(flags & SLOW_DEST)) { | | 1513 | if (!(flags & SLOW_DEST)) { |
1510 | getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw); | | 1514 | getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw); |
1511 | return compiler->error; | | 1515 | return compiler->error; |
1512 | } | | 1516 | } |
1513 | return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); | | 1517 | return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); |
1514 | } | | 1518 | } |
1515 | | | 1519 | |
1516 | return SLJIT_SUCCESS; | | 1520 | return SLJIT_SUCCESS; |
1517 | } | | 1521 | } |
1518 | | | 1522 | |
1519 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) | | 1523 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) |
1520 | { | | 1524 | { |
1521 | CHECK_REG_INDEX(check_sljit_get_register_index(reg)); | | 1525 | CHECK_REG_INDEX(check_sljit_get_register_index(reg)); |
1522 | return reg_map[reg]; | | 1526 | return reg_map[reg]; |
1523 | } | | 1527 | } |
1524 | | | 1528 | |
1525 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) | | 1529 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) |
1526 | { | | 1530 | { |
1527 | CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); | | 1531 | CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); |
1528 | return reg; | | 1532 | return reg; |
1529 | } | | 1533 | } |
1530 | | | 1534 | |
1531 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, | | 1535 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, |
1532 | void *instruction, sljit_s32 size) | | 1536 | void *instruction, sljit_s32 size) |
1533 | { | | 1537 | { |
1534 | CHECK_ERROR(); | | 1538 | CHECK_ERROR(); |
1535 | CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); | | 1539 | CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); |
1536 | | | 1540 | |
1537 | return push_inst(compiler, *(sljit_ins*)instruction); | | 1541 | return push_inst(compiler, *(sljit_ins*)instruction); |
1538 | } | | 1542 | } |
1539 | | | 1543 | |
1540 | /* --------------------------------------------------------------------- */ | | 1544 | /* --------------------------------------------------------------------- */ |
1541 | /* Floating point operators */ | | 1545 | /* Floating point operators */ |
1542 | /* --------------------------------------------------------------------- */ | | 1546 | /* --------------------------------------------------------------------- */ |
1543 | | | 1547 | |
1544 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) | | 1548 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) |
1545 | { | | 1549 | { |
1546 | #ifdef SLJIT_IS_FPU_AVAILABLE | | 1550 | #ifdef SLJIT_IS_FPU_AVAILABLE |
1547 | return SLJIT_IS_FPU_AVAILABLE; | | 1551 | return SLJIT_IS_FPU_AVAILABLE; |
1548 | #else | | 1552 | #else |
1549 | /* Available by default. */ | | 1553 | /* Available by default. */ |
1550 | return 1; | | 1554 | return 1; |
1551 | #endif | | 1555 | #endif |
1552 | } | | 1556 | } |
1553 | | | 1557 | |
1554 | static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) | | 1558 | static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) |
1555 | { | | 1559 | { |
1556 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); | | 1560 | sljit_u32 shift = MEM_SIZE_SHIFT(flags); |
1557 | sljit_ins ins_bits = (shift << 30); | | 1561 | sljit_ins ins_bits = (shift << 30); |
1558 | sljit_s32 other_r; | | 1562 | sljit_s32 other_r; |
1559 | sljit_sw diff; | | 1563 | sljit_sw diff; |
1560 | | | 1564 | |
1561 | SLJIT_ASSERT(arg & SLJIT_MEM); | | 1565 | SLJIT_ASSERT(arg & SLJIT_MEM); |
1562 | | | 1566 | |
1563 | if (!(flags & STORE)) | | 1567 | if (!(flags & STORE)) |
1564 | ins_bits |= 1 << 22; | | 1568 | ins_bits |= 1 << 22; |
1565 | | | 1569 | |
1566 | if (arg & OFFS_REG_MASK) { | | 1570 | if (arg & OFFS_REG_MASK) { |
1567 | argw &= 3; | | 1571 | argw &= 3; |
1568 | if (!argw || argw == shift) | | 1572 | if (!argw || argw == shift) |
1569 | return push_inst(compiler, STR_FR | ins_bits | VT(reg) | | 1573 | return push_inst(compiler, STR_FR | ins_bits | VT(reg) |
1570 | | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); | | 1574 | | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); |
1571 | other_r = OFFS_REG(arg); | | 1575 | other_r = OFFS_REG(arg); |
1572 | arg &= REG_MASK; | | 1576 | arg &= REG_MASK; |
1573 | FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10))); | | 1577 | FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10))); |
1574 | arg = TMP_REG1; | | 1578 | arg = TMP_REG1; |
1575 | argw = 0; | | 1579 | argw = 0; |
1576 | } | | 1580 | } |
1577 | | | 1581 | |
1578 | arg &= REG_MASK; | | 1582 | arg &= REG_MASK; |
1579 | if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0) | | 1583 | if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0) |
1580 | return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift))); | | 1584 | return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift))); |
1581 | | | 1585 | |
1582 | if (arg && argw <= 255 && argw >= -256) | | 1586 | if (arg && argw <= 255 && argw >= -256) |
1583 | return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); | | 1587 | return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); |
1584 | | | 1588 | |
1585 | /* Slow cases */ | | 1589 | /* Slow cases */ |
1586 | if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) { | | 1590 | if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) { |
1587 | diff = argw - compiler->cache_argw; | | 1591 | diff = argw - compiler->cache_argw; |
1588 | if (!arg && diff <= 255 && diff >= -256) | | 1592 | if (!arg && diff <= 255 && diff >= -256) |
1589 | return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); | | 1593 | return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); |
1590 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { | | 1594 | if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { |
1591 | FAIL_IF(compiler->error); | | 1595 | FAIL_IF(compiler->error); |
1592 | compiler->cache_argw = argw; | | 1596 | compiler->cache_argw = argw; |
1593 | } | | 1597 | } |
1594 | } | | 1598 | } |
1595 | | | 1599 | |
1596 | if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) { | | 1600 | if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) { |
1597 | compiler->cache_arg = SLJIT_MEM; | | 1601 | compiler->cache_arg = SLJIT_MEM; |
1598 | compiler->cache_argw = argw; | | 1602 | compiler->cache_argw = argw; |
1599 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); | | 1603 | FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); |
1600 | } | | 1604 | } |
1601 | | | 1605 | |
1602 | if (arg & REG_MASK) | | 1606 | if (arg & REG_MASK) |
1603 | return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3)); | | 1607 | return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3)); |
1604 | return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); | | 1608 | return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); |
1605 | } | | 1609 | } |
1606 | | | 1610 | |
1607 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, | | 1611 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, |
1608 | sljit_s32 dst, sljit_sw dstw, | | 1612 | sljit_s32 dst, sljit_sw dstw, |
1609 | sljit_s32 src, sljit_sw srcw) | | 1613 | sljit_s32 src, sljit_sw srcw) |
1610 | { | | 1614 | { |
1611 | sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; | | 1615 | sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; |
1612 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; | | 1616 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; |
1613 | | | 1617 | |
1614 | if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) | | 1618 | if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) |
1615 | inv_bits |= (1 << 31); | | 1619 | inv_bits |= (1 << 31); |
1616 | | | 1620 | |
1617 | if (src & SLJIT_MEM) { | | 1621 | if (src & SLJIT_MEM) { |
1618 | emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); | | 1622 | emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); |
1619 | src = TMP_FREG1; | | 1623 | src = TMP_FREG1; |
1620 | } | | 1624 | } |
1621 | | | 1625 | |
1622 | FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); | | 1626 | FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); |
1623 | | | 1627 | |
1624 | if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) | | 1628 | if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) |
1625 | return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); | | 1629 | return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); |
1626 | return SLJIT_SUCCESS; | | 1630 | return SLJIT_SUCCESS; |
1627 | } | | 1631 | } |
1628 | | | 1632 | |
1629 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, | | 1633 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, |
1630 | sljit_s32 dst, sljit_sw dstw, | | 1634 | sljit_s32 dst, sljit_sw dstw, |
1631 | sljit_s32 src, sljit_sw srcw) | | 1635 | sljit_s32 src, sljit_sw srcw) |
1632 | { | | 1636 | { |
1633 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; | | 1637 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; |
1634 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; | | 1638 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; |
1635 | | | 1639 | |
1636 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) | | 1640 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) |
1637 | inv_bits |= (1 << 31); | | 1641 | inv_bits |= (1 << 31); |
1638 | | | 1642 | |
1639 | if (src & SLJIT_MEM) { | | 1643 | if (src & SLJIT_MEM) { |
1640 | emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); | | 1644 | emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); |
1641 | src = TMP_REG1; | | 1645 | src = TMP_REG1; |
1642 | } else if (src & SLJIT_IMM) { | | 1646 | } else if (src & SLJIT_IMM) { |
1643 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) | | 1647 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1644 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) | | 1648 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) |
1645 | srcw = (sljit_s32)srcw; | | 1649 | srcw = (sljit_s32)srcw; |
1646 | #endif | | 1650 | #endif |
1647 | FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); | | 1651 | FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); |
1648 | src = TMP_REG1; | | 1652 | src = TMP_REG1; |
1649 | } | | 1653 | } |
1650 | | | 1654 | |
1651 | FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); | | 1655 | FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); |
1652 | | | 1656 | |
1653 | if (dst & SLJIT_MEM) | | 1657 | if (dst & SLJIT_MEM) |
1654 | return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); | | 1658 | return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); |
1655 | return SLJIT_SUCCESS; | | 1659 | return SLJIT_SUCCESS; |
1656 | } | | 1660 | } |
1657 | | | 1661 | |
1658 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, | | 1662 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, |
1659 | sljit_s32 src1, sljit_sw src1w, | | 1663 | sljit_s32 src1, sljit_sw src1w, |
1660 | sljit_s32 src2, sljit_sw src2w) | | 1664 | sljit_s32 src2, sljit_sw src2w) |
1661 | { | | 1665 | { |
1662 | sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; | | 1666 | sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; |
1663 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; | | 1667 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; |
1664 | | | 1668 | |
1665 | if (src1 & SLJIT_MEM) { | | 1669 | if (src1 & SLJIT_MEM) { |
1666 | emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); | | 1670 | emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); |
1667 | src1 = TMP_FREG1; | | 1671 | src1 = TMP_FREG1; |
1668 | } | | 1672 | } |
1669 | | | 1673 | |
1670 | if (src2 & SLJIT_MEM) { | | 1674 | if (src2 & SLJIT_MEM) { |
1671 | emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); | | 1675 | emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); |
1672 | src2 = TMP_FREG2; | | 1676 | src2 = TMP_FREG2; |
1673 | } | | 1677 | } |
1674 | | | 1678 | |
1675 | return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); | | 1679 | return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); |
1676 | } | | 1680 | } |
1677 | | | 1681 | |
1678 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, | | 1682 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, |
1679 | sljit_s32 dst, sljit_sw dstw, | | 1683 | sljit_s32 dst, sljit_sw dstw, |
1680 | sljit_s32 src, sljit_sw srcw) | | 1684 | sljit_s32 src, sljit_sw srcw) |
1681 | { | | 1685 | { |
1682 | sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; | | 1686 | sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; |
1683 | sljit_ins inv_bits; | | 1687 | sljit_ins inv_bits; |
1684 | | | 1688 | |
1685 | CHECK_ERROR(); | | 1689 | CHECK_ERROR(); |
1686 | compiler->cache_arg = 0; | | 1690 | compiler->cache_arg = 0; |
1687 | compiler->cache_argw = 0; | | 1691 | compiler->cache_argw = 0; |
1688 | | | 1692 | |
1689 | SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); | | 1693 | SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); |
1690 | SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); | | 1694 | SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); |
1691 | | | 1695 | |
1692 | inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; | | 1696 | inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; |
1693 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; | | 1697 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; |
1694 | | | 1698 | |
1695 | if (src & SLJIT_MEM) { | | 1699 | if (src & SLJIT_MEM) { |
1696 | emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); | | 1700 | emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); |
1697 | src = dst_r; | | 1701 | src = dst_r; |
1698 | } | | 1702 | } |
1699 | | | 1703 | |
1700 | switch (GET_OPCODE(op)) { | | 1704 | switch (GET_OPCODE(op)) { |
1701 | case SLJIT_MOV_F64: | | 1705 | case SLJIT_MOV_F64: |
1702 | if (src != dst_r) { | | 1706 | if (src != dst_r) { |
1703 | if (dst_r != TMP_FREG1) | | 1707 | if (dst_r != TMP_FREG1) |
1704 | FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); | | 1708 | FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); |
1705 | else | | 1709 | else |
1706 | dst_r = src; | | 1710 | dst_r = src; |
1707 | } | | 1711 | } |
1708 | break; | | 1712 | break; |
1709 | case SLJIT_NEG_F64: | | 1713 | case SLJIT_NEG_F64: |
1710 | FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); | | 1714 | FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); |
1711 | break; | | 1715 | break; |
1712 | case SLJIT_ABS_F64: | | 1716 | case SLJIT_ABS_F64: |
1713 | FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); | | 1717 | FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); |
1714 | break; | | 1718 | break; |
1715 | case SLJIT_CONV_F64_FROM_F32: | | 1719 | case SLJIT_CONV_F64_FROM_F32: |
1716 | FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); | | 1720 | FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); |
1717 | break; | | 1721 | break; |
1718 | } | | 1722 | } |
1719 | | | 1723 | |
1720 | if (dst & SLJIT_MEM) | | 1724 | if (dst & SLJIT_MEM) |
1721 | return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); | | 1725 | return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); |
1722 | return SLJIT_SUCCESS; | | 1726 | return SLJIT_SUCCESS; |
1723 | } | | 1727 | } |
1724 | | | 1728 | |
1725 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, | | 1729 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, |
1726 | sljit_s32 dst, sljit_sw dstw, | | 1730 | sljit_s32 dst, sljit_sw dstw, |
1727 | sljit_s32 src1, sljit_sw src1w, | | 1731 | sljit_s32 src1, sljit_sw src1w, |
1728 | sljit_s32 src2, sljit_sw src2w) | | 1732 | sljit_s32 src2, sljit_sw src2w) |
1729 | { | | 1733 | { |
1730 | sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; | | 1734 | sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; |
1731 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; | | 1735 | sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; |
1732 | | | 1736 | |
1733 | CHECK_ERROR(); | | 1737 | CHECK_ERROR(); |
1734 | CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); | | 1738 | CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); |
1735 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 1739 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1736 | ADJUST_LOCAL_OFFSET(src1, src1w); | | 1740 | ADJUST_LOCAL_OFFSET(src1, src1w); |
1737 | ADJUST_LOCAL_OFFSET(src2, src2w); | | 1741 | ADJUST_LOCAL_OFFSET(src2, src2w); |
1738 | | | 1742 | |
1739 | compiler->cache_arg = 0; | | 1743 | compiler->cache_arg = 0; |
1740 | compiler->cache_argw = 0; | | 1744 | compiler->cache_argw = 0; |
1741 | | | 1745 | |
1742 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; | | 1746 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; |
1743 | if (src1 & SLJIT_MEM) { | | 1747 | if (src1 & SLJIT_MEM) { |
1744 | emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); | | 1748 | emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); |
1745 | src1 = TMP_FREG1; | | 1749 | src1 = TMP_FREG1; |
1746 | } | | 1750 | } |
1747 | if (src2 & SLJIT_MEM) { | | 1751 | if (src2 & SLJIT_MEM) { |
1748 | emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); | | 1752 | emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); |
1749 | src2 = TMP_FREG2; | | 1753 | src2 = TMP_FREG2; |
1750 | } | | 1754 | } |
1751 | | | 1755 | |
1752 | switch (GET_OPCODE(op)) { | | 1756 | switch (GET_OPCODE(op)) { |
1753 | case SLJIT_ADD_F64: | | 1757 | case SLJIT_ADD_F64: |
1754 | FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); | | 1758 | FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); |
1755 | break; | | 1759 | break; |
1756 | case SLJIT_SUB_F64: | | 1760 | case SLJIT_SUB_F64: |
1757 | FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); | | 1761 | FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); |
1758 | break; | | 1762 | break; |
1759 | case SLJIT_MUL_F64: | | 1763 | case SLJIT_MUL_F64: |
1760 | FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); | | 1764 | FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); |
1761 | break; | | 1765 | break; |
1762 | case SLJIT_DIV_F64: | | 1766 | case SLJIT_DIV_F64: |
1763 | FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); | | 1767 | FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); |
1764 | break; | | 1768 | break; |
1765 | } | | 1769 | } |
1766 | | | 1770 | |
1767 | if (!(dst & SLJIT_MEM)) | | 1771 | if (!(dst & SLJIT_MEM)) |
1768 | return SLJIT_SUCCESS; | | 1772 | return SLJIT_SUCCESS; |
1769 | return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); | | 1773 | return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); |
1770 | } | | 1774 | } |
1771 | | | 1775 | |
1772 | /* --------------------------------------------------------------------- */ | | 1776 | /* --------------------------------------------------------------------- */ |
1773 | /* Other instructions */ | | 1777 | /* Other instructions */ |
1774 | /* --------------------------------------------------------------------- */ | | 1778 | /* --------------------------------------------------------------------- */ |
1775 | | | 1779 | |
1776 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) | | 1780 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) |
1777 | { | | 1781 | { |
1778 | CHECK_ERROR(); | | 1782 | CHECK_ERROR(); |
1779 | CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); | | 1783 | CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); |
1780 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 1784 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1781 | | | 1785 | |
1782 | /* For UNUSED dst. Uncommon, but possible. */ | | 1786 | /* For UNUSED dst. Uncommon, but possible. */ |
1783 | if (dst == SLJIT_UNUSED) | | 1787 | if (dst == SLJIT_UNUSED) |
1784 | return SLJIT_SUCCESS; | | 1788 | return SLJIT_SUCCESS; |
1785 | | | 1789 | |
1786 | if (FAST_IS_REG(dst)) | | 1790 | if (FAST_IS_REG(dst)) |
1787 | return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); | | 1791 | return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); |
1788 | | | 1792 | |
1789 | /* Memory. */ | | 1793 | /* Memory. */ |
1790 | return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw); | | 1794 | return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw); |
1791 | } | | 1795 | } |
1792 | | | 1796 | |
1793 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) | | 1797 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) |
1794 | { | | 1798 | { |
1795 | CHECK_ERROR(); | | 1799 | CHECK_ERROR(); |
1796 | CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); | | 1800 | CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); |
1797 | ADJUST_LOCAL_OFFSET(src, srcw); | | 1801 | ADJUST_LOCAL_OFFSET(src, srcw); |
1798 | | | 1802 | |
1799 | if (FAST_IS_REG(src)) | | 1803 | if (FAST_IS_REG(src)) |
1800 | FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); | | 1804 | FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); |
1801 | else if (src & SLJIT_MEM) | | 1805 | else if (src & SLJIT_MEM) |
1802 | FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); | | 1806 | FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); |
1803 | else if (src & SLJIT_IMM) | | 1807 | else if (src & SLJIT_IMM) |
1804 | FAIL_IF(load_immediate(compiler, TMP_LR, srcw)); | | 1808 | FAIL_IF(load_immediate(compiler, TMP_LR, srcw)); |
1805 | | | 1809 | |
1806 | return push_inst(compiler, RET | RN(TMP_LR)); | | 1810 | return push_inst(compiler, RET | RN(TMP_LR)); |
1807 | } | | 1811 | } |
1808 | | | 1812 | |
1809 | /* --------------------------------------------------------------------- */ | | 1813 | /* --------------------------------------------------------------------- */ |
1810 | /* Conditional instructions */ | | 1814 | /* Conditional instructions */ |
1811 | /* --------------------------------------------------------------------- */ | | 1815 | /* --------------------------------------------------------------------- */ |
1812 | | | 1816 | |
1813 | static sljit_uw get_cc(sljit_s32 type) | | 1817 | static sljit_uw get_cc(sljit_s32 type) |
1814 | { | | 1818 | { |
1815 | switch (type) { | | 1819 | switch (type) { |
1816 | case SLJIT_EQUAL: | | 1820 | case SLJIT_EQUAL: |
1817 | case SLJIT_MUL_NOT_OVERFLOW: | | 1821 | case SLJIT_MUL_NOT_OVERFLOW: |
1818 | case SLJIT_EQUAL_F64: | | 1822 | case SLJIT_EQUAL_F64: |
1819 | return 0x1; | | 1823 | return 0x1; |
1820 | | | 1824 | |
1821 | case SLJIT_NOT_EQUAL: | | 1825 | case SLJIT_NOT_EQUAL: |
1822 | case SLJIT_MUL_OVERFLOW: | | 1826 | case SLJIT_MUL_OVERFLOW: |
1823 | case SLJIT_NOT_EQUAL_F64: | | 1827 | case SLJIT_NOT_EQUAL_F64: |
1824 | return 0x0; | | 1828 | return 0x0; |
1825 | | | 1829 | |
1826 | case SLJIT_LESS: | | 1830 | case SLJIT_LESS: |
1827 | case SLJIT_LESS_F64: | | 1831 | case SLJIT_LESS_F64: |
1828 | return 0x2; | | 1832 | return 0x2; |
1829 | | | 1833 | |
1830 | case SLJIT_GREATER_EQUAL: | | 1834 | case SLJIT_GREATER_EQUAL: |
1831 | case SLJIT_GREATER_EQUAL_F64: | | 1835 | case SLJIT_GREATER_EQUAL_F64: |
1832 | return 0x3; | | 1836 | return 0x3; |
1833 | | | 1837 | |
1834 | case SLJIT_GREATER: | | 1838 | case SLJIT_GREATER: |
1835 | case SLJIT_GREATER_F64: | | 1839 | case SLJIT_GREATER_F64: |
1836 | return 0x9; | | 1840 | return 0x9; |
1837 | | | 1841 | |
1838 | case SLJIT_LESS_EQUAL: | | 1842 | case SLJIT_LESS_EQUAL: |
1839 | case SLJIT_LESS_EQUAL_F64: | | 1843 | case SLJIT_LESS_EQUAL_F64: |
1840 | return 0x8; | | 1844 | return 0x8; |
1841 | | | 1845 | |
1842 | case SLJIT_SIG_LESS: | | 1846 | case SLJIT_SIG_LESS: |
1843 | return 0xa; | | 1847 | return 0xa; |
1844 | | | 1848 | |
1845 | case SLJIT_SIG_GREATER_EQUAL: | | 1849 | case SLJIT_SIG_GREATER_EQUAL: |
1846 | return 0xb; | | 1850 | return 0xb; |
1847 | | | 1851 | |
1848 | case SLJIT_SIG_GREATER: | | 1852 | case SLJIT_SIG_GREATER: |
1849 | return 0xd; | | 1853 | return 0xd; |
1850 | | | 1854 | |
1851 | case SLJIT_SIG_LESS_EQUAL: | | 1855 | case SLJIT_SIG_LESS_EQUAL: |
1852 | return 0xc; | | 1856 | return 0xc; |
1853 | | | 1857 | |
1854 | case SLJIT_OVERFLOW: | | 1858 | case SLJIT_OVERFLOW: |
1855 | case SLJIT_UNORDERED_F64: | | 1859 | case SLJIT_UNORDERED_F64: |
1856 | return 0x7; | | 1860 | return 0x7; |
1857 | | | 1861 | |
1858 | case SLJIT_NOT_OVERFLOW: | | 1862 | case SLJIT_NOT_OVERFLOW: |
1859 | case SLJIT_ORDERED_F64: | | 1863 | case SLJIT_ORDERED_F64: |
1860 | return 0x6; | | 1864 | return 0x6; |
1861 | | | 1865 | |
1862 | default: | | 1866 | default: |
1863 | SLJIT_UNREACHABLE(); | | 1867 | SLJIT_UNREACHABLE(); |
1864 | return 0xe; | | 1868 | return 0xe; |
1865 | } | | 1869 | } |
1866 | } | | 1870 | } |
1867 | | | 1871 | |
1868 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) | | 1872 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) |
1869 | { | | 1873 | { |
1870 | struct sljit_label *label; | | 1874 | struct sljit_label *label; |
1871 | | | 1875 | |
1872 | CHECK_ERROR_PTR(); | | 1876 | CHECK_ERROR_PTR(); |
1873 | CHECK_PTR(check_sljit_emit_label(compiler)); | | 1877 | CHECK_PTR(check_sljit_emit_label(compiler)); |
1874 | | | 1878 | |
1875 | if (compiler->last_label && compiler->last_label->size == compiler->size) | | 1879 | if (compiler->last_label && compiler->last_label->size == compiler->size) |
1876 | return compiler->last_label; | | 1880 | return compiler->last_label; |
1877 | | | 1881 | |
1878 | label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); | | 1882 | label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); |
1879 | PTR_FAIL_IF(!label); | | 1883 | PTR_FAIL_IF(!label); |
1880 | set_label(label, compiler); | | 1884 | set_label(label, compiler); |
1881 | return label; | | 1885 | return label; |
1882 | } | | 1886 | } |
1883 | | | 1887 | |
1884 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) | | 1888 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) |
1885 | { | | 1889 | { |
1886 | struct sljit_jump *jump; | | 1890 | struct sljit_jump *jump; |
1887 | | | 1891 | |
1888 | CHECK_ERROR_PTR(); | | 1892 | CHECK_ERROR_PTR(); |
1889 | CHECK_PTR(check_sljit_emit_jump(compiler, type)); | | 1893 | CHECK_PTR(check_sljit_emit_jump(compiler, type)); |
1890 | | | 1894 | |
1891 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); | | 1895 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
1892 | PTR_FAIL_IF(!jump); | | 1896 | PTR_FAIL_IF(!jump); |
1893 | set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); | | 1897 | set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); |
1894 | type &= 0xff; | | 1898 | type &= 0xff; |
1895 | | | 1899 | |
1896 | if (type < SLJIT_JUMP) { | | 1900 | if (type < SLJIT_JUMP) { |
1897 | jump->flags |= IS_COND; | | 1901 | jump->flags |= IS_COND; |
1898 | PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); | | 1902 | PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); |
1899 | } | | 1903 | } |
1900 | else if (type >= SLJIT_FAST_CALL) | | 1904 | else if (type >= SLJIT_FAST_CALL) |
1901 | jump->flags |= IS_BL; | | 1905 | jump->flags |= IS_BL; |
1902 | | | 1906 | |
1903 | PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); | | 1907 | PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); |
1904 | jump->addr = compiler->size; | | 1908 | jump->addr = compiler->size; |
1905 | PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); | | 1909 | PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); |
1906 | | | 1910 | |
1907 | return jump; | | 1911 | return jump; |
1908 | } | | 1912 | } |
1909 | | | 1913 | |
1910 | static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, | | 1914 | static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, |
1911 | sljit_s32 src, sljit_sw srcw) | | 1915 | sljit_s32 src, sljit_sw srcw) |
1912 | { | | 1916 | { |
1913 | struct sljit_jump *jump; | | 1917 | struct sljit_jump *jump; |
1914 | sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0; | | 1918 | sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0; |
1915 | | | 1919 | |
1916 | SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); | | 1920 | SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); |
1917 | ADJUST_LOCAL_OFFSET(src, srcw); | | 1921 | ADJUST_LOCAL_OFFSET(src, srcw); |
1918 | | | 1922 | |
1919 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); | | 1923 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
1920 | PTR_FAIL_IF(!jump); | | 1924 | PTR_FAIL_IF(!jump); |
1921 | set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); | | 1925 | set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); |
1922 | jump->flags |= IS_CBZ | IS_COND; | | 1926 | jump->flags |= IS_CBZ | IS_COND; |
1923 | | | 1927 | |
1924 | if (src & SLJIT_MEM) { | | 1928 | if (src & SLJIT_MEM) { |
1925 | PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw)); | | 1929 | PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw)); |
1926 | src = TMP_REG1; | | 1930 | src = TMP_REG1; |
1927 | } | | 1931 | } |
1928 | else if (src & SLJIT_IMM) { | | 1932 | else if (src & SLJIT_IMM) { |
1929 | PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); | | 1933 | PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); |
1930 | src = TMP_REG1; | | 1934 | src = TMP_REG1; |
1931 | } | | 1935 | } |
1932 | SLJIT_ASSERT(FAST_IS_REG(src)); | | 1936 | SLJIT_ASSERT(FAST_IS_REG(src)); |
1933 | | | 1937 | |
1934 | if ((type & 0xff) == SLJIT_EQUAL) | | 1938 | if ((type & 0xff) == SLJIT_EQUAL) |
1935 | inv_bits |= 1 << 24; | | 1939 | inv_bits |= 1 << 24; |
1936 | | | 1940 | |
1937 | PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); | | 1941 | PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); |
1938 | PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); | | 1942 | PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); |
1939 | jump->addr = compiler->size; | | 1943 | jump->addr = compiler->size; |
1940 | PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); | | 1944 | PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); |
1941 | return jump; | | 1945 | return jump; |
1942 | } | | 1946 | } |
1943 | | | 1947 | |
1944 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) | | 1948 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) |
1945 | { | | 1949 | { |
1946 | struct sljit_jump *jump; | | 1950 | struct sljit_jump *jump; |
1947 | | | 1951 | |
1948 | CHECK_ERROR(); | | 1952 | CHECK_ERROR(); |
1949 | CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); | | 1953 | CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); |
1950 | ADJUST_LOCAL_OFFSET(src, srcw); | | 1954 | ADJUST_LOCAL_OFFSET(src, srcw); |
1951 | | | 1955 | |
1952 | /* In ARM, we don't need to touch the arguments. */ | | 1956 | /* In ARM, we don't need to touch the arguments. */ |
1953 | if (!(src & SLJIT_IMM)) { | | 1957 | if (!(src & SLJIT_IMM)) { |
1954 | if (src & SLJIT_MEM) { | | 1958 | if (src & SLJIT_MEM) { |
1955 | FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw)); | | 1959 | FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw)); |
1956 | src = TMP_REG1; | | 1960 | src = TMP_REG1; |
1957 | } | | 1961 | } |
1958 | return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); | | 1962 | return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); |
1959 | } | | 1963 | } |
1960 | | | 1964 | |
1961 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); | | 1965 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
1962 | FAIL_IF(!jump); | | 1966 | FAIL_IF(!jump); |
1963 | set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); | | 1967 | set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); |
1964 | jump->u.target = srcw; | | 1968 | jump->u.target = srcw; |
1965 | | | 1969 | |
1966 | FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); | | 1970 | FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); |
1967 | jump->addr = compiler->size; | | 1971 | jump->addr = compiler->size; |
1968 | return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); | | 1972 | return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); |
1969 | } | | 1973 | } |
1970 | | | 1974 | |
1971 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, | | 1975 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, |
1972 | sljit_s32 dst, sljit_sw dstw, | | 1976 | sljit_s32 dst, sljit_sw dstw, |
1973 | sljit_s32 src, sljit_sw srcw, | | 1977 | sljit_s32 src, sljit_sw srcw, |
1974 | sljit_s32 type) | | 1978 | sljit_s32 type) |
1975 | { | | 1979 | { |
1976 | sljit_s32 dst_r, flags, mem_flags; | | 1980 | sljit_s32 dst_r, flags, mem_flags; |
1977 | sljit_ins cc; | | 1981 | sljit_ins cc; |
1978 | | | 1982 | |
1979 | CHECK_ERROR(); | | 1983 | CHECK_ERROR(); |
1980 | CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); | | 1984 | CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); |
1981 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 1985 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1982 | ADJUST_LOCAL_OFFSET(src, srcw); | | 1986 | ADJUST_LOCAL_OFFSET(src, srcw); |
1983 | | | 1987 | |
1984 | if (dst == SLJIT_UNUSED) | | 1988 | if (dst == SLJIT_UNUSED) |
1985 | return SLJIT_SUCCESS; | | 1989 | return SLJIT_SUCCESS; |
1986 | | | 1990 | |
1987 | cc = get_cc(type & 0xff); | | 1991 | cc = get_cc(type & 0xff); |
1988 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; | | 1992 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1989 | | | 1993 | |
1990 | if (GET_OPCODE(op) < SLJIT_ADD) { | | 1994 | if (GET_OPCODE(op) < SLJIT_ADD) { |
1991 | FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); | | 1995 | FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); |
1992 | if (dst_r != TMP_REG1) | | 1996 | if (dst_r != TMP_REG1) |
1993 | return SLJIT_SUCCESS; | | 1997 | return SLJIT_SUCCESS; |
1994 | return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw); | | 1998 | return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw); |
1995 | } | | 1999 | } |
1996 | | | 2000 | |
1997 | compiler->cache_arg = 0; | | 2001 | compiler->cache_arg = 0; |
1998 | compiler->cache_argw = 0; | | 2002 | compiler->cache_argw = 0; |
1999 | flags = HAS_FLAGS(op) ? SET_FLAGS : 0; | | 2003 | flags = HAS_FLAGS(op) ? SET_FLAGS : 0; |
2000 | mem_flags = WORD_SIZE; | | 2004 | mem_flags = WORD_SIZE; |
2001 | if (op & SLJIT_I32_OP) { | | 2005 | if (op & SLJIT_I32_OP) { |
2002 | flags |= INT_OP; | | 2006 | flags |= INT_OP; |
2003 | mem_flags = INT_SIZE; | | 2007 | mem_flags = INT_SIZE; |
2004 | } | | 2008 | } |
2005 | | | 2009 | |
2006 | if (src & SLJIT_MEM) { | | 2010 | if (src & SLJIT_MEM) { |
2007 | FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw)); | | 2011 | FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw)); |
2008 | src = TMP_REG1; | | 2012 | src = TMP_REG1; |
2009 | srcw = 0; | | 2013 | srcw = 0; |
2010 | } else if (src & SLJIT_IMM) | | 2014 | } else if (src & SLJIT_IMM) |
2011 | flags |= ARG1_IMM; | | 2015 | flags |= ARG1_IMM; |
2012 | | | 2016 | |
2013 | FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); | | 2017 | FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); |
2014 | emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2); | | 2018 | emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2); |
2015 | | | 2019 | |
2016 | if (dst_r != TMP_REG1) | | 2020 | if (dst_r != TMP_REG1) |
2017 | return SLJIT_SUCCESS; | | 2021 | return SLJIT_SUCCESS; |
2018 | return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); | | 2022 | return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); |
2019 | } | | 2023 | } |
2020 | | | 2024 | |
2021 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) | | 2025 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) |
2022 | { | | 2026 | { |
2023 | struct sljit_const *const_; | | 2027 | struct sljit_const *const_; |
2024 | sljit_s32 dst_r; | | 2028 | sljit_s32 dst_r; |
2025 | | | 2029 | |
2026 | CHECK_ERROR_PTR(); | | 2030 | CHECK_ERROR_PTR(); |
2027 | CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); | | 2031 | CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); |
2028 | ADJUST_LOCAL_OFFSET(dst, dstw); | | 2032 | ADJUST_LOCAL_OFFSET(dst, dstw); |
2029 | | | 2033 | |
2030 | const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); | | 2034 | const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); |
2031 | PTR_FAIL_IF(!const_); | | 2035 | PTR_FAIL_IF(!const_); |
2032 | set_const(const_, compiler); | | 2036 | set_const(const_, compiler); |
2033 | | | 2037 | |
2034 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; | | 2038 | dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; |
2035 | PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); | | 2039 | PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); |
2036 | | | 2040 | |
2037 | if (dst & SLJIT_MEM) | | 2041 | if (dst & SLJIT_MEM) |
2038 | PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); | | 2042 | PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); |
2039 | return const_; | | 2043 | return const_; |
2040 | } | | 2044 | } |
2041 | | | 2045 | |
2042 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) | | 2046 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) |
2043 | { | | 2047 | { |
2044 | sljit_ins* inst = (sljit_ins*)addr; | | 2048 | sljit_ins* inst = (sljit_ins*)addr; |
2045 | modify_imm64_const(inst, new_target); | | 2049 | modify_imm64_const(inst, new_target); |
2046 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); | | 2050 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); |
2047 | SLJIT_CACHE_FLUSH(inst, inst + 4); | | 2051 | SLJIT_CACHE_FLUSH(inst, inst + 4); |
2048 | } | | 2052 | } |
2049 | | | 2053 | |
2050 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) | | 2054 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) |
2051 | { | | 2055 | { |
2052 | sljit_ins* inst = (sljit_ins*)addr; | | 2056 | sljit_ins* inst = (sljit_ins*)addr; |
2053 | modify_imm64_const(inst, new_constant); | | 2057 | modify_imm64_const(inst, new_constant); |
2054 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); | | 2058 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); |
2055 | SLJIT_CACHE_FLUSH(inst, inst + 4); | | 2059 | SLJIT_CACHE_FLUSH(inst, inst + 4); |
2056 | } | | 2060 | } |