LALPulsar 7.1.1.1-eeff03c
hough_sse2.i
Go to the documentation of this file.
1/*
2 * Copyright (C) 2008 Bernd Machenschalk
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17 * MA 02111-1307 USA
18 *
19 */
20
21#ifndef _OPT_HOUGH_I686_SSE2_H
22#define _OPT_HOUGH_I686_SSE2_H
23
/*
 * Assembler directive spellings used when pasting directives into inline
 * assembly strings.  Two variants are provided: one selected when __APPLE__
 * is defined, and one for x86 gas otherwise.
 *
 * AD_FLOAT / AD_ASCII  - directive text for emitting a float / a string.
 * AD_ALIGN16/32/64     - alignment directives.  In the __APPLE__ variant the
 *                        operand is written as 4/5/6 (the exponents of
 *                        16/32/64); in the gas variant it is written as the
 *                        byte count itself.
 */
#if defined(__APPLE__)

#define AD_ALIGN16 ".align 4"
#define AD_ALIGN32 ".align 5"
#define AD_ALIGN64 ".align 6"
#define AD_FLOAT ".single "
#define AD_ASCII ".ascii "

#else /* x86 gas */

#define AD_ALIGN16 ".align 16"
#define AD_ALIGN32 ".align 32"
#define AD_ALIGN64 ".align 64"
#define AD_FLOAT ".float "
#define AD_ASCII ".string "

#endif /* __APPLE__ */
37
/*
 * ADDPHMD2HD_WLR_LOOP(_XPIXEL,_YLOWER,_YUPPER,_XSIDEP1,_MAP,_WEIGHT)
 *
 * GCC extended inline assembly for 32-bit x86 with SSE2 (AT&T syntax).
 * Expands to a single asm statement (no trailing semicolon).  As far as the
 * instructions below show, it is equivalent to
 *
 *     for (y = _YLOWER; y <= _YUPPER; y++)
 *         _MAP[y * _XSIDEP1 + _XPIXEL[y]] += _WEIGHT;
 *
 * Operands (all passed with an "m" constraint, so each argument must be an
 * lvalue in memory):
 *   _XPIXEL   32-bit pointer; elements are read as unsigned 16-bit values
 *   _YLOWER   32-bit integer, first row index (inclusive)
 *   _YUPPER   32-bit integer, last row index (inclusive)
 *   _XSIDEP1  32-bit integer, row stride of _MAP counted in 8-byte elements
 *   _MAP      32-bit pointer to 8-byte (double) elements
 *   _WEIGHT   double added to each selected element
 *
 * Register roles once setup is complete:
 *   %esi  cursor into _XPIXEL            %eax  &_XPIXEL[_YUPPER - 1]
 *   %edi  base of the current _MAP row   %edx  row stride (_XSIDEP1)
 *   %xmm0 weight                         %ebx, %ecx  scratch element addresses
 *
 * Rows are handled two per loop iteration; a single left-over row is handled
 * after the loop.  With _YUPPER == _YLOWER - 1 nothing is updated.
 *
 * %ebx is saved and restored by hand instead of being listed as a clobber.
 * Because it is not a clobber, the compiler may address a memory operand
 * through %ebx, and it may also address operands relative to %esp.  For that
 * reason every memory operand is loaded BEFORE %ebx is pushed or overwritten;
 * no operand is referenced once %esp or %ebx has changed.
 *
 * Clobbers: eax, ecx, edx, esi, edi, xmm0-xmm2, condition codes, memory.
 */
#define ADDPHMD2HD_WLR_LOOP(_XPIXEL,_YLOWER,_YUPPER,_XSIDEP1,_MAP,_WEIGHT)\
__asm __volatile ( \
 /* setup: read all six memory operands while %esp and %ebx are untouched */ \
 "movsd %[w],%%xmm0 \n\t" \
 "mov %[xPixel], %%eax \n\t" \
 "mov %[yLower], %%ecx \n\t" \
 "lea (%%eax,%%ecx,0x2), %%esi \n\t" /* esi = &xPixel[yLower] */ \
 "mov %[xSideP1], %%edx \n\t" \
 "imul %%edx, %%ecx \n\t" /* ecx = yLower * xSideP1 */ \
 "mov %[yUpper] , %%edi \n\t" \
 "lea -0x2(%%eax,%%edi,0x2),%%eax \n\t" /* eax = &xPixel[yUpper-1] */ \
 "mov %[map] , %%edi \n\t" \
 "lea (%%edi, %%ecx, 0x8), %%edi \n\t" /* edi = &map[yLower*xSideP1] */ \
 /* operands are all consumed: now it is safe to move %esp and reuse %ebx */ \
 "push %%ebx \n\t" \
 "cmp %%eax,%%esi \n\t" \
 "jmp 2f \n\t" /* enter at the loop test; skips alignment padding */ \
 AD_ALIGN32 "\n" \
 "1: \n\t" \
 /* main loop: two consecutive rows per iteration */ \
 "movzwl (%%esi),%%ebx \n\t" \
 "movzwl 2(%%esi),%%ecx \n\t" \
 \
 "lea (%%edi, %%ebx, 0x8) , %%ebx \n\t" /* element in first row */ \
 "movsd (%%ebx),%%xmm1 \n\t" \
 "lea (%%edi,%%edx,0x8) , %%edi \n\t" /* advance to second row */ \
 "lea (%%edi,%%ecx,0x8) , %%ecx \n\t" /* element in second row */ \
 "movsd (%%ecx),%%xmm2 \n\t" \
 \
 "addsd %%xmm0,%%xmm1 \n\t" \
 "movsd %%xmm1,(%%ebx) \n\t" \
 "addsd %%xmm0,%%xmm2 \n\t" \
 "movsd %%xmm2,(%%ecx) \n\t" \
 "lea (%%edi,%%edx,0x8), %%edi \n\t" /* advance to next pair's row */ \
 \
 "lea 4(%%esi) , %%esi \n\t" /* lea keeps flags out of the way */ \
 "cmp %%eax,%%esi \n" \
 \
 "2: \n\t" \
 "jbe 1b \n\t" /* unsigned: cursor <= &xPixel[yUpper-1] */ \
 "add $0x2,%%eax \n\t" /* eax = &xPixel[yUpper] */ \
 "cmp %%eax,%%esi \n\t" \
 "jne 3f \n\t" /* no left-over row */ \
 \
 /* tail: exactly one row remains */ \
 "movzwl (%%esi) , %%ebx \n\t" \
 "lea (%%edi, %%ebx, 0x8) , %%ebx \n\t" \
 "movsd (%%ebx),%%xmm1 \n\t" \
 "addsd %%xmm0,%%xmm1 \n\t" \
 "movsd %%xmm1,(%%ebx) \n\t" \
 \
 "3: \n\t" \
 "pop %%ebx \n\t" \
 : \
 : \
 [xPixel] "m" (_XPIXEL) , \
 [yLower] "m" (_YLOWER) , \
 [yUpper] "m" (_YUPPER), \
 [xSideP1] "m" (_XSIDEP1) , \
 [map] "m" (_MAP) , \
 [w] "m" (_WEIGHT) \
 : \
 "memory","eax", "ecx", "edx", "esi", "edi", "cc", \
 "xmm0","xmm1","xmm2" \
 )
100#endif