LALPulsar 7.1.1.1-eeff03c
hough_x64.i
Go to the documentation of this file.
1/*
2 * Copyright (C) 2008 Bernd Machenschalk
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17 * MA 02111-1307 USA
18 *
19 */
20
21
22#ifndef _OPT_HOUGH_AMD64_LINUX_H
23#define _OPT_HOUGH_AMD64_LINUX_H
24
/*
 * Assembler directive strings spliced into the inline assembly below.
 * The AD_ALIGNnn macros request nn-byte alignment; the spelling of the
 * .align operand differs between the two assembler flavours.
 */
25#if !defined(__APPLE__) /* x86 gas: the .align operand is a byte count */
26#define AD_ALIGN16 ".align 16"
27#define AD_ALIGN32 ".align 32"
28#define AD_ALIGN64 ".align 64"
29#define AD_FLOAT ".float "
30#define AD_ASCII ".string "
31#else /* Apple assembler: the .align operand is log2 of the byte count */
32#define AD_ALIGN16 ".align 4"
33#define AD_ALIGN32 ".align 5"
34#define AD_ALIGN64 ".align 6"
35#define AD_FLOAT ".single "
36#define AD_ASCII ".ascii "
37#endif
38
/*
 * ADDPHMD2HD_WLR_LOOP(_XPIXEL,_YLOWER,_YUPPER,_XSIDEP1,_MAP,_WEIGHT)
 *
 * x86-64 inline assembly (AT&T syntax, GCC extended asm).
 *
 * For every row index y in [_YLOWER, _YUPPER] the code reads the 16-bit
 * entry _XPIXEL[y] (zero-extended) and adds the double _WEIGHT to the
 * 8-byte element  _MAP[y * _XSIDEP1 + _XPIXEL[y]].
 * Rows are processed two per loop iteration; a single trailing row is
 * handled after the loop when the number of rows is odd.
 * Nothing is done when _YUPPER is negative or the range is empty.
 *
 * Operands (all passed with "m" constraints, i.e. each argument must be an
 * lvalue the compiler can address in memory):
 *   _XPIXEL   pointer to 16-bit entries            (loaded into r8)
 *   _YLOWER   first row, read as 32 bits           (loaded into r11d)
 *   _YUPPER   last row (inclusive), read as 32 bits (loaded into eax)
 *   _XSIDEP1  row stride of _MAP in elements, 32 bits (loaded into edx)
 *   _MAP      pointer to 8-byte elements           (loaded into rdi)
 *   _WEIGHT   8-byte double added to each element  (loaded into xmm0)
 *
 * Notes:
 *   - _YLOWER is zero-extended before being used as an index, so a negative
 *     value is not handled here.
 *   - The starting row offset _YLOWER * _XSIDEP1 is computed as a 32-bit
 *     product.
 *   - Labels 1/2/3 are numeric local labels, so the macro can be expanded
 *     more than once in a translation unit.
 *
 * Clobbers: rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm2, flags, memory.
 */
39#define ADDPHMD2HD_WLR_LOOP(_XPIXEL,_YLOWER,_YUPPER,_XSIDEP1,_MAP,_WEIGHT)\
40__asm __volatile ( \
41 "xor %%r11,%%r11 \n\t" /* r11 = 0 (the 32-bit load into r11d below zero-extends anyway) */ \
42 "mov %[yUpper] , %%eax \n\t" /* eax = yUpper; upper half of rax is zeroed */ \
43 "test %%eax,%%eax \n\t" /* set SF for the js below; the mov/lea in between leave flags alone */ \
44 "mov %[xPixel], %%R8 \n\t" /* r8 = xPixel */ \
45 "mov %[yLower], %%r11d \n\t" /* r11 = yLower, zero-extended */ \
46 "lea (%%R8,%%r11,0x2), %%rsi \n\t" /* rsi = &xPixel[yLower] (2-byte entries) */ \
47 "mov %[xSideP1], %%edx \n\t" /* rdx = xSideP1, row stride in map elements */ \
48 "js 3f \n\t" /* yUpper < 0: nothing to do */ \
49 "lea -0x2(%%r8,%%rax,0x2),%%R8 \n\t" /* r8 = &xPixel[yUpper-1]: last entry at which a pair may start */ \
50 "mov %[map] , %%rdi \n\t" /* rdi = map */ \
51 "mov %%r11d,%%eax \n\t" /* eax = yLower */ \
52 "imul %%edx, %%eax \n\t" /* eax = yLower * xSideP1 (32-bit product) */ \
53 "lea (%%rdi, %%rax, 0x8), %%rdi \n\t" /* rdi = &map[yLower * xSideP1], start of the first row */ \
54 "movsd %[w],%%xmm0 \n\t" /* xmm0 = weight */ \
55 "cmp %%r8,%%rsi \n\t" /* flags for the jbe at label 2 */ \
56 "jmp 2f \n\t" /* enter the loop at its condition check */ \
57AD_ALIGN32 "\n" /* align the loop head */ \
58 "1: \n\t" /* loop body: two rows per iteration */ \
59 "movzwl (%%rsi),%%r11d \n\t" /* r11 = xPixel[y], zero-extended */ \
60 "movzwl 2(%%rsi),%%ecx \n\t" /* rcx = xPixel[y+1], zero-extended */ \
61 "lea (%%rdi, %%r11, 0x8) , %%r9 \n\t" /* r9 = &row_y[xPixel[y]] */ \
62 "movsd (%%r9),%%xmm1 \n\t" /* xmm1 = element of row y */ \
63 "lea (%%rdi,%%rdx,0x8) , %%rdi \n\t" /* rdi -> row y+1 */ \
64 "lea (%%rdi,%%rcx,0x8) , %%r10 \n\t" /* r10 = &row_{y+1}[xPixel[y+1]] */ \
65 "movsd (%%r10),%%xmm2 \n\t" /* xmm2 = element of row y+1 */ \
66 "addsd %%xmm0,%%xmm1 \n\t" /* add weight to the row y element */ \
67 "movsd %%xmm1,(%%r9) \n\t" /* store it back */ \
68 "addsd %%xmm0,%%xmm2 \n\t" /* add weight to the row y+1 element */ \
69 "movsd %%xmm2,(%%r10) \n\t" /* store it back */ \
70 "lea (%%rdi,%%rdx,0x8), %%rdi \n\t" /* rdi -> row y+2 */ \
71 "lea 4(%%rsi) , %%rsi \n\t" /* advance rsi by two 16-bit entries */ \
72 "cmp %%r8,%%rsi \n" /* compare current entry pointer with the pair limit */ \
73 "2: \n\t" /* loop condition (also the entry point) */ \
74 "jbe 1b \n\t" /* unsigned: continue while rsi <= &xPixel[yUpper-1] */ \
75 "add $0x2,%%r8 \n\t" /* r8 = &xPixel[yUpper] */ \
76 "cmp %%r8,%%rsi \n\t" /* is exactly one row left? */ \
77 "jne 3f \n\t" /* no: done */ \
78 "movzwl (%%rsi) , %%r11d \n\t" /* r11 = xPixel[yUpper], zero-extended */ \
79 "lea (%%rdi, %%r11, 0x8) , %%r9 \n\t" /* r9 = &row_yUpper[xPixel[yUpper]] */ \
80 "movsd (%%r9),%%xmm1 \n\t" /* load the element */ \
81 "addsd %%xmm0,%%xmm1 \n\t" /* add weight */ \
82 "movsd %%xmm1,(%%r9) \n" /* store it back */ \
83 "3: \n\t" /* exit label */ \
84 "NOP \n" /* instruction following the exit label */ \
85 : /* no output operands; results are written through the map pointer */ \
86 : /* input operands, all taken from memory */ \
87 [xPixel] "m" (_XPIXEL) , \
88 [yLower] "m" (_YLOWER) , \
89 [yUpper] "m" (_YUPPER), \
90 [xSideP1] "m" (_XSIDEP1) , \
91 [map] "m" (_MAP) , \
92 [w] "m" (_WEIGHT) \
93 : /* clobber list */ \
94 "memory","r8","r9","r10","r11","rax", "rcx", "rdx", "rsi", "rdi", "cc", \
95 "xmm0","xmm1","xmm2" \
96 )
97
98#endif