Diffstat (limited to 'gfx/cairo/libpixman/src')
-rw-r--r--  gfx/cairo/libpixman/src/Makefile.in                     |   10
-rw-r--r--  gfx/cairo/libpixman/src/dither/blue-noise-64x64.h       |   77
-rw-r--r--  gfx/cairo/libpixman/src/dither/make-blue-noise.c        |  679
-rw-r--r--  gfx/cairo/libpixman/src/loongson-mmintrin.h             |  412
-rw-r--r--  gfx/cairo/libpixman/src/make-combine.pl                 |   86
-rw-r--r--  gfx/cairo/libpixman/src/meson.build                     |  129
-rw-r--r--  gfx/cairo/libpixman/src/moz.build                       |   22
-rw-r--r--  gfx/cairo/libpixman/src/pixman-access.c                 |  297
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-asm.h                |   37
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-common.h             |   27
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm     |   21
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S  |   12
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-neon-asm.S           |   25
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-neon-asm.h           |   30
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-neon.c               |   65
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S    |   11
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-simd-asm.S           |  566
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-simd-asm.h           |  120
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm-simd.c               |   50
-rw-r--r--  gfx/cairo/libpixman/src/pixman-arm.c                    |   63
-rw-r--r--  gfx/cairo/libpixman/src/pixman-bits-image.c             | 1336
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine-float.c          |  372
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine.c.template       | 2461
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine.h.template       |  226
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine16.c              |  114
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine32.c              | 1815
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine32.h              |    2
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine64.c              | 2465
-rw-r--r--  gfx/cairo/libpixman/src/pixman-combine64.h              |  230
-rw-r--r--  gfx/cairo/libpixman/src/pixman-compiler.h               |   14
-rw-r--r--  gfx/cairo/libpixman/src/pixman-conical-gradient.c       |   36
-rw-r--r--  gfx/cairo/libpixman/src/pixman-cpu.c                    |  765
-rw-r--r--  gfx/cairo/libpixman/src/pixman-dither.h                 |   51
-rw-r--r--  gfx/cairo/libpixman/src/pixman-edge-imp.h               |    5
-rw-r--r--  gfx/cairo/libpixman/src/pixman-fast-path.c              | 1272
-rw-r--r--  gfx/cairo/libpixman/src/pixman-fast-path.h              | 1022
-rw-r--r--  gfx/cairo/libpixman/src/pixman-filter.c                 |  244
-rw-r--r--  gfx/cairo/libpixman/src/pixman-general.c                |  163
-rw-r--r--  gfx/cairo/libpixman/src/pixman-glyph.c                  |    6
-rw-r--r--  gfx/cairo/libpixman/src/pixman-gradient-walker.c        |  166
-rw-r--r--  gfx/cairo/libpixman/src/pixman-image.c                  |  159
-rw-r--r--  gfx/cairo/libpixman/src/pixman-implementation.c         |  132
-rw-r--r--  gfx/cairo/libpixman/src/pixman-inlines.h                |  160
-rw-r--r--  gfx/cairo/libpixman/src/pixman-linear-gradient.c        |  204
-rw-r--r--  gfx/cairo/libpixman/src/pixman-matrix.c                 |    4
-rw-r--r--  gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S         | 1312
-rw-r--r--  gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h         |   54
-rw-r--r--  gfx/cairo/libpixman/src/pixman-mips-dspr2.c             |   58
-rw-r--r--  gfx/cairo/libpixman/src/pixman-mips-dspr2.h             |   50
-rw-r--r--  gfx/cairo/libpixman/src/pixman-mmx.c                    |  261
-rw-r--r--  gfx/cairo/libpixman/src/pixman-noop.c                   |  183
-rw-r--r--  gfx/cairo/libpixman/src/pixman-private.h                |  198
-rw-r--r--  gfx/cairo/libpixman/src/pixman-radial-gradient.c        |  382
-rw-r--r--  gfx/cairo/libpixman/src/pixman-region.c                 |   22
-rw-r--r--  gfx/cairo/libpixman/src/pixman-solid-fill.c             |    8
-rw-r--r--  gfx/cairo/libpixman/src/pixman-sse2.c                   |  506
-rw-r--r--  gfx/cairo/libpixman/src/pixman-ssse3.c                  |  351
-rw-r--r--  gfx/cairo/libpixman/src/pixman-utils.c                  |   27
-rw-r--r--  gfx/cairo/libpixman/src/pixman-version.h                |   10
-rw-r--r--  gfx/cairo/libpixman/src/pixman-version.h.in             |   54
-rw-r--r--  gfx/cairo/libpixman/src/pixman-vmx.c                    | 1704
-rw-r--r--  gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h      |  263
-rw-r--r--  gfx/cairo/libpixman/src/pixman-x86.c                    |   16
-rw-r--r--  gfx/cairo/libpixman/src/pixman.c                        |   42
-rw-r--r--  gfx/cairo/libpixman/src/pixman.h                        |  341
-rw-r--r--  gfx/cairo/libpixman/src/refactor                        |  478
-rw-r--r--  gfx/cairo/libpixman/src/solaris-hwcap.mapfile           |   30
67 files changed, 9288 insertions(+), 13225 deletions(-)
diff --git a/gfx/cairo/libpixman/src/Makefile.in b/gfx/cairo/libpixman/src/Makefile.in
deleted file mode 100644
index f119333b44..0000000000
--- a/gfx/cairo/libpixman/src/Makefile.in
+++ /dev/null
@@ -1,10 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-include $(topsrcdir)/config/rules.mk
-
-# The ARM asm functions here don't appreciate being called by functions
-# compiled with -mapcs-frame. See bug 832752.
-CXXFLAGS := $(filter-out -mapcs-frame,$(CXXFLAGS))
-CFLAGS := $(filter-out -mapcs-frame,$(CFLAGS))
diff --git a/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h b/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h
new file mode 100644
index 0000000000..93c8805b51
--- /dev/null
+++ b/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h
@@ -0,0 +1,77 @@
+/* WARNING: This file is generated by make-blue-noise.c
+ * Please edit that file instead of this one.
+ */
+
+#ifndef BLUE_NOISE_64X64_H
+#define BLUE_NOISE_64X64_H
+
+#include <stdint.h>
+
+static const uint16_t dither_blue_noise_64x64[4096] = {
+ 3039, 1368, 3169, 103, 2211, 1248, 2981, 668, 2633, 37, 3963, 2903, 384, 2564, 3115, 1973, 3348, 830, 2505, 1293, 3054, 1060, 1505, 3268, 400, 1341, 593, 3802, 3384, 429, 4082, 1411, 2503, 3863, 126, 1292, 1887, 2855, 205, 2094, 2977, 1899, 3924, 356, 3088, 2500, 3942, 1409, 2293, 1734, 3732, 1291, 3227, 277, 2054, 786, 2871, 411, 2425, 1678, 3986, 455, 2879, 2288,
+ 388, 1972, 3851, 778, 2768, 3697, 944, 2123, 1501, 3533, 937, 1713, 1381, 3888, 156, 1242, 516, 2888, 1607, 3676, 632, 2397, 3804, 2673, 1898, 3534, 2593, 1777, 1170, 2299, 3013, 1838, 523, 3053, 1647, 3601, 3197, 959, 1520, 3633, 893, 2437, 3367, 2187, 1258, 137, 1965, 401, 3546, 643, 3087, 2498, 733, 2786, 3371, 4053, 1266, 1977, 3663, 183, 2570, 2107, 1183, 3708,
+ 907, 2473, 1151, 3363, 1527, 1902, 232, 3903, 3060, 496, 2486, 3206, 2165, 861, 2387, 3653, 2101, 3972, 132, 2162, 3437, 1827, 215, 895, 3114, 271, 969, 2932, 197, 1598, 878, 3696, 1140, 2120, 904, 2431, 302, 3846, 2675, 481, 3187, 66, 1440, 650, 3833, 2826, 3435, 901, 2936, 2111, 250, 1875, 3609, 1174, 1747, 162, 2346, 3420, 913, 3172, 1383, 752, 3298, 1735,
+ 3540, 2938, 249, 2324, 526, 3099, 2561, 1324, 2347, 1861, 1200, 3702, 257, 3442, 1514, 2999, 992, 1766, 2735, 1163, 478, 2943, 1279, 3635, 2177, 1464, 3672, 2386, 3871, 3340, 2690, 64, 3489, 2811, 3999, 633, 1948, 1243, 2269, 1807, 1143, 2750, 3729, 1790, 2363, 1053, 1537, 2636, 4065, 1076, 1476, 3869, 450, 2200, 2676, 658, 2979, 1548, 544, 1913, 2838, 3911, 116, 2698,
+ 517, 1295, 3997, 1739, 3665, 1083, 3509, 599, 3400, 118, 2956, 720, 2689, 1907, 567, 2523, 284, 3397, 711, 3219, 2450, 3985, 1665, 2549, 562, 3011, 1855, 729, 1355, 528, 1908, 2456, 1384, 337, 1540, 2654, 3138, 3513, 703, 4080, 3314, 2047, 855, 3037, 209, 3317, 577, 1828, 17, 2336, 3193, 2748, 962, 3441, 1450, 3246, 1075, 3878, 2615, 3497, 1033, 2310, 1442, 2183,
+ 1654, 3254, 2061, 738, 2832, 148, 2030, 1670, 909, 3850, 2109, 1533, 4046, 1085, 3098, 3897, 1378, 2248, 3829, 1495, 1966, 23, 797, 3427, 1124, 4057, 95, 2787, 2190, 3074, 3950, 742, 3194, 1999, 3386, 1113, 16, 1657, 2804, 201, 1543, 383, 2559, 1325, 3604, 2068, 2493, 3771, 1284, 3460, 710, 1716, 2447, 80, 3811, 2032, 347, 2227, 15, 1689, 397, 3084, 662, 3798,
+ 973, 43, 2608, 3143, 1459, 2423, 4066, 2770, 3191, 1283, 2630, 314, 3235, 2289, 72, 1822, 2840, 924, 350, 2653, 1057, 3715, 2235, 2775, 346, 2083, 1553, 3292, 1081, 274, 1686, 1188, 2327, 3743, 578, 2234, 3916, 2519, 1011, 3056, 2207, 3438, 3890, 537, 1617, 837, 3094, 373, 2795, 1980, 276, 3951, 1353, 3015, 844, 1724, 3651, 2923, 1316, 4092, 2504, 3627, 1936, 2854,
+ 2461, 3929, 1193, 421, 3746, 820, 1180, 286, 2261, 532, 3625, 1812, 802, 1327, 3527, 670, 3730, 2025, 3124, 3565, 529, 2960, 1769, 1390, 3196, 2494, 3756, 796, 3618, 2602, 3463, 2847, 166, 953, 1745, 2900, 438, 2070, 1418, 3741, 639, 1205, 1891, 2882, 2282, 4012, 1182, 1696, 3630, 951, 2904, 2170, 3530, 375, 2320, 2742, 1132, 701, 3216, 2023, 847, 1230, 310, 3431,
+ 770, 1961, 3531, 1702, 2181, 3370, 1877, 3072, 1571, 3389, 1071, 2415, 3782, 2803, 1610, 2454, 1211, 182, 1655, 2322, 1282, 3372, 287, 3935, 704, 1232, 415, 1910, 2286, 1399, 556, 1964, 4068, 2444, 3605, 1272, 3345, 816, 3526, 256, 2402, 2777, 955, 345, 3289, 111, 2727, 635, 2396, 1488, 3331, 600, 1032, 1575, 4026, 515, 3507, 2433, 1605, 460, 3364, 2783, 1810, 1397,
+ 2334, 223, 2945, 688, 2533, 99, 2705, 624, 3944, 2073, 46, 2978, 508, 2132, 269, 3173, 3453, 2631, 4076, 694, 1892, 2586, 972, 2178, 3470, 1695, 2849, 3141, 77, 3884, 994, 3029, 1536, 673, 3083, 124, 2583, 1722, 2821, 1944, 4027, 1661, 3176, 3728, 1337, 1813, 3503, 2035, 3930, 157, 2537, 1865, 3096, 2646, 1941, 3252, 1449, 135, 2836, 3758, 2139, 84, 3678, 3106,
+ 3862, 1545, 3307, 1320, 3955, 1031, 3664, 1306, 2460, 776, 1487, 3294, 1187, 3990, 1903, 1021, 549, 1484, 943, 3027, 97, 3853, 1499, 2880, 198, 2575, 3995, 1089, 1587, 2475, 3282, 339, 2657, 1158, 2105, 1493, 3943, 580, 3232, 1287, 846, 48, 2480, 2112, 771, 2534, 459, 3134, 850, 1298, 3790, 325, 3652, 1249, 193, 940, 2202, 3895, 1829, 911, 1366, 2577, 1069, 534,
+ 2104, 1009, 2667, 392, 1983, 2917, 1645, 324, 3439, 2869, 3705, 1767, 2592, 756, 2916, 3683, 2276, 2850, 2053, 3594, 2403, 3181, 634, 3699, 1933, 906, 519, 2150, 3673, 764, 1770, 2220, 3795, 3336, 502, 3547, 2339, 1110, 301, 2210, 3354, 3643, 569, 1518, 2940, 3973, 1138, 1613, 2773, 2127, 2983, 1671, 769, 2161, 3800, 2730, 3127, 1179, 533, 3259, 2284, 4014, 1651, 2820,
+ 3566, 653, 1839, 3455, 2399, 789, 3149, 2244, 1863, 1099, 474, 2307, 158, 3541, 1312, 1711, 0, 3902, 360, 1629, 1091, 395, 1781, 1191, 2374, 3353, 1419, 3225, 206, 2931, 3553, 1046, 54, 1646, 2470, 910, 1860, 3137, 3770, 2635, 1562, 2809, 1215, 3788, 222, 2199, 3335, 67, 3606, 524, 1001, 3309, 2410, 3473, 591, 1619, 291, 2502, 3629, 2891, 335, 741, 3378, 168,
+ 2384, 3129, 4051, 22, 1444, 3613, 543, 3893, 186, 2665, 4062, 933, 3058, 2142, 449, 2711, 3224, 849, 1330, 3349, 2195, 2670, 3484, 2993, 32, 3774, 2722, 1859, 2548, 1268, 583, 2027, 3165, 2807, 4029, 227, 2897, 1434, 721, 1816, 195, 905, 2066, 3258, 1754, 970, 2674, 1880, 2338, 3915, 1485, 2660, 14, 1313, 2914, 2046, 4074, 791, 1917, 1301, 1725, 2687, 2019, 1443,
+ 418, 1186, 1664, 2859, 1049, 2056, 2741, 1226, 1589, 3186, 2042, 1377, 3449, 1574, 3941, 1063, 1930, 2501, 3751, 2930, 671, 4031, 888, 2081, 1544, 684, 1117, 351, 4052, 1698, 2393, 3881, 1439, 785, 1277, 2013, 3488, 441, 2459, 3980, 3061, 3481, 2543, 419, 3020, 609, 3515, 1350, 799, 2878, 348, 2034, 3966, 1824, 950, 3281, 1394, 2239, 3452, 55, 3922, 3119, 892, 3785,
+ 3023, 2140, 782, 2492, 3817, 241, 3355, 2424, 856, 3639, 612, 2556, 245, 2858, 705, 2316, 3562, 495, 1748, 128, 1912, 1454, 280, 2552, 3905, 3130, 2274, 3472, 834, 3055, 240, 2692, 471, 2272, 3301, 2632, 1080, 3693, 2136, 1029, 1364, 590, 1611, 4067, 1190, 2360, 3827, 261, 3180, 1768, 3471, 1103, 3003, 520, 3674, 151, 2571, 555, 3033, 982, 2353, 504, 1259, 2555,
+ 149, 3889, 3380, 493, 3178, 1681, 663, 1924, 2990, 49, 1792, 3861, 1192, 1987, 3273, 297, 1457, 3043, 1177, 2292, 3249, 2829, 3682, 1154, 1758, 428, 2872, 1993, 1500, 3703, 1129, 3421, 1840, 3754, 163, 659, 1733, 3182, 38, 2875, 1957, 3614, 2237, 78, 1873, 2801, 1513, 2121, 1074, 2516, 667, 3710, 1429, 2430, 2088, 2830, 1072, 3557, 1531, 2733, 1955, 3286, 3590, 1826,
+ 2778, 1068, 1932, 1452, 2279, 1185, 3564, 3952, 1391, 2726, 3313, 2331, 870, 3709, 1674, 2772, 4085, 808, 2596, 3848, 927, 538, 2335, 3334, 773, 3597, 1347, 109, 2663, 608, 2108, 2994, 936, 1524, 2922, 3968, 2422, 1467, 845, 3870, 321, 2704, 1073, 3308, 3680, 823, 430, 3375, 4030, 112, 2171, 2695, 267, 3374, 731, 1627, 3919, 1871, 352, 3839, 1370, 234, 794, 1532,
+ 3245, 647, 3575, 74, 3045, 2766, 285, 2174, 498, 1059, 1551, 385, 3125, 2598, 143, 1128, 2095, 3395, 318, 1590, 3524, 1345, 1969, 242, 2759, 2092, 947, 3926, 3244, 2356, 1658, 6, 3593, 2554, 1172, 1995, 371, 2755, 3417, 2294, 1570, 3164, 748, 2517, 1401, 3111, 2420, 1662, 2910, 1276, 3276, 854, 1804, 4000, 1253, 2987, 229, 2344, 3184, 649, 2196, 2921, 4095, 2389,
+ 1289, 2193, 2579, 4023, 757, 1858, 986, 3199, 2514, 3475, 4021, 2154, 651, 1432, 3468, 2404, 574, 1799, 3105, 2145, 86, 2614, 3218, 1565, 4088, 2481, 3079, 1815, 323, 1212, 3837, 759, 2159, 435, 3223, 784, 3659, 1114, 1888, 550, 1221, 3786, 1803, 499, 2117, 185, 3763, 942, 589, 2001, 3838, 1483, 3154, 2256, 468, 2544, 3403, 898, 1208, 2610, 3622, 967, 1929, 378,
+ 3781, 220, 1656, 1115, 3347, 2428, 3822, 1577, 712, 1959, 110, 2765, 1762, 3854, 979, 2928, 3714, 1371, 746, 3969, 2884, 975, 3779, 641, 1142, 159, 1460, 702, 3485, 2866, 2495, 3330, 1305, 3937, 1635, 2229, 2962, 146, 4055, 3091, 2417, 100, 3508, 2933, 4006, 1167, 1920, 2760, 3552, 2545, 433, 2845, 142, 1056, 1886, 3616, 1435, 2099, 3803, 1749, 27, 1446, 3350, 2843,
+ 884, 3310, 2948, 2103, 447, 1351, 187, 2895, 3655, 1256, 3036, 932, 3325, 2257, 451, 1915, 40, 2780, 2438, 1112, 1814, 423, 2290, 1905, 2898, 3419, 2306, 3760, 1938, 486, 1019, 1791, 3010, 2628, 203, 3408, 1269, 2507, 1606, 862, 2779, 2078, 952, 1529, 2638, 708, 3332, 1413, 2, 1726, 1156, 3500, 2392, 3791, 3076, 812, 107, 2861, 501, 3050, 3487, 2455, 594, 1731,
+ 2685, 1498, 680, 3908, 2621, 3529, 1786, 2236, 342, 2569, 1526, 3722, 230, 1290, 3203, 3947, 1609, 3516, 467, 3267, 3685, 1461, 3140, 3569, 367, 1759, 928, 2754, 1332, 2219, 4034, 260, 655, 1984, 978, 3814, 617, 2086, 3525, 279, 3841, 1373, 3361, 319, 2251, 3066, 407, 2382, 3918, 3133, 2168, 762, 1523, 507, 2641, 1677, 4025, 2413, 1584, 793, 2049, 1109, 3962, 2218,
+ 1194, 3692, 266, 1687, 981, 3103, 740, 3983, 1005, 3434, 570, 2383, 1942, 2718, 676, 2462, 1007, 2089, 1308, 2222, 233, 2568, 829, 1241, 2669, 3987, 514, 3303, 69, 3142, 1603, 3560, 2295, 3288, 1497, 2696, 1764, 2865, 1058, 3271, 1914, 477, 2529, 3927, 1736, 1273, 3752, 2029, 1012, 565, 2798, 4078, 1949, 3305, 1175, 2179, 380, 3366, 1195, 3849, 2637, 416, 2959, 125,
+ 3396, 2467, 2036, 3234, 2340, 68, 2819, 1436, 2011, 3139, 1704, 4073, 860, 3582, 1468, 2969, 211, 3157, 4056, 866, 2935, 2000, 3923, 31, 2157, 1477, 2429, 1147, 3792, 2557, 774, 2802, 1153, 3747, 464, 3192, 42, 3904, 539, 1474, 2283, 803, 2876, 1061, 75, 3477, 747, 2893, 1538, 3626, 251, 1322, 2506, 189, 2791, 3667, 939, 2991, 1971, 175, 3195, 1416, 3648, 1857,
+ 3052, 454, 851, 3789, 1271, 1906, 3694, 2484, 406, 2757, 26, 1189, 2909, 296, 2215, 3784, 1864, 637, 2715, 1673, 3445, 581, 1572, 3059, 3469, 761, 2984, 1737, 2058, 440, 1414, 1921, 121, 2527, 894, 2223, 1302, 2377, 3077, 2666, 3759, 3198, 1811, 3661, 2166, 2731, 1883, 359, 3285, 2458, 1805, 3459, 926, 3834, 675, 1893, 1496, 2612, 657, 3523, 1763, 2354, 564, 961,
+ 1367, 3977, 1588, 2714, 322, 3446, 1088, 625, 3887, 1354, 3535, 2090, 3316, 1760, 1127, 483, 3491, 1421, 2301, 94, 1202, 3740, 2311, 1014, 1878, 3836, 180, 3412, 991, 2868, 3953, 3450, 3081, 1632, 4071, 1882, 3543, 726, 1719, 179, 1171, 364, 1420, 622, 3090, 1490, 946, 4007, 2212, 1102, 619, 2739, 2189, 1669, 2937, 3426, 39, 3940, 2191, 1264, 887, 4091, 2792, 2135,
+ 4, 2883, 2281, 631, 3044, 1641, 2232, 3243, 1773, 2319, 827, 2591, 629, 3938, 2426, 3222, 2629, 1044, 3879, 3293, 1952, 2749, 275, 2590, 472, 1372, 2496, 660, 3669, 2264, 208, 915, 2167, 561, 2828, 307, 3265, 1104, 3964, 2155, 3425, 1951, 4077, 2391, 283, 3387, 2581, 115, 1415, 3069, 3896, 141, 3158, 1214, 442, 2405, 1349, 3085, 425, 2528, 3002, 312, 1602, 3588,
+ 1137, 3323, 1963, 1002, 3578, 2521, 127, 925, 2970, 273, 3737, 1573, 167, 2863, 1509, 800, 147, 2059, 2942, 409, 921, 3151, 1451, 3909, 3333, 2844, 2096, 1512, 3136, 1210, 1798, 2709, 1331, 3586, 1034, 1521, 2441, 2926, 488, 2585, 775, 3031, 2693, 879, 3602, 1173, 2028, 3654, 2781, 841, 1975, 1507, 3646, 768, 3991, 2012, 996, 3544, 1666, 3810, 1990, 3360, 753, 2597,
+ 3736, 304, 1473, 3828, 485, 1334, 4008, 2072, 3495, 1136, 2806, 2004, 3236, 1010, 2130, 3819, 1750, 3567, 644, 2515, 1794, 3636, 698, 2137, 1162, 832, 3761, 326, 2613, 513, 3302, 3820, 357, 3163, 2259, 3733, 101, 1922, 1386, 3587, 1640, 28, 1286, 2141, 1761, 2918, 693, 1639, 457, 3250, 2434, 365, 2599, 1729, 3284, 2643, 306, 2793, 689, 1090, 104, 1309, 2305, 1831,
+ 2776, 859, 2446, 2915, 1778, 3337, 2677, 614, 1508, 2409, 469, 4033, 1321, 3563, 402, 3131, 2720, 1093, 1569, 4042, 1229, 2277, 216, 3046, 1817, 57, 3006, 1684, 4059, 2016, 795, 2440, 1652, 1960, 610, 2763, 920, 3864, 3110, 1026, 2326, 3762, 3233, 521, 3856, 173, 2457, 3939, 2138, 1262, 3572, 989, 3021, 2238, 119, 1445, 3832, 1809, 2297, 3467, 2700, 3684, 3102, 394,
+ 4036, 2050, 3256, 89, 2198, 1079, 248, 1845, 3805, 3104, 880, 1779, 2688, 717, 2373, 1375, 262, 2249, 3071, 13, 2813, 3429, 1600, 3984, 2416, 3603, 1299, 2298, 998, 3492, 1393, 2951, 10, 4009, 1247, 3462, 1679, 2204, 414, 2736, 316, 1894, 2816, 1050, 3373, 1462, 3107, 817, 3464, 21, 1835, 4070, 568, 1178, 3718, 875, 3168, 466, 2974, 1458, 2084, 616, 1564, 1018,
+ 1693, 546, 1244, 3899, 716, 3160, 3608, 2877, 1220, 334, 3443, 2270, 44, 3000, 1843, 3928, 3405, 766, 3686, 2040, 587, 993, 2647, 387, 930, 2753, 630, 3274, 150, 2808, 453, 3638, 1092, 2352, 3030, 239, 2562, 700, 3240, 1257, 4016, 730, 1515, 2203, 2551, 417, 1866, 1123, 2348, 2902, 1550, 2678, 2075, 3238, 1630, 2531, 2115, 1255, 4054, 840, 290, 3874, 2477, 3399,
+ 2250, 3577, 2817, 1626, 2576, 1356, 2315, 792, 2087, 2618, 1612, 3855, 1263, 3637, 1036, 494, 1535, 2553, 1198, 1715, 3867, 3170, 1359, 1954, 3483, 1539, 2069, 3886, 1772, 2487, 1534, 2045, 3242, 806, 1578, 2018, 3948, 1423, 3596, 2076, 2466, 3424, 139, 3688, 871, 4049, 2852, 3342, 547, 3719, 327, 852, 3505, 207, 2794, 542, 3600, 45, 2411, 3324, 1788, 3012, 1235, 61,
+ 2655, 917, 253, 1986, 3738, 313, 1706, 4072, 120, 3229, 957, 597, 2024, 3262, 2453, 2857, 2002, 3190, 210, 2784, 2206, 300, 2400, 3766, 553, 3152, 218, 1150, 2988, 883, 3753, 627, 2664, 3831, 437, 3385, 1008, 2957, 60, 1636, 891, 2899, 1776, 3062, 1315, 2026, 194, 1643, 2079, 1296, 3201, 2465, 1379, 1927, 3898, 1125, 1847, 2846, 1552, 1028, 2725, 2169, 787, 3202,
+ 1441, 3982, 3032, 1052, 3251, 605, 2639, 3073, 1431, 3642, 2329, 2949, 341, 1634, 833, 129, 4020, 916, 3571, 669, 1506, 3411, 821, 2856, 1207, 2337, 2683, 3448, 340, 2214, 3128, 235, 1738, 1288, 2833, 2419, 606, 1884, 2668, 552, 3765, 1176, 399, 2302, 596, 3591, 2634, 767, 3845, 2767, 995, 3967, 491, 3057, 814, 2300, 3422, 691, 3797, 254, 3645, 509, 3478, 1836,
+ 2119, 475, 2445, 1525, 2175, 3539, 914, 1926, 473, 1157, 1800, 3971, 2701, 3739, 2129, 3486, 1333, 1784, 2366, 2982, 1070, 4089, 1802, 73, 1642, 3958, 835, 1837, 1480, 4043, 1217, 2469, 3416, 2113, 88, 3668, 1240, 3255, 3920, 2355, 3167, 2003, 2645, 3936, 3228, 1592, 1144, 3474, 2394, 79, 1820, 2241, 1594, 3656, 2584, 153, 1448, 3034, 2005, 2511, 1692, 1335, 3913, 217,
+ 2822, 3391, 745, 3813, 192, 1274, 2941, 3847, 2489, 3440, 744, 161, 1422, 1086, 572, 3004, 2617, 338, 3807, 2031, 236, 2472, 3065, 2098, 3358, 362, 2163, 3574, 497, 2788, 1970, 948, 3885, 685, 3100, 1712, 2228, 292, 1408, 1016, 164, 3537, 1417, 941, 34, 2172, 3001, 358, 1491, 3147, 699, 3356, 258, 1149, 2946, 1787, 3931, 382, 1146, 3291, 818, 2890, 2379, 1096,
+ 3679, 1328, 1901, 3162, 2747, 1730, 2253, 5, 1556, 2818, 2093, 3166, 2522, 3410, 2287, 1701, 956, 3237, 620, 1596, 3300, 1307, 511, 3701, 1020, 2939, 1362, 2532, 3208, 749, 3641, 160, 1522, 2624, 1095, 4086, 826, 2841, 3583, 2173, 1727, 723, 2925, 1911, 2482, 3726, 863, 1962, 4028, 1111, 2835, 3773, 2449, 2022, 582, 3278, 923, 2619, 2152, 4039, 92, 1934, 3145, 677,
+ 2530, 53, 2303, 1003, 458, 3989, 739, 3321, 1064, 369, 3556, 877, 1900, 426, 3876, 1, 3617, 2106, 1197, 2805, 3634, 857, 2706, 1504, 2418, 682, 3868, 20, 1139, 1688, 2333, 3311, 2907, 1945, 265, 2385, 3433, 1601, 636, 2620, 3095, 4044, 386, 3382, 1184, 527, 2814, 3414, 2342, 465, 1889, 1343, 874, 3479, 1502, 2233, 3689, 1385, 559, 2745, 1463, 3465, 376, 1718,
+ 3217, 4045, 1580, 3612, 2525, 1228, 3018, 1958, 3725, 2358, 1361, 3996, 1581, 3063, 1224, 2737, 1475, 2442, 3946, 191, 1796, 2128, 3975, 134, 1916, 3318, 1597, 2071, 3749, 2672, 403, 1278, 602, 3745, 3220, 1374, 445, 2064, 3830, 243, 1252, 2390, 1563, 2724, 3875, 1818, 1346, 165, 1650, 3264, 2680, 117, 2998, 4081, 343, 2799, 9, 3122, 1743, 3724, 1040, 2231, 3842, 1209,
+ 900, 398, 2851, 697, 1797, 3482, 293, 2679, 1649, 566, 2954, 91, 2697, 714, 2060, 3211, 781, 480, 3040, 1038, 2611, 666, 2989, 3458, 1201, 2796, 548, 2975, 839, 3121, 1850, 4001, 2208, 1631, 790, 2558, 2972, 1148, 3213, 1849, 3624, 971, 2102, 108, 772, 3101, 2589, 3777, 1042, 656, 3907, 2097, 1615, 2540, 805, 1935, 1231, 3494, 2451, 268, 2995, 750, 2682, 2020,
+ 3024, 1392, 2124, 3279, 106, 2217, 1387, 822, 3214, 3825, 2160, 1000, 2395, 3691, 228, 4038, 1872, 3413, 1608, 2225, 3536, 303, 1653, 886, 2541, 224, 4037, 2252, 1428, 172, 3504, 958, 2848, 113, 3628, 1834, 3979, 19, 2317, 779, 2797, 518, 3174, 3549, 1482, 2266, 444, 2014, 3555, 2439, 1213, 3113, 535, 1135, 3204, 3858, 2309, 931, 623, 2009, 3359, 1566, 140, 3550,
+ 1808, 3872, 2488, 1152, 3764, 2892, 3960, 2412, 353, 1223, 1825, 3444, 3116, 1717, 1082, 2313, 1280, 2661, 82, 3852, 1389, 3200, 2330, 3812, 2038, 3581, 1728, 1039, 3339, 2427, 586, 2580, 1238, 3328, 2280, 1047, 595, 2662, 1363, 3338, 1620, 3934, 2497, 1881, 1054, 3954, 3215, 864, 2887, 1801, 320, 3519, 2378, 3704, 1753, 424, 2958, 1660, 4005, 2601, 1116, 3912, 2381, 573,
+ 2740, 200, 828, 1667, 432, 1931, 1035, 1616, 3598, 2640, 728, 264, 1437, 557, 3501, 2966, 372, 3734, 974, 1978, 758, 2719, 1145, 452, 1433, 725, 2681, 408, 3843, 1918, 1547, 3906, 1996, 503, 1456, 3019, 3493, 1700, 3742, 355, 2134, 176, 1311, 615, 2867, 315, 1680, 1314, 8, 3297, 1494, 783, 1950, 83, 2656, 1382, 3561, 138, 2834, 1404, 330, 1904, 3156, 1027,
+ 1357, 3381, 3041, 3666, 2729, 734, 3415, 177, 3051, 2021, 4079, 2823, 3775, 2186, 2616, 869, 1668, 3148, 2367, 3315, 393, 4075, 1870, 2920, 3343, 2362, 3188, 1303, 2782, 825, 3171, 259, 2905, 3717, 2538, 184, 2074, 838, 2860, 2407, 1024, 3496, 3008, 3706, 1985, 2349, 3623, 2582, 4058, 2184, 2694, 3873, 2964, 990, 3346, 690, 2033, 1066, 2201, 3490, 2971, 718, 3700, 2188,
+ 4061, 391, 1989, 2325, 1430, 3150, 2125, 2526, 592, 1403, 976, 2351, 1165, 1851, 114, 3921, 2063, 613, 1358, 2785, 1623, 2254, 25, 3542, 1045, 246, 1852, 3554, 87, 2243, 3615, 1169, 727, 1705, 968, 3957, 3185, 1251, 500, 4063, 1751, 2622, 842, 1519, 90, 3393, 819, 490, 1874, 999, 571, 1275, 2271, 1586, 4040, 2448, 3126, 3731, 436, 885, 1708, 2421, 24, 1599,
+ 889, 2563, 1199, 645, 70, 4013, 1237, 3723, 1694, 3499, 3, 3266, 484, 2997, 3390, 1233, 2842, 3687, 152, 3480, 1084, 3698, 881, 2490, 1542, 3992, 2209, 692, 1690, 3022, 1470, 2625, 2114, 3512, 2359, 381, 2684, 1897, 3368, 1395, 3080, 289, 2065, 3981, 2758, 1141, 3097, 1472, 2870, 3352, 3707, 225, 3159, 505, 1895, 214, 1222, 1774, 2686, 3978, 3275, 1196, 3518, 2825,
+ 3270, 1720, 3796, 3466, 2650, 1841, 298, 899, 2862, 2091, 2671, 1744, 3735, 801, 1560, 349, 2262, 903, 1833, 2524, 512, 3117, 1793, 2827, 476, 3038, 1216, 2550, 3826, 980, 431, 4048, 35, 2992, 1265, 1595, 765, 3675, 76, 2247, 696, 3456, 1254, 2452, 664, 1757, 2133, 3750, 145, 2332, 1554, 1981, 3580, 2712, 868, 3640, 2919, 638, 2275, 1427, 309, 2595, 2006, 492,
+ 2226, 178, 2911, 836, 1528, 3028, 2240, 3327, 404, 3970, 707, 1294, 2464, 2131, 4032, 2600, 3319, 1406, 2913, 3974, 2156, 1425, 221, 3877, 2017, 811, 3662, 272, 3287, 1988, 2408, 3357, 1746, 598, 3239, 3823, 2182, 2934, 1078, 2604, 3840, 1697, 2906, 413, 3210, 3880, 331, 2644, 1260, 848, 3042, 2535, 1077, 1438, 3261, 2365, 1561, 3799, 85, 3082, 1876, 674, 3932, 1101,
+ 3644, 1344, 1943, 2401, 390, 3835, 1048, 2572, 1541, 1133, 3075, 3584, 308, 2889, 1065, 1869, 601, 3783, 282, 1181, 736, 3312, 2368, 1126, 3383, 1675, 2734, 1426, 628, 2873, 1317, 843, 2717, 2048, 1004, 2536, 333, 1782, 3295, 1517, 219, 2153, 815, 3502, 1579, 2268, 987, 3409, 1780, 4018, 354, 665, 3914, 47, 1956, 456, 1006, 2010, 3406, 1130, 3621, 2894, 1549, 3092,
+ 2485, 640, 3993, 3179, 1270, 3436, 585, 1925, 3757, 2304, 136, 1976, 1486, 646, 3520, 50, 3155, 1637, 2435, 3522, 1937, 2756, 3748, 661, 2224, 58, 3230, 2357, 1830, 3892, 170, 3607, 1447, 3949, 190, 3392, 1336, 584, 4010, 918, 3016, 3670, 1155, 2406, 52, 1304, 3009, 607, 2085, 2699, 3205, 1848, 2291, 3402, 2764, 3865, 3048, 2508, 735, 2710, 443, 2341, 897, 263,
+ 1785, 2769, 983, 56, 2197, 1685, 2703, 202, 2944, 810, 3377, 2626, 3787, 3047, 2055, 1236, 2752, 2122, 945, 3093, 96, 1624, 439, 3014, 1388, 4015, 977, 448, 3506, 1098, 2242, 3026, 506, 2361, 2952, 1862, 3619, 2790, 1992, 2483, 525, 1868, 2652, 4093, 1998, 3595, 2478, 3816, 122, 1412, 929, 3716, 1166, 1648, 813, 1300, 199, 1489, 3998, 1771, 1310, 3808, 2052, 3423,
+ 434, 3712, 1625, 3558, 2955, 853, 4019, 1348, 3511, 1732, 1246, 487, 934, 1672, 2510, 3965, 788, 3711, 396, 1369, 4090, 1055, 2603, 1879, 3528, 2518, 2067, 3005, 1516, 2588, 751, 1740, 3418, 1131, 1576, 686, 2296, 1118, 18, 3263, 1365, 3401, 294, 737, 3177, 410, 867, 1633, 2963, 3579, 2375, 252, 2881, 479, 2471, 3576, 2180, 3306, 332, 2255, 3035, 41, 2648, 1396,
+ 2929, 2230, 1219, 2512, 446, 2008, 3189, 2388, 626, 2164, 2831, 4047, 2376, 174, 3272, 368, 1469, 3226, 2578, 1991, 2874, 2263, 3681, 876, 188, 1239, 683, 3776, 226, 3183, 4083, 2148, 63, 2649, 3859, 299, 3086, 3933, 1585, 2185, 3767, 988, 1707, 2908, 1407, 1844, 2771, 2245, 1161, 560, 1755, 3376, 2051, 4064, 3135, 1832, 652, 2853, 1051, 3649, 760, 3290, 1105, 3945,
+ 872, 154, 3207, 713, 3780, 1453, 281, 1087, 3695, 30, 3299, 1919, 1400, 3551, 1119, 1890, 2314, 618, 1703, 3428, 724, 295, 3146, 1557, 3341, 2896, 1683, 2723, 1974, 1017, 541, 1380, 3720, 804, 3280, 2082, 997, 2567, 777, 2961, 213, 2707, 2328, 3632, 1025, 3891, 3304, 255, 4003, 3108, 2587, 1323, 743, 1479, 105, 1013, 3901, 1618, 2044, 2627, 1465, 1846, 576, 1994,
+ 2560, 3521, 1742, 2118, 2800, 3404, 1783, 2609, 2968, 1582, 1022, 412, 2713, 687, 2976, 3857, 2761, 3620, 62, 1108, 3844, 1340, 2100, 540, 2345, 3925, 405, 3457, 1319, 2468, 3362, 2815, 1867, 2372, 1281, 1714, 3690, 482, 3498, 1842, 1285, 3994, 558, 2039, 81, 2499, 678, 1481, 1923, 964, 12, 3824, 2980, 2205, 2762, 3432, 2398, 181, 3247, 462, 4094, 2350, 3589, 3089,
+ 1555, 1094, 4041, 247, 1267, 908, 3959, 2041, 732, 3860, 2343, 3132, 3769, 2144, 1621, 237, 912, 1329, 3025, 2146, 2642, 1775, 3721, 2746, 1121, 1953, 902, 2285, 130, 3671, 1659, 278, 3153, 522, 2721, 123, 2996, 1466, 2380, 377, 3231, 873, 1510, 3476, 3123, 1250, 2147, 3650, 2839, 3451, 2323, 1122, 3545, 379, 1765, 1218, 603, 3768, 1360, 938, 2885, 133, 1245, 363,
+ 2364, 554, 2743, 3344, 2474, 530, 3112, 169, 1297, 3430, 536, 1741, 98, 1043, 2574, 3253, 2246, 1854, 4022, 510, 3283, 204, 858, 3398, 36, 3118, 1478, 3794, 2986, 706, 2176, 922, 3559, 1097, 3976, 3322, 2149, 1160, 2810, 3883, 2007, 2513, 2953, 328, 1721, 3793, 422, 2566, 807, 329, 1638, 1967, 648, 2520, 3727, 3109, 2116, 2927, 2491, 1939, 3365, 1709, 2728, 3815,
+ 2037, 3120, 831, 1405, 1896, 3592, 1622, 2369, 2864, 2151, 1107, 2542, 3532, 1410, 3917, 427, 3568, 709, 2509, 1503, 1037, 2973, 2436, 1604, 4035, 2594, 563, 1819, 2659, 1234, 4004, 2565, 1511, 2273, 1823, 336, 882, 3772, 575, 1628, 171, 3570, 1120, 2260, 2716, 935, 3064, 1806, 1342, 3144, 3900, 2744, 3296, 985, 1546, 238, 896, 1663, 305, 3660, 695, 2213, 960, 3407,
+ 144, 1795, 3894, 2267, 51, 2708, 1023, 3818, 366, 1821, 4087, 2985, 755, 2057, 2912, 949, 1583, 2774, 231, 3447, 2258, 3866, 1982, 672, 1225, 2077, 3320, 1062, 370, 3241, 1968, 7, 3068, 681, 3631, 2573, 1567, 3175, 2321, 1067, 3070, 722, 1856, 3744, 642, 1471, 4084, 131, 3514, 2443, 531, 1227, 155, 2265, 4024, 2658, 3326, 3910, 1168, 3078, 1530, 3956, 489, 1424,
+ 3647, 1203, 420, 2924, 3755, 719, 3248, 1376, 3067, 890, 196, 1559, 3269, 270, 2432, 1885, 3212, 1164, 3778, 1752, 579, 1338, 344, 3585, 3017, 288, 3658, 2371, 3882, 1691, 611, 2789, 3809, 1339, 389, 2950, 2015, 59, 3548, 2751, 2158, 4011, 1352, 29, 3388, 2370, 2812, 1946, 954, 2110, 1558, 2947, 3573, 1909, 1326, 679, 1853, 2312, 551, 2702, 33, 2414, 3209, 2824,
+ 2547, 2143, 3379, 966, 1492, 1979, 2479, 463, 2194, 3657, 2738, 2318, 1261, 3713, 604, 4002, 11, 2192, 2967, 919, 2607, 3369, 2837, 1676, 2539, 984, 1568, 93, 2901, 1318, 3538, 1041, 2216, 1756, 3454, 1030, 4050, 1402, 798, 1723, 311, 3277, 2546, 2886, 2043, 461, 1206, 3677, 361, 3260, 3988, 809, 2605, 470, 3007, 3517, 102, 3221, 1398, 2062, 3611, 1134, 1928, 865,
+ 4060, 621, 1710, 2606, 3510, 317, 4017, 1682, 3329, 1159, 1940, 654, 3461, 1789, 1015, 2691, 1455, 3599, 374, 1947, 4069, 71, 2126, 763, 3961, 2278, 3161, 1997, 824, 2623, 2080, 244, 3257, 780, 2732, 2308, 545, 3351, 2476, 3806, 1204, 588, 1591, 963, 3610, 1699, 754, 3049, 2651, 1106, 65, 2221, 1644, 3821, 1100, 2463, 1614, 3801, 965, 2965, 715, 3394, 1593, 212,
+};
+
+#endif /* BLUE_NOISE_64X64_H */
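
A note on consumption: each entry of the table above is a unique rank in
0..4095 (assigned by the generator below), so rank / 4096 serves as an
ordered-dither threshold that tiles seamlessly. A minimal sketch of a caller
along these lines -- the helper is hypothetical, not part of this patch:

    /* threshold a gray level g in [0, 1) against the tiled 64x64 pattern */
    static int
    dither_bit (float g, int x, int y)
    {
        uint16_t rank = dither_blue_noise_64x64[((y & 63) << 6) | (x & 63)];
        return g * 4096.0f > (float) rank;
    }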
diff --git a/gfx/cairo/libpixman/src/dither/make-blue-noise.c b/gfx/cairo/libpixman/src/dither/make-blue-noise.c
new file mode 100644
index 0000000000..f9974b4d44
--- /dev/null
+++ b/gfx/cairo/libpixman/src/dither/make-blue-noise.c
@@ -0,0 +1,679 @@
+/* Blue noise generation using the void-and-cluster method as described in
+ *
+ * The void-and-cluster method for dither array generation
+ * Ulichney, Robert A (1993)
+ *
+ * http://cv.ulichney.com/papers/1993-void-cluster.pdf
+ *
+ * Note that running with openmp (-DUSE_OPENMP) will trigger additional
+ * randomness due to computing reductions in parallel, and is not recommended
+ * unless generating very large dither arrays.
+ */
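+
+/* Typical build & run (an assumed invocation; adjust for your toolchain):
+ *
+ *     cc -O2 -o make-blue-noise make-blue-noise.c -lm
+ *     ./make-blue-noise
+ *
+ * This writes blue-noise-64x64.h; add -DSAVE_PPM=1 for an image preview,
+ * or -fopenmp -DUSE_OPENMP to parallelize.
+ */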
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <stdio.h>
+
+/* Booleans and utility functions */
+
+#ifndef TRUE
+# define TRUE 1
+#endif
+
+#ifndef FALSE
+# define FALSE 0
+#endif
+
+typedef int bool_t;
+
+int
+imin (int x, int y)
+{
+ return x < y ? x : y;
+}
+
+/* Memory allocation */
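+/* malloc (a * b * c), returning NULL if the product would overflow */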
+void *
+malloc_abc (unsigned int a, unsigned int b, unsigned int c)
+{
+ if (a >= INT32_MAX / b)
+ return NULL;
+ else if (a * b >= INT32_MAX / c)
+ return NULL;
+ else
+ return malloc (a * b * c);
+}
+
+/* Random number generation */
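+/* xorwow: a Marsaglia-style xorshift generator with a Weyl sequence
+ * (the "+= 362437" counter) mixed into the output; see G. Marsaglia,
+ * "Xorshift RNGs" (2003). */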
+typedef uint32_t xorwow_state_t[5];
+
+uint32_t
+xorwow_next (xorwow_state_t *state)
+{
+ uint32_t s = (*state)[0],
+ t = (*state)[3];
+ (*state)[3] = (*state)[2];
+ (*state)[2] = (*state)[1];
+ (*state)[1] = s;
+
+ t ^= t >> 2;
+ t ^= t << 1;
+ t ^= s ^ (s << 4);
+
+ (*state)[0] = t;
+ (*state)[4] += 362437;
+
+ return t + (*state)[4];
+}
+
+float
+xorwow_float (xorwow_state_t *s)
+{
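+    /* keep the top 23 random bits and scale them into [0, 1] */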
+ return (xorwow_next (s) >> 9) / (float)((1 << 23) - 1);
+}
+
+/* Floating point matrices
+ *
+ * Used to cache the cluster sizes.
+ */
+typedef struct matrix_t {
+ int width;
+ int height;
+ float *buffer;
+} matrix_t;
+
+bool_t
+matrix_init (matrix_t *matrix, int width, int height)
+{
+ float *buffer;
+
+ if (!matrix)
+ return FALSE;
+
+ buffer = malloc_abc (width, height, sizeof (float));
+
+ if (!buffer)
+ return FALSE;
+
+ matrix->buffer = buffer;
+ matrix->width = width;
+ matrix->height = height;
+
+ return TRUE;
+}
+
+bool_t
+matrix_copy (matrix_t *dst, matrix_t const *src)
+{
+ float *srcbuf = src->buffer,
+ *srcend = src->buffer + src->width * src->height,
+ *dstbuf = dst->buffer;
+
+ if (dst->width != src->width || dst->height != src->height)
+ return FALSE;
+
+ while (srcbuf < srcend)
+ *dstbuf++ = *srcbuf++;
+
+ return TRUE;
+}
+
+float *
+matrix_get (matrix_t *matrix, int x, int y)
+{
+ return &matrix->buffer[y * matrix->width + x];
+}
+
+void
+matrix_destroy (matrix_t *matrix)
+{
+ free (matrix->buffer);
+}
+
+/* Binary patterns */
+typedef struct pattern_t {
+ int width;
+ int height;
+ bool_t *buffer;
+} pattern_t;
+
+bool_t
+pattern_init (pattern_t *pattern, int width, int height)
+{
+ bool_t *buffer;
+
+ if (!pattern)
+ return FALSE;
+
+ buffer = malloc_abc (width, height, sizeof (bool_t));
+
+ if (!buffer)
+ return FALSE;
+
+ pattern->buffer = buffer;
+ pattern->width = width;
+ pattern->height = height;
+
+ return TRUE;
+}
+
+bool_t
+pattern_copy (pattern_t *dst, pattern_t const *src)
+{
+ bool_t *srcbuf = src->buffer,
+ *srcend = src->buffer + src->width * src->height,
+ *dstbuf = dst->buffer;
+
+ if (dst->width != src->width || dst->height != src->height)
+ return FALSE;
+
+ while (srcbuf < srcend)
+ *dstbuf++ = *srcbuf++;
+
+ return TRUE;
+}
+
+bool_t *
+pattern_get (pattern_t *pattern, int x, int y)
+{
+ return &pattern->buffer[y * pattern->width + x];
+}
+
+void
+pattern_fill_white_noise (pattern_t *pattern, float fraction,
+ xorwow_state_t *s)
+{
+ bool_t *buffer = pattern->buffer;
+ bool_t *end = buffer + (pattern->width * pattern->height);
+
+ while (buffer < end)
+ *buffer++ = xorwow_float (s) < fraction;
+}
+
+void
+pattern_destroy (pattern_t *pattern)
+{
+ free (pattern->buffer);
+}
+
+/* Dither arrays */
+typedef struct array_t {
+ int width;
+ int height;
+ uint32_t *buffer;
+} array_t;
+
+bool_t
+array_init (array_t *array, int width, int height)
+{
+ uint32_t *buffer;
+
+ if (!array)
+ return FALSE;
+
+ buffer = malloc_abc (width, height, sizeof (uint32_t));
+
+ if (!buffer)
+ return FALSE;
+
+ array->buffer = buffer;
+ array->width = width;
+ array->height = height;
+
+ return TRUE;
+}
+
+uint32_t *
+array_get (array_t *array, int x, int y)
+{
+ return &array->buffer[y * array->width + x];
+}
+
+bool_t
+array_save_ppm (array_t *array, const char *filename)
+{
+ FILE *f = fopen(filename, "wb");
+
+ int i = 0;
+ int bpp = 2;
+ uint8_t buffer[1024];
+
+ if (!f)
+ return FALSE;
+
+ if (array->width * array->height - 1 < 256)
+ bpp = 1;
+
+    /* "P5" is a binary graymap (PGM) header despite the .ppm filename;
+     * maxval is the largest rank, width * height - 1 */
+    fprintf(f, "P5 %d %d %d\n", array->width, array->height,
+            array->width * array->height - 1);
+    while (i < array->width * array->height)
+    {
+        int j = 0;
+        for (; j < 1024 / bpp && i + j < array->width * array->height; ++j)
+        {
+            uint32_t v = array->buffer[i + j];
+            if (bpp == 2)
+            {
+                /* 16-bit PGM samples are big-endian: MSB first */
+                buffer[2 * j]     = (v & 0xff00) >> 8;
+                buffer[2 * j + 1] = v & 0xff;
+            } else {
+                buffer[j] = v;
+            }
+        }
+
+ fwrite((void *)buffer, bpp, j, f);
+ i += j;
+ }
+
+ if (fclose(f) != 0)
+ return FALSE;
+
+ return TRUE;
+}
+
+bool_t
+array_save (array_t *array, const char *filename)
+{
+ int x, y;
+ FILE *f = fopen(filename, "wb");
+
+ if (!f)
+ return FALSE;
+
+ fprintf (f,
+"/* WARNING: This file is generated by make-blue-noise.c\n"
+" * Please edit that file instead of this one.\n"
+" */\n"
+"\n"
+"#ifndef BLUE_NOISE_%dX%d_H\n"
+"#define BLUE_NOISE_%dX%d_H\n"
+"\n"
+"#include <stdint.h>\n"
+"\n", array->width, array->height, array->width, array->height);
+
+ fprintf (f, "static const uint16_t dither_blue_noise_%dx%d[%d] = {\n",
+ array->width, array->height, array->width * array->height);
+
+ for (y = 0; y < array->height; ++y)
+ {
+ fprintf (f, " ");
+ for (x = 0; x < array->width; ++x)
+ {
+ if (x != 0)
+ fprintf (f, ", ");
+
+ fprintf (f, "%d", *array_get (array, x, y));
+ }
+
+ fprintf (f, ",\n");
+ }
+ fprintf (f, "};\n");
+
+ fprintf (f, "\n#endif /* BLUE_NOISE_%dX%d_H */\n",
+ array->width, array->height);
+
+ if (fclose(f) != 0)
+ return FALSE;
+
+ return TRUE;
+}
+
+void
+array_destroy (array_t *array)
+{
+ free (array->buffer);
+}
+
+/* Dither array generation */
+bool_t
+compute_cluster_sizes (pattern_t *pattern, matrix_t *matrix)
+{
+ int width = pattern->width,
+ height = pattern->height;
+
+ if (matrix->width != width || matrix->height != height)
+ return FALSE;
+
+ int px, py, qx, qy, dx, dy;
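+    /* 2 * sigma^2 of the Gaussian energy filter, with sigma = 1.5 */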
+ float tsqsi = 2.f * 1.5f * 1.5f;
+
+#ifdef USE_OPENMP
+#pragma omp parallel for default (none) \
+ private (py, px, qy, qx, dx, dy) \
+ shared (height, width, pattern, matrix, tsqsi)
+#endif
+ for (py = 0; py < height; ++py)
+ {
+ for (px = 0; px < width; ++px)
+ {
+ bool_t pixel = *pattern_get (pattern, px, py);
+ float dist = 0.f;
+
+ for (qx = 0; qx < width; ++qx)
+ {
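+            /* wrap-around (toroidal) distance, so the pattern tiles seamlessly */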
+ dx = imin (abs (qx - px), width - abs (qx - px));
+ dx = dx * dx;
+
+ for (qy = 0; qy < height; ++qy)
+ {
+ dy = imin (abs (qy - py), height - abs (qy - py));
+ dy = dy * dy;
+
+ dist += (pixel == *pattern_get (pattern, qx, qy))
+ * expf (- (dx + dy) / tsqsi);
+ }
+ }
+
+ *matrix_get (matrix, px, py) = dist;
+ }
+ }
+
+ return TRUE;
+}
+
+bool_t
+swap_pixel (pattern_t *pattern, matrix_t *matrix, int x, int y)
+{
+ int width = pattern->width,
+ height = pattern->height;
+
+ bool_t new;
+
+ float f,
+ dist = 0.f,
+ tsqsi = 2.f * 1.5f * 1.5f;
+
+ int px, py, dx, dy;
+ bool_t b;
+
+    /* validate the dimensions before mutating the pattern */
+    if (matrix->width != width || matrix->height != height)
+        return FALSE;
+
+    new = !*pattern_get (pattern, x, y);
+    *pattern_get (pattern, x, y) = new;
+
+
+#ifdef USE_OPENMP
+#pragma omp parallel for reduction (+:dist) default (none) \
+ private (px, py, dx, dy, b, f) \
+ shared (x, y, width, height, pattern, matrix, new, tsqsi)
+#endif
+ for (py = 0; py < height; ++py)
+ {
+ dy = imin (abs (py - y), height - abs (py - y));
+ dy = dy * dy;
+
+ for (px = 0; px < width; ++px)
+ {
+ dx = imin (abs (px - x), width - abs (px - x));
+ dx = dx * dx;
+
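+            /* pixels equal to the flipped value gain the Gaussian term,
+             * all others lose it: (2 * b - 1) is +1 or -1 */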
+ b = (*pattern_get (pattern, px, py) == new);
+ f = expf (- (dx + dy) / tsqsi);
+ *matrix_get (matrix, px, py) += (2 * b - 1) * f;
+
+ dist += b * f;
+ }
+ }
+
+ *matrix_get (matrix, x, y) = dist;
+ return TRUE;
+}
+
+void
+largest_cluster (pattern_t *pattern, matrix_t *matrix,
+ bool_t pixel, int *xmax, int *ymax)
+{
+ int width = pattern->width,
+ height = pattern->height;
+
+ int x, y;
+
+ float vmax = -INFINITY;
+
+#ifdef USE_OPENMP
+#pragma omp parallel default (none) \
+ private (x, y) \
+ shared (height, width, pattern, matrix, pixel, xmax, ymax, vmax)
+#endif
+ {
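+        /* each thread tracks its own best coordinate; after the max
+         * reduction on vmax, the thread whose local vbest matches the
+         * global vmax publishes its coordinates in the critical section */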
+ int xbest = -1,
+ ybest = -1;
+
+#ifdef USE_OPENMP
+ float vbest = -INFINITY;
+
+#pragma omp for reduction (max: vmax) collapse (2)
+#endif
+ for (y = 0; y < height; ++y)
+ {
+ for (x = 0; x < width; ++x)
+ {
+ if (*pattern_get (pattern, x, y) != pixel)
+ continue;
+
+ if (*matrix_get (matrix, x, y) > vmax)
+ {
+ vmax = *matrix_get (matrix, x, y);
+#ifdef USE_OPENMP
+ vbest = vmax;
+#endif
+ xbest = x;
+ ybest = y;
+ }
+ }
+ }
+
+#ifdef USE_OPENMP
+#pragma omp barrier
+#pragma omp critical
+ {
+ if (vmax == vbest)
+ {
+ *xmax = xbest;
+ *ymax = ybest;
+ }
+ }
+#else
+ *xmax = xbest;
+ *ymax = ybest;
+#endif
+ }
+
+ assert (vmax > -INFINITY);
+}
+
+void
+generate_initial_binary_pattern (pattern_t *pattern, matrix_t *matrix)
+{
+ int xcluster = 0,
+ ycluster = 0,
+ xvoid = 0,
+ yvoid = 0;
+
+ for (;;)
+ {
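+        /* remove the 1 at the tightest cluster, then re-insert a 1 in the
+         * largest void; once both steps pick the same spot, the pattern
+         * cannot be spread any further */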
+ largest_cluster (pattern, matrix, TRUE, &xcluster, &ycluster);
+ assert (*pattern_get (pattern, xcluster, ycluster) == TRUE);
+ swap_pixel (pattern, matrix, xcluster, ycluster);
+
+ largest_cluster (pattern, matrix, FALSE, &xvoid, &yvoid);
+ assert (*pattern_get (pattern, xvoid, yvoid) == FALSE);
+ swap_pixel (pattern, matrix, xvoid, yvoid);
+
+ if (xcluster == xvoid && ycluster == yvoid)
+ return;
+ }
+}
+
+bool_t
+generate_dither_array (array_t *array,
+ pattern_t const *prototype, matrix_t const *matrix,
+ pattern_t *temp_pattern, matrix_t *temp_matrix)
+{
+ int width = prototype->width,
+ height = prototype->height;
+
+ int x, y, rank;
+
+ int initial_rank = 0;
+
+ if (array->width != width || array->height != height)
+ return FALSE;
+
+ // Make copies of the prototype and associated sizes matrix since we will
+ // trash them
+ if (!pattern_copy (temp_pattern, prototype))
+ return FALSE;
+
+ if (!matrix_copy (temp_matrix, matrix))
+ return FALSE;
+
+ // Compute initial rank
+ for (y = 0; y < height; ++y)
+ {
+ for (x = 0; x < width; ++x)
+ {
+ if (*pattern_get (temp_pattern, x, y))
+ initial_rank += 1;
+
+ *array_get (array, x, y) = 0;
+ }
+ }
+
+ // Phase 1
+ for (rank = initial_rank; rank > 0; --rank)
+ {
+ largest_cluster (temp_pattern, temp_matrix, TRUE, &x, &y);
+ swap_pixel (temp_pattern, temp_matrix, x, y);
+ *array_get (array, x, y) = rank - 1;
+ }
+
+ // Make copies again for phases 2 & 3
+ if (!pattern_copy (temp_pattern, prototype))
+ return FALSE;
+
+ if (!matrix_copy (temp_matrix, matrix))
+ return FALSE;
+
+ // Phase 2 & 3
+ for (rank = initial_rank; rank < width * height; ++rank)
+ {
+ largest_cluster (temp_pattern, temp_matrix, FALSE, &x, &y);
+ swap_pixel (temp_pattern, temp_matrix, x, y);
+ *array_get (array, x, y) = rank;
+ }
+
+ return TRUE;
+}
+
+bool_t
+generate (int size, xorwow_state_t *s,
+ char const *c_filename, char const *ppm_filename)
+{
+ bool_t ok = TRUE;
+
+ pattern_t prototype, temp_pattern;
+ array_t array;
+ matrix_t matrix, temp_matrix;
+
+ printf ("Generating %dx%d blue noise...\n", size, size);
+
+ if (!pattern_init (&prototype, size, size))
+ return FALSE;
+
+ if (!pattern_init (&temp_pattern, size, size))
+ {
+ pattern_destroy (&prototype);
+ return FALSE;
+ }
+
+ if (!matrix_init (&matrix, size, size))
+ {
+ pattern_destroy (&temp_pattern);
+ pattern_destroy (&prototype);
+ return FALSE;
+ }
+
+ if (!matrix_init (&temp_matrix, size, size))
+ {
+ matrix_destroy (&matrix);
+ pattern_destroy (&temp_pattern);
+ pattern_destroy (&prototype);
+ return FALSE;
+ }
+
+ if (!array_init (&array, size, size))
+ {
+ matrix_destroy (&temp_matrix);
+ matrix_destroy (&matrix);
+ pattern_destroy (&temp_pattern);
+ pattern_destroy (&prototype);
+ return FALSE;
+ }
+
+ printf("Filling initial binary pattern with white noise...\n");
+ pattern_fill_white_noise (&prototype, .1, s);
+
+ printf("Initializing cluster sizes...\n");
+ if (!compute_cluster_sizes (&prototype, &matrix))
+ {
+ fprintf (stderr, "Error while computing cluster sizes\n");
+ ok = FALSE;
+ goto out;
+ }
+
+ printf("Generating initial binary pattern...\n");
+ generate_initial_binary_pattern (&prototype, &matrix);
+
+ printf("Generating dither array...\n");
+ if (!generate_dither_array (&array, &prototype, &matrix,
+ &temp_pattern, &temp_matrix))
+ {
+ fprintf (stderr, "Error while generating dither array\n");
+ ok = FALSE;
+ goto out;
+ }
+
+ printf("Saving dither array...\n");
+ if (!array_save (&array, c_filename))
+ {
+ fprintf (stderr, "Error saving dither array\n");
+ ok = FALSE;
+ goto out;
+ }
+
+#if SAVE_PPM
+ if (!array_save_ppm (&array, ppm_filename))
+ {
+ fprintf (stderr, "Error saving dither array PPM\n");
+ ok = FALSE;
+ goto out;
+ }
+#else
+ (void)ppm_filename;
+#endif
+
+ printf("All done!\n");
+
+out:
+ array_destroy (&array);
+ matrix_destroy (&temp_matrix);
+ matrix_destroy (&matrix);
+ pattern_destroy (&temp_pattern);
+ pattern_destroy (&prototype);
+ return ok;
+}
+
+int
+main (void)
+{
+ xorwow_state_t s = {1185956906, 12385940, 983948, 349208051, 901842};
+
+ if (!generate (64, &s, "blue-noise-64x64.h", "blue-noise-64x64.ppm"))
+ return -1;
+
+ return 0;
+}
diff --git a/gfx/cairo/libpixman/src/loongson-mmintrin.h b/gfx/cairo/libpixman/src/loongson-mmintrin.h
new file mode 100644
index 0000000000..0e79e86484
--- /dev/null
+++ b/gfx/cairo/libpixman/src/loongson-mmintrin.h
@@ -0,0 +1,412 @@
+/* The gcc-provided loongson intrinsic functions are way too fucking broken
+ * to be of any use, otherwise I'd use them.
+ *
+ * - The hardware instructions are very similar to MMX or iwMMXt. Certainly
+ * close enough that they could have implemented the _mm_*-style intrinsic
+ * interface and had a ton of optimized code available to them. Instead they
+ * implemented something much, much worse.
+ *
+ * - pshuf takes a dead first argument, causing extra instructions to be
+ * generated.
+ *
+ * - There are no 64-bit shift or logical intrinsics, which means you have
+ * to implement them with inline assembly, but this is a nightmare because
+ * gcc doesn't understand that the integer vector datatypes are actually in
+ * floating-point registers, so you end up with braindead code like
+ *
+ * punpcklwd $f9,$f9,$f5
+ * dmtc1 v0,$f8
+ * punpcklwd $f19,$f19,$f5
+ * dmfc1 t9,$f9
+ * dmtc1 v0,$f9
+ * dmtc1 t9,$f20
+ * dmfc1 s0,$f19
+ * punpcklbh $f20,$f20,$f2
+ *
+ * where crap just gets copied back and forth between integer and floating-
+ * point registers ad nauseam.
+ *
+ * Instead of trying to work around the problems from these crap intrinsics, I
+ * just implement the _mm_* intrinsics needed for pixman-mmx.c using inline
+ * assembly.
+ */
+
+#include <stdint.h>
+
+/* vectors are stored in 64-bit floating-point registers */
+typedef double __m64;
+/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */
+typedef float __m32;
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si64 (void)
+{
+ return 0.0;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("paddh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("paddw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("paddush %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("paddusb %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("and %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("pcmpeqw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("pmaddhw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("pmulhuh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("pmullh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("or %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("packushb %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("packsswh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
+{
+ if (__builtin_constant_p (__w3) &&
+ __builtin_constant_p (__w2) &&
+ __builtin_constant_p (__w1) &&
+ __builtin_constant_p (__w0))
+ {
+ uint64_t val = ((uint64_t)__w3 << 48)
+ | ((uint64_t)__w2 << 32)
+ | ((uint64_t)__w1 << 16)
+ | ((uint64_t)__w0 << 0);
+ return *(__m64 *)&val;
+ }
+ else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0)
+ {
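+        /* all four halfwords are equal: broadcast lane 0 via pshufh */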
+ /* TODO: handle other cases */
+ uint64_t val = __w3;
+ uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0);
+ __m64 ret;
+ asm("pshufh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm)
+ );
+ return ret;
+ } else {
+ uint64_t val = ((uint64_t)__w3 << 48)
+ | ((uint64_t)__w2 << 32)
+ | ((uint64_t)__w1 << 16)
+ | ((uint64_t)__w0 << 0);
+ return *(__m64 *)&val;
+ }
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi32 (unsigned __i1, unsigned __i0)
+{
+ if (__builtin_constant_p (__i1) &&
+ __builtin_constant_p (__i0))
+ {
+ uint64_t val = ((uint64_t)__i1 << 32)
+ | ((uint64_t)__i0 << 0);
+ return *(__m64 *)&val;
+ }
+ else if (__i1 == __i0)
+ {
+ uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0);
+ __m64 ret;
+ asm("pshufh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm)
+ );
+ return ret;
+ } else {
+ uint64_t val = ((uint64_t)__i1 << 32)
+ | ((uint64_t)__i0 << 0);
+ return *(__m64 *)&val;
+ }
+}
+#undef _MM_SHUFFLE
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __m, int64_t __n)
+{
+ __m64 ret;
+ asm("pshufh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__n)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi16 (__m64 __m, int64_t __count)
+{
+ __m64 ret;
+ asm("psllh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__count)
+ );
+ return ret;
+}
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si64 (__m64 __m, int64_t __count)
+{
+ __m64 ret;
+ asm("dsll %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__count)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi16 (__m64 __m, int64_t __count)
+{
+ __m64 ret;
+ asm("psrlh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__count)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi32 (__m64 __m, int64_t __count)
+{
+ __m64 ret;
+ asm("psrlw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__count)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si64 (__m64 __m, int64_t __count)
+{
+ __m64 ret;
+ asm("dsrl %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__count)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("psubh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpckhbh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpckhhw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpcklbh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32 datatype which
+ * allows load8888 to use 32-bit loads */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpcklbh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("punpcklhw %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ __m64 ret;
+ asm("xor %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+loongson_extract_pi16 (__m64 __m, int64_t __pos)
+{
+ __m64 ret;
+ asm("pextrh %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m), "f" (*(__m64 *)&__pos)
+ );
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos)
+{
+ __m64 ret;
+ asm("pinsrh_%3 %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2), "i" (__pos)
+ );
+ return ret;
+}
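
For illustration, these wrappers compose the way the load8888 comments above
suggest. A sketch (hypothetical helper, not code from this patch) widening one
packed a8r8g8b8 pixel into four 16-bit lanes with a single 32-bit load and one
unpack against zero:

    static __m64
    expand8888 (const uint32_t *pixel)
    {
        __m32 lo = *(const __m32 *) pixel;  /* 32-bit load of one pixel */
        /* interleave the four 8-bit channels with zero bytes */
        return _mm_unpacklo_pi8_f (lo, _mm_setzero_si64 ());
    }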
diff --git a/gfx/cairo/libpixman/src/make-combine.pl b/gfx/cairo/libpixman/src/make-combine.pl
deleted file mode 100644
index 210a5da12b..0000000000
--- a/gfx/cairo/libpixman/src/make-combine.pl
+++ /dev/null
@@ -1,86 +0,0 @@
-$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template";
-
-$#ARGV == 0 or die $usage;
-
-# Get the component size.
-$size = int($ARGV[0]);
-$size == 8 or $size == 16 or die $usage;
-
-$pixel_size = $size * 4;
-$half_pixel_size = $size * 2;
-
-sub mask {
- my $str = shift;
- my $suffix;
- $suffix = "ULL" if $size > 8;
-
- return "0x" . $str . $suffix;
-}
-
-# Generate mask strings.
-$nibbles = $size / 4;
-$mask = "f" x $nibbles;
-$zero_mask = "0" x $nibbles;
-$one_half = "8" . "0" x ($nibbles - 1);
-
-print "/* WARNING: This file is generated by combine.pl from combine.inc.\n";
-print " Please edit one of those files rather than this one. */\n";
-print "\n";
-
-print "#line 1 \"pixman-combine.c.template\"\n";
-
-$mask_ = mask($mask);
-$one_half_ = mask($one_half);
-$g_mask = mask($mask . $zero_mask);
-$b_mask = mask($mask . $zero_mask x 2);
-$a_mask = mask($mask . $zero_mask x 3);
-$rb_mask = mask($mask . $zero_mask . $mask);
-$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask);
-$rb_one_half = mask($one_half . $zero_mask . $one_half);
-$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask);
-
-while (<STDIN>) {
- # Mask and 1/2 value for a single component.
- s/#define COMPONENT_SIZE\b/$& $size/;
- s/#define MASK\b/$& $mask_/;
- s/#define ONE_HALF\b/$& $one_half_/;
-
- # Shifts and masks for green, blue, and alpha.
- s/#define G_SHIFT\b/$& $size/;
- s/#define R_SHIFT\b/$& $size * 2/;
- s/#define A_SHIFT\b/$& $size * 3/;
- s/#define G_MASK\b/$& $g_mask/;
- s/#define R_MASK\b/$& $b_mask/;
- s/#define A_MASK\b/$& $a_mask/;
-
- # Special values for dealing with red + blue at the same time.
- s/#define RB_MASK\b/$& $rb_mask/;
- s/#define AG_MASK\b/$& $ag_mask/;
- s/#define RB_ONE_HALF\b/$& $rb_one_half/;
- s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
-
- # Add 32/64 suffix to combining function types.
- s/\bCombineFunc\b/CombineFunc$pixel_size/;
- s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
- s/combine_width/combine_$pixel_size/;
- s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/;
- s/UNc/UN$size/g;
- s/ALPHA_c/ALPHA_$size/g;
- s/RED_c/RED_$size/g;
- s/GREEN_c/GREEN_$size/g;
- s/BLUE_c/BLUE_$size/g;
-
- # Convert comp*_t values into the appropriate real types.
- s/comp1_t/uint${size}_t/g;
- s/comp2_t/uint${half_pixel_size}_t/g;
- s/comp4_t/uint${pixel_size}_t/g;
-
- # Change the function table name for the 64-bit version.
- s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
-
- # Change the header for the 64-bit version
- s/pixman-combine.h/pixman-combine64.h/ if $size == 16;
- s/pixman-combine.h/pixman-combine32.h/ if $size == 8;
-
- print;
-}
diff --git a/gfx/cairo/libpixman/src/meson.build b/gfx/cairo/libpixman/src/meson.build
new file mode 100644
index 0000000000..f48357f261
--- /dev/null
+++ b/gfx/cairo/libpixman/src/meson.build
@@ -0,0 +1,129 @@
+# Copyright © 2018 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+config_h = configure_file(
+ configuration : config,
+ output : 'config.h'
+)
+
+version_h = configure_file(
+ configuration : version_conf,
+ input : 'pixman-version.h.in',
+ output : 'pixman-version.h',
+ install_dir : join_paths(get_option('prefix'), get_option('includedir'), 'pixman-1')
+)
+
+libpixman_extra_cargs = []
+if cc.has_function_attribute('dllexport')
+ libpixman_extra_cargs = ['-DPIXMAN_API=__declspec(dllexport)']
+endif
+
+pixman_simd_libs = []
+simds = [
+  # The MMX library can be compiled with MMX on x86/x86_64, IWMMXT on
+  # some ARM cores, or Loongson MMI on Loongson MIPS systems. The
+  # libraries will all have the same name, "pixman-mmx", but there is
+  # no chance of more than one version being built in the same build,
+  # because no system has MMX, IWMMXT, and MMI at once, and it
+  # simplifies the build logic to give them the same name.
+ ['mmx', have_mmx, mmx_flags, []],
+ ['mmx', have_loongson_mmi, loongson_mmi_flags, []],
+ ['mmx', have_iwmmxt, iwmmxt_flags, []],
+
+ ['sse2', have_sse2, sse2_flags, []],
+ ['ssse3', have_ssse3, ssse3_flags, []],
+ ['vmx', have_vmx, vmx_flags, []],
+ ['arm-simd', have_armv6_simd, [],
+ ['pixman-arm-simd-asm.S', 'pixman-arm-simd-asm-scaled.S']],
+ ['arm-neon', have_neon, [],
+ ['pixman-arm-neon-asm.S', 'pixman-arm-neon-asm-bilinear.S']],
+ ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags,
+ ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']],
+]
+
+foreach simd : simds
+ if simd[1]
+ name = 'pixman-' + simd[0]
+ pixman_simd_libs += static_library(
+ name,
+ [name + '.c', config_h, version_h, simd[3]],
+ c_args : simd[2]
+ )
+ endif
+endforeach
+
+pixman_files = files(
+ 'pixman.c',
+ 'pixman-access.c',
+ 'pixman-access-accessors.c',
+ 'pixman-bits-image.c',
+ 'pixman-combine32.c',
+ 'pixman-combine-float.c',
+ 'pixman-conical-gradient.c',
+ 'pixman-filter.c',
+ 'pixman-x86.c',
+ 'pixman-mips.c',
+ 'pixman-arm.c',
+ 'pixman-ppc.c',
+ 'pixman-edge.c',
+ 'pixman-edge-accessors.c',
+ 'pixman-fast-path.c',
+ 'pixman-glyph.c',
+ 'pixman-general.c',
+ 'pixman-gradient-walker.c',
+ 'pixman-image.c',
+ 'pixman-implementation.c',
+ 'pixman-linear-gradient.c',
+ 'pixman-matrix.c',
+ 'pixman-noop.c',
+ 'pixman-radial-gradient.c',
+ 'pixman-region16.c',
+ 'pixman-region32.c',
+ 'pixman-solid-fill.c',
+ 'pixman-timer.c',
+ 'pixman-trap.c',
+ 'pixman-utils.c',
+)
+
+# We cannot use 'link_with' or 'link_whole' because Meson won't do the right
+# thing for static archives.
+_obs = []
+foreach l : pixman_simd_libs
+ _obs += l.extract_all_objects()
+endforeach
+
+libpixman = library(
+ 'pixman-1',
+ [pixman_files, config_h, version_h],
+ objects : _obs,
+ c_args : libpixman_extra_cargs,
+ dependencies : [dep_m, dep_threads],
+ version : meson.project_version(),
+ install : true,
+)
+
+inc_pixman = include_directories('.')
+
+idep_pixman = declare_dependency(
+ link_with: libpixman,
+ include_directories : inc_pixman,
+)
+
+install_headers('pixman.h', subdir : 'pixman-1')
diff --git a/gfx/cairo/libpixman/src/moz.build b/gfx/cairo/libpixman/src/moz.build
index 7c3e900899..02e2582452 100644
--- a/gfx/cairo/libpixman/src/moz.build
+++ b/gfx/cairo/libpixman/src/moz.build
@@ -30,7 +30,6 @@ SOURCES += [
'pixman-arm.c',
'pixman-bits-image.c',
'pixman-combine-float.c',
- 'pixman-combine16.c',
'pixman-combine32.c',
'pixman-conical-gradient.c',
'pixman-edge-accessors.c',
@@ -66,10 +65,7 @@ LOCAL_INCLUDES += [
]
if CONFIG['MOZ_USE_PTHREADS']:
- DEFINES['HAVE_PTHREAD_SETSPECIFIC'] = True
-
-if CONFIG['_MSC_VER']:
- DEFINES['PIXMAN_USE_XP_DLL_TLS_WORKAROUND'] = True
+ DEFINES['HAVE_PTHREADS'] = True
DEFINES['PACKAGE'] = 'mozpixman'
@@ -103,8 +99,8 @@ if use_mmx:
if CONFIG['GNU_CC']:
SOURCES['pixman-mmx.c'].flags += [
'-Winline',
- '--param inline-unit-growth=10000',
- '--param large-function-growth=10000',
+ '--param', 'inline-unit-growth=10000',
+ '--param', 'large-function-growth=10000',
]
if use_sse2:
@@ -130,30 +126,20 @@ if use_arm_neon_gcc:
SOURCES['pixman-arm-neon.c'].flags += CONFIG['NEON_FLAGS']
# Suppress warnings in third-party code.
-if CONFIG['_MSC_VER']:
- CFLAGS += [
- '-wd4047', # different levels of indirection
- '-wd4101', # unreferenced local variable
- '-wd4133', # 'function' : incompatible types
- '-wd4146', # unary minus operator applied to unsigned type
- '-wd4311', # 'variable' : pointer truncation from 'type' to 'type'
- ]
-
if CONFIG['GNU_CC'] or CONFIG['CLANG_CL']:
CFLAGS += [
'-Wno-address',
'-Wno-missing-field-initializers',
'-Wno-sign-compare',
+ '-Wno-incompatible-pointer-types',
'-Wno-unused', # too many unused warnings; ignore
]
-
if CONFIG['CLANG_CXX'] or CONFIG['CLANG_CL']:
CFLAGS += [
'-Wno-incompatible-pointer-types',
'-Wno-tautological-compare',
'-Wno-tautological-constant-out-of-range-compare',
]
-
if CONFIG['CLANG_CL']:
CFLAGS += [
'-Wno-unused-variable',
diff --git a/gfx/cairo/libpixman/src/pixman-access.c b/gfx/cairo/libpixman/src/pixman-access.c
index 00a02140ae..7c5ce783f9 100644
--- a/gfx/cairo/libpixman/src/pixman-access.c
+++ b/gfx/cairo/libpixman/src/pixman-access.c
@@ -68,14 +68,14 @@
#ifdef WORDS_BIGENDIAN
#define FETCH_24(img,l,o) \
- ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
- (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
- (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
+ ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
+ (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
+ (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
#else
#define FETCH_24(img,l,o) \
- ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
- (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
- (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
+ ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
+ (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
+ (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
#endif
/* Store macros */
@@ -87,7 +87,7 @@
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
- __m = 1 << (0x1f - ((o) & 0x1f)); \
+ __m = 1U << (0x1f - ((o) & 0x1f)); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
@@ -100,7 +100,7 @@
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
- __m = 1 << ((o) & 0x1f); \
+ __m = 1U << ((o) & 0x1f); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
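
The change from 1 << ... to 1U << ... in these two store macros is a correctness fix, not a style one: when ((o) & 0x1f) is 31, the signed form shifts a 1 into the sign bit of an int, which is undefined behaviour in C. A minimal sketch of the safe pattern:

    #include <stdint.h>

    /* 1 << 31 shifts into the sign bit of a signed int (undefined
     * behaviour); promoting to unsigned first is always defined. */
    static uint32_t
    bit_mask (int bit)   /* bit in [0, 31] */
    {
        return 1U << bit;
    }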
@@ -294,14 +294,14 @@ convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixe
}
static force_inline uint32_t
-convert_pixel_to_a8r8g8b8 (pixman_image_t *image,
+convert_pixel_to_a8r8g8b8 (bits_image_t *image,
pixman_format_code_t format,
uint32_t pixel)
{
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY ||
PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
- return image->bits.indexed->rgba[pixel];
+ return image->indexed->rgba[pixel];
}
else
{
@@ -332,7 +332,7 @@ convert_pixel_from_a8r8g8b8 (pixman_image_t *image,
}
static force_inline uint32_t
-fetch_and_convert_pixel (pixman_image_t * image,
+fetch_and_convert_pixel (bits_image_t * image,
const uint8_t * bits,
int offset,
pixman_format_code_t format)
@@ -417,7 +417,7 @@ convert_and_store_pixel (bits_image_t * image,
#define MAKE_ACCESSORS(format) \
static void \
- fetch_scanline_ ## format (pixman_image_t *image, \
+ fetch_scanline_ ## format (bits_image_t *image, \
int x, \
int y, \
int width, \
@@ -425,7 +425,7 @@ convert_and_store_pixel (bits_image_t * image,
const uint32_t *mask) \
{ \
uint8_t *bits = \
- (uint8_t *)(image->bits.bits + y * image->bits.rowstride); \
+ (uint8_t *)(image->bits + y * image->rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
@@ -461,8 +461,8 @@ convert_and_store_pixel (bits_image_t * image,
uint8_t *bits = \
(uint8_t *)(image->bits + line * image->rowstride); \
\
- return fetch_and_convert_pixel ((pixman_image_t *)image, \
- bits, offset, PIXMAN_ ## format); \
+ return fetch_and_convert_pixel ( \
+ image, bits, offset, PIXMAN_ ## format); \
} \
\
static const void *const __dummy__ ## format
@@ -583,14 +583,14 @@ to_srgb (float f)
}
static void
-fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
+fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@@ -612,14 +612,14 @@ fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
/* Expects a float buffer */
static void
-fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
+fetch_scanline_a2r10g10b10_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@@ -642,15 +642,57 @@ fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
}
/* Expects a float buffer */
+#ifndef PIXMAN_FB_ACCESSORS
+static void
+fetch_scanline_rgbf_float (bits_image_t *image,
+ int x,
+ int y,
+ int width,
+ uint32_t * b,
+ const uint32_t *mask)
+{
+ const float *bits = (float *)image->bits + y * image->rowstride;
+ const float *pixel = bits + x * 3;
+ argb_t *buffer = (argb_t *)b;
+
+ for (; width--; buffer++) {
+ buffer->r = *pixel++;
+ buffer->g = *pixel++;
+ buffer->b = *pixel++;
+ buffer->a = 1.f;
+ }
+}
+
+static void
+fetch_scanline_rgbaf_float (bits_image_t *image,
+ int x,
+ int y,
+ int width,
+ uint32_t * b,
+ const uint32_t *mask)
+{
+ const float *bits = (float *)image->bits + y * image->rowstride;
+ const float *pixel = bits + x * 4;
+ argb_t *buffer = (argb_t *)b;
+
+ for (; width--; buffer++) {
+ buffer->r = *pixel++;
+ buffer->g = *pixel++;
+ buffer->b = *pixel++;
+ buffer->a = *pixel++;
+ }
+}
+#endif
+
static void
-fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
+fetch_scanline_x2r10g10b10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@@ -673,14 +715,14 @@ fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
/* Expects a float buffer */
static void
-fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
+fetch_scanline_a2b10g10r10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@@ -704,14 +746,14 @@ fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
/* Expects a float buffer */
static void
-fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
+fetch_scanline_x2b10g10r10_float (bits_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
@@ -733,14 +775,14 @@ fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
}
static void
-fetch_scanline_yuy2 (pixman_image_t *image,
+fetch_scanline_yuy2 (bits_image_t *image,
int x,
int line,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + image->bits.rowstride * line;
+ const uint32_t *bits = image->bits + image->rowstride * line;
int i;
for (i = 0; i < width; i++)
@@ -767,7 +809,7 @@ fetch_scanline_yuy2 (pixman_image_t *image,
}
static void
-fetch_scanline_yv12 (pixman_image_t *image,
+fetch_scanline_yv12 (bits_image_t *image,
int x,
int line,
int width,
@@ -805,6 +847,40 @@ fetch_scanline_yv12 (pixman_image_t *image,
/**************************** Pixel wise fetching *****************************/
+#ifndef PIXMAN_FB_ACCESSORS
+static argb_t
+fetch_pixel_rgbf_float (bits_image_t *image,
+ int offset,
+ int line)
+{
+ float *bits = (float *)image->bits + line * image->rowstride;
+ argb_t argb;
+
+ argb.r = bits[offset * 3];
+ argb.g = bits[offset * 3 + 1];
+ argb.b = bits[offset * 3 + 2];
+ argb.a = 1.f;
+
+ return argb;
+}
+
+static argb_t
+fetch_pixel_rgbaf_float (bits_image_t *image,
+ int offset,
+ int line)
+{
+ float *bits = (float *)image->bits + line * image->rowstride;
+ argb_t argb;
+
+ argb.r = bits[offset * 4];
+ argb.g = bits[offset * 4 + 1];
+ argb.b = bits[offset * 4 + 2];
+ argb.a = bits[offset * 4 + 3];
+
+ return argb;
+}
+#endif
+
static argb_t
fetch_pixel_x2r10g10b10_float (bits_image_t *image,
int offset,
@@ -962,6 +1038,45 @@ fetch_pixel_yv12 (bits_image_t *image,
/*********************************** Store ************************************/
+#ifndef PIXMAN_FB_ACCESSORS
+static void
+store_scanline_rgbaf_float (bits_image_t * image,
+ int x,
+ int y,
+ int width,
+ const uint32_t *v)
+{
+ float *bits = (float *)image->bits + image->rowstride * y + 4 * x;
+ const argb_t *values = (argb_t *)v;
+
+ for (; width; width--, values++)
+ {
+ *bits++ = values->r;
+ *bits++ = values->g;
+ *bits++ = values->b;
+ *bits++ = values->a;
+ }
+}
+
+static void
+store_scanline_rgbf_float (bits_image_t * image,
+ int x,
+ int y,
+ int width,
+ const uint32_t *v)
+{
+ float *bits = (float *)image->bits + image->rowstride * y + 3 * x;
+ const argb_t *values = (argb_t *)v;
+
+ for (; width; width--, values++)
+ {
+ *bits++ = values->r;
+ *bits++ = values->g;
+ *bits++ = values->b;
+ }
+}
+#endif
+
static void
store_scanline_a2r10g10b10_float (bits_image_t * image,
int x,
@@ -976,7 +1091,7 @@ store_scanline_a2r10g10b10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
- uint16_t a, r, g, b;
+ uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
@@ -1002,7 +1117,7 @@ store_scanline_x2r10g10b10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
- uint16_t r, g, b;
+ uint32_t r, g, b;
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
@@ -1027,7 +1142,7 @@ store_scanline_a2b10g10r10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
- uint16_t a, r, g, b;
+ uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
@@ -1053,7 +1168,7 @@ store_scanline_x2b10g10r10_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
- uint16_t r, g, b;
+ uint32_t r, g, b;
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
@@ -1078,7 +1193,7 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
for (i = 0; i < width; ++i)
{
- uint8_t a, r, g, b;
+ uint32_t a, r, g, b;
a = pixman_float_to_unorm (values[i].a, 8);
r = to_srgb (values[i].r);
@@ -1090,44 +1205,6 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
}
}
-static void
-store_scanline_16 (bits_image_t * image,
- int x,
- int y,
- int width,
- const uint32_t *v)
-{
- uint16_t *bits = (uint16_t*)(image->bits + image->rowstride * y);
- uint16_t *values = (uint16_t *)v;
- uint16_t *pixel = bits + x;
- int i;
-
- for (i = 0; i < width; ++i)
- {
- WRITE (image, pixel++, values[i]);
- }
-}
-
-static void
-fetch_scanline_16 (pixman_image_t *image,
- int x,
- int y,
- int width,
- uint32_t * b,
- const uint32_t *mask)
-{
- const uint16_t *bits = (uint16_t*)(image->bits.bits + y * image->bits.rowstride);
- const uint16_t *pixel = bits + x;
- int i;
- uint16_t *buffer = (uint16_t *)b;
-
- for (i = 0; i < width; ++i)
- {
- *buffer++ = READ (image, pixel++);
- }
-}
-
-
/*
* Contracts a floating point image to 32bpp and then stores it using a
* regular 32-bit store proc. Despite the type, this function expects an
@@ -1159,37 +1236,37 @@ store_scanline_generic_float (bits_image_t * image,
}
static void
-fetch_scanline_generic_float (pixman_image_t *image,
+fetch_scanline_generic_float (bits_image_t * image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
- image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
+ image->fetch_scanline_32 (image, x, y, width, buffer, NULL);
- pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
+ pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width);
}
/* The 32_sRGB paths should be deleted after narrow processing
* is no longer invoked for formats that are considered wide.
* (Also see fetch_pixel_generic_lossy_32) */
static void
-fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
+fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image,
int x,
int y,
int width,
uint32_t *buffer,
const uint32_t *mask)
{
- const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+ const uint32_t *bits = image->bits + y * image->rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint32_t tmp;
while (pixel < end)
{
- uint8_t a, r, g, b;
+ uint32_t a, r, g, b;
tmp = READ (image, pixel++);
@@ -1213,7 +1290,7 @@ fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t tmp = READ (image, bits + offset);
- uint8_t a, r, g, b;
+ uint32_t a, r, g, b;
a = (tmp >> 24) & 0xff;
r = (tmp >> 16) & 0xff;
@@ -1242,7 +1319,7 @@ store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image,
for (i = 0; i < width; ++i)
{
- uint8_t a, r, g, b;
+ uint32_t a, r, g, b;
tmp = values[i];
@@ -1294,12 +1371,10 @@ fetch_pixel_generic_lossy_32 (bits_image_t *image,
typedef struct
{
pixman_format_code_t format;
- fetch_scanline_t fetch_scanline_16;
fetch_scanline_t fetch_scanline_32;
fetch_scanline_t fetch_scanline_float;
fetch_pixel_32_t fetch_pixel_32;
fetch_pixel_float_t fetch_pixel_float;
- store_scanline_t store_scanline_16;
store_scanline_t store_scanline_32;
store_scanline_t store_scanline_float;
} format_info_t;
@@ -1307,29 +1382,14 @@ typedef struct
#define FORMAT_INFO(format) \
{ \
PIXMAN_ ## format, \
- NULL, \
fetch_scanline_ ## format, \
fetch_scanline_generic_float, \
fetch_pixel_ ## format, \
fetch_pixel_generic_float, \
- NULL, \
- store_scanline_ ## format, \
- store_scanline_generic_float \
- }
-#define FORMAT_INFO16(format) \
- { \
- PIXMAN_ ## format, \
- fetch_scanline_16, \
- fetch_scanline_ ## format, \
- fetch_scanline_generic_float, \
- fetch_pixel_ ## format, \
- fetch_pixel_generic_float, \
- store_scanline_16, \
store_scanline_ ## format, \
store_scanline_generic_float \
}
-
static const format_info_t accessors[] =
{
/* 32 bpp formats */
@@ -1345,10 +1405,8 @@ static const format_info_t accessors[] =
/* sRGB formats */
{ PIXMAN_a8r8g8b8_sRGB,
- NULL,
fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
- NULL,
store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
},
@@ -1357,8 +1415,8 @@ static const format_info_t accessors[] =
FORMAT_INFO (b8g8r8),
/* 16bpp formats */
- FORMAT_INFO16 (r5g6b5),
- FORMAT_INFO16 (b5g6r5),
+ FORMAT_INFO (r5g6b5),
+ FORMAT_INFO (b5g6r5),
FORMAT_INFO (a1r5g5b5),
FORMAT_INFO (x1r5g5b5),
@@ -1408,37 +1466,48 @@ static const format_info_t accessors[] =
FORMAT_INFO (g1),
/* Wide formats */
-
+#ifndef PIXMAN_FB_ACCESSORS
+ { PIXMAN_rgba_float,
+ NULL, fetch_scanline_rgbaf_float,
+ fetch_pixel_generic_lossy_32, fetch_pixel_rgbaf_float,
+ NULL, store_scanline_rgbaf_float },
+
+ { PIXMAN_rgb_float,
+ NULL, fetch_scanline_rgbf_float,
+ fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float,
+ NULL, store_scanline_rgbf_float },
+#endif
+
{ PIXMAN_a2r10g10b10,
- NULL, NULL, fetch_scanline_a2r10g10b10_float,
+ NULL, fetch_scanline_a2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
- NULL, NULL, store_scanline_a2r10g10b10_float },
+ NULL, store_scanline_a2r10g10b10_float },
{ PIXMAN_x2r10g10b10,
- NULL, NULL, fetch_scanline_x2r10g10b10_float,
+ NULL, fetch_scanline_x2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
- NULL, NULL, store_scanline_x2r10g10b10_float },
+ NULL, store_scanline_x2r10g10b10_float },
{ PIXMAN_a2b10g10r10,
- NULL, NULL, fetch_scanline_a2b10g10r10_float,
+ NULL, fetch_scanline_a2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
- NULL, NULL, store_scanline_a2b10g10r10_float },
+ NULL, store_scanline_a2b10g10r10_float },
{ PIXMAN_x2b10g10r10,
- NULL, NULL, fetch_scanline_x2b10g10r10_float,
+ NULL, fetch_scanline_x2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
- NULL, NULL, store_scanline_x2b10g10r10_float },
+ NULL, store_scanline_x2b10g10r10_float },
/* YUV formats */
{ PIXMAN_yuy2,
- NULL, fetch_scanline_yuy2, fetch_scanline_generic_float,
+ fetch_scanline_yuy2, fetch_scanline_generic_float,
fetch_pixel_yuy2, fetch_pixel_generic_float,
- NULL, NULL, NULL },
+ NULL, NULL },
{ PIXMAN_yv12,
- NULL, fetch_scanline_yv12, fetch_scanline_generic_float,
+ fetch_scanline_yv12, fetch_scanline_generic_float,
fetch_pixel_yv12, fetch_pixel_generic_float,
- NULL, NULL, NULL },
+ NULL, NULL },
{ PIXMAN_null },
};
@@ -1452,12 +1521,10 @@ setup_accessors (bits_image_t *image)
{
if (info->format == image->format)
{
- image->fetch_scanline_16 = info->fetch_scanline_16;
image->fetch_scanline_32 = info->fetch_scanline_32;
image->fetch_scanline_float = info->fetch_scanline_float;
image->fetch_pixel_32 = info->fetch_pixel_32;
image->fetch_pixel_float = info->fetch_pixel_float;
- image->store_scanline_16 = info->store_scanline_16;
image->store_scanline_32 = info->store_scanline_32;
image->store_scanline_float = info->store_scanline_float;
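
The widening of the per-component temporaries from uint8_t/uint16_t to uint32_t in the 10-bit stores above is a similar undefined-behaviour fix: a narrow component promotes to signed int, so a << 30 can shift into the sign bit. A sketch of the packing step those stores perform (helper name illustrative):

    #include <stdint.h>

    /* Pack unorm components into a2r10g10b10
     * (a: 2 bits, r/g/b: 10 bits each). */
    static uint32_t
    pack_a2r10g10b10 (uint32_t a, uint32_t r, uint32_t g, uint32_t b)
    {
        return (a << 30) | (r << 20) | (g << 10) | b;
    }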
diff --git a/gfx/cairo/libpixman/src/pixman-arm-asm.h b/gfx/cairo/libpixman/src/pixman-arm-asm.h
new file mode 100644
index 0000000000..ee78541087
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-arm-asm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
diff --git a/gfx/cairo/libpixman/src/pixman-arm-common.h b/gfx/cairo/libpixman/src/pixman-arm-common.h
index b598502bba..9537688306 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-common.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-common.h
@@ -266,13 +266,6 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op, \
scaled_nearest_scanline_##cputype##_##name##_##op, \
src_type, dst_type, NORMAL)
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
src_type, dst_type) \
void \
@@ -318,9 +311,7 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
/* Provide entries for the fast path table */
#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
/*****************************************************************************/
@@ -360,16 +351,16 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
+ src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
+ src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
+ src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint32_t, dst_type, NORMAL, \
+ src_type, uint32_t, dst_type, NORMAL, \
FLAG_NONE)
@@ -409,19 +400,19 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint8_t, dst_type, COVER, \
+ src_type, uint8_t, dst_type, COVER, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint8_t, dst_type, NONE, \
+ src_type, uint8_t, dst_type, NONE, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint8_t, dst_type, PAD, \
+ src_type, uint8_t, dst_type, PAD, \
FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- NULL, src_type, uint8_t, dst_type, NORMAL, \
+ src_type, uint8_t, dst_type, NORMAL, \
FLAG_HAVE_NON_SOLID_MASK)
diff --git a/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm b/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm
deleted file mode 100644
index 8f5d5eb2a9..0000000000
--- a/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm
+++ /dev/null
@@ -1,21 +0,0 @@
- area pixman_msvc, code, readonly
-
- export pixman_msvc_try_arm_simd_op
-
-pixman_msvc_try_arm_simd_op
- ;; I don't think the msvc arm asm knows how to do SIMD insns
- ;; uqadd8 r3,r3,r3
- dcd 0xe6633f93
- mov pc,lr
- endp
-
- export pixman_msvc_try_arm_neon_op
-
-pixman_msvc_try_arm_neon_op
- ;; I don't think the msvc arm asm knows how to do NEON insns
- ;; veor d0,d0,d0
- dcd 0xf3000110
- mov pc,lr
- endp
-
- end
diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
index e37b5c298e..0fd92d61c5 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
@@ -65,23 +65,13 @@
.p2align 2
#include "pixman-private.h"
+#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/*
* Bilinear macros from pixman-arm-neon-asm.S
*/
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
-
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used
diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
index d0e943d712..7e949a38fd 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
@@ -50,6 +50,7 @@
.p2align 2
#include "pixman-private.h"
+#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/* Global configuration options and preferences */
@@ -954,7 +955,6 @@ generate_composite_function \
*/
.macro pixman_composite_over_n_8_0565_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@@ -982,7 +982,6 @@ generate_composite_function \
.macro pixman_composite_over_8888_n_0565_init
add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d24[0]}, [DUMMY]
vdup.8 d24, d24[3]
@@ -1449,7 +1448,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8_8888_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@@ -1521,7 +1519,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8_8_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d8[0]}, [DUMMY]
vdup.8 d8, d8[3]
@@ -1623,7 +1620,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8888_8888_ca_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@@ -1793,7 +1789,6 @@ generate_composite_function \
.macro pixman_composite_over_n_8888_0565_ca_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d8, d11[0]
@@ -1907,7 +1902,6 @@ generate_composite_function \
.macro pixman_composite_add_n_8_8_init
add DUMMY, sp, #ARGS_STACK_OFFSET
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d11[0]}, [DUMMY]
vdup.8 d11, d11[3]
@@ -2214,7 +2208,6 @@ generate_composite_function_single_scanline \
.macro pixman_composite_over_8888_n_8888_init
add DUMMY, sp, #48
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d15[0]}, [DUMMY]
vdup.8 d15, d15[3]
@@ -2587,7 +2580,6 @@ generate_composite_function \
.macro pixman_composite_over_0565_n_0565_init
add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- .vsave {d8-d15}
vpush {d8-d15}
vld1.32 {d15[0]}, [DUMMY]
vdup.8 d15, d15[3]
@@ -2839,17 +2831,6 @@ generate_composite_function_nearest_scanline \
/******************************************************************************/
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
-
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used
@@ -3141,16 +3122,13 @@ pixman_asm_function fname
TMP4 .req r9
STRIDE .req r2
- .fnstart
mov ip, sp
- .save {r4, r5, r6, r7, r8, r9}
push {r4, r5, r6, r7, r8, r9}
mov PF_OFFS, #prefetch_distance
ldmia ip, {WB, X, UX, WIDTH}
mul PF_OFFS, PF_OFFS, UX
.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
- .vsave {d8-d15}
vpush {d8-d15}
.endif
@@ -3244,7 +3222,6 @@ pixman_asm_function fname
.endif
pop {r4, r5, r6, r7, r8, r9}
bx lr
- .fnend
.unreq OUT
.unreq TOP
diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
index f50ade3ef1..bdcf6a9d47 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h
@@ -385,7 +385,7 @@
* execute simultaneously with NEON and be completely shadowed by it. Thus
* we get no performance overhead at all (*). This looks like a very nice
* feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in sofware
+ * but still can implement some rather advanced prefetch logic in software
* for almost zero cost!
*
* (*) The overhead of the prefetcher is visible when running some trivial
@@ -631,16 +631,8 @@ local skip1
src_basereg_ = 0, \
mask_basereg_ = 24
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
- .fnstart
- .save {r4-r12, lr}
+ pixman_asm_function fname
+
push {r4-r12, lr} /* save all registers */
/*
@@ -818,7 +810,6 @@ fname:
init
.if regs_shortage
- .save {r0, r1}
push {r0, r1}
.endif
subs H, H, #1
@@ -904,7 +895,6 @@ fname:
.endif
cleanup
pop {r4-r12, pc} /* exit */
- .fnend
.purgem fetch_src_pixblock
.purgem pixld_src
@@ -949,15 +939,8 @@ fname:
src_basereg_ = 0, \
mask_basereg_ = 24
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
- .fnstart
+ pixman_asm_function fname
+
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
/*
* Make some macro arguments globally visible and accessible
@@ -992,7 +975,6 @@ fname:
.endm
ldr UNIT_X, [sp]
- .save {r4-r8, lr}
push {r4-r8, lr}
ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)]
.if mask_bpp != 0
@@ -1108,7 +1090,6 @@ fname:
.purgem fetch_src_pixblock
.purgem pixld_src
- .fnend
.endfunc
.endm
@@ -1135,7 +1116,6 @@ fname:
*/
.macro default_init_need_all_regs
- .vsave {d8-d15}
vpush {d8-d15}
.endm
diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon.c b/gfx/cairo/libpixman/src/pixman-arm-neon.c
index d902193cfa..be761c9652 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-neon.c
+++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c
@@ -145,23 +145,6 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
uint16_t, uint16_t)
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
uint32_t, uint32_t)
-static force_inline void
-pixman_scaled_bilinear_scanline_8888_8888_SRC (
- uint32_t * dst,
- const uint32_t * mask,
- const uint32_t * src_top,
- const uint32_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
-}
-
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
uint32_t, uint32_t)
@@ -285,28 +268,6 @@ arm_neon_blt (pixman_implementation_t *imp,
}
}
-static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
-{
- pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
- pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
- pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
- pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
- pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- NORMAL, FLAG_NONE)
-
static const pixman_fast_path_t arm_neon_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
@@ -401,21 +362,21 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888),
PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
/* Note: NONE repeat is not supported yet */
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
@@ -460,8 +421,6 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
- SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
-
{ PIXMAN_OP_NONE },
};
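
The PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH macro removed above had become a duplicate of the generic SIMPLE_NEAREST_FAST_PATH, which likewise expands to one table row per repeat mode (COVER/NONE/PAD/NORMAL). Each row in this table is a pixman_fast_path_t, roughly as declared in pixman-private.h:

    typedef struct
    {
        pixman_op_t             op;
        pixman_format_code_t    src_format;
        uint32_t                src_flags;
        pixman_format_code_t    mask_format;
        uint32_t                mask_flags;
        pixman_format_code_t    dest_format;
        uint32_t                dest_flags;
        pixman_composite_func_t func;
    } pixman_fast_path_t;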
diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S b/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S
index 7110995488..e050292e05 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S
@@ -37,16 +37,7 @@
.altmacro
.p2align 2
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
+#include "pixman-arm-asm.h"
/*
* Note: This code is only using armv5te instructions (not even armv6),
diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S
index c209688790..a74a0a8f34 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S
@@ -37,6 +37,7 @@
.altmacro
.p2align 2
+#include "pixman-arm-asm.h"
#include "pixman-arm-simd-asm.h"
/* A head macro should do all processing which results in an output of up to
@@ -303,6 +304,83 @@ generate_composite_function \
/******************************************************************************/
+.macro src_x888_0565_init
+ /* Hold loop invariant in MASK */
+ ldr MASK, =0x001F001F
+ line_saved_regs STRIDE_S, ORIG_W
+.endm
+
+.macro src_x888_0565_1pixel s, d
+ and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb
+ and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000
+ orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb
+ orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
+ /* Top 16 bits are discarded during the following STRH */
+.endm
+
+.macro src_x888_0565_2pixels slo, shi, d, tmp
+ and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000
+ and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB
+ and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb
+ orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB
+ orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB
+ and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000
+ orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb
+ orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
+ pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
+.endm
+
+.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ WK4 .req STRIDE_S
+ WK5 .req STRIDE_M
+ WK6 .req WK3
+ WK7 .req ORIG_W
+ .if numbytes == 16
+ pixld , 16, 4, SRC, 0
+ src_x888_0565_2pixels 4, 5, 0, 0
+ pixld , 8, 4, SRC, 0
+ src_x888_0565_2pixels 6, 7, 1, 1
+ pixld , 8, 6, SRC, 0
+ .else
+ pixld , numbytes*2, 4, SRC, 0
+ .endif
+.endm
+
+.macro src_x888_0565_process_tail cond, numbytes, firstreg
+ .if numbytes == 16
+ src_x888_0565_2pixels 4, 5, 2, 2
+ src_x888_0565_2pixels 6, 7, 3, 4
+ .elseif numbytes == 8
+ src_x888_0565_2pixels 4, 5, 1, 1
+ src_x888_0565_2pixels 6, 7, 2, 2
+ .elseif numbytes == 4
+ src_x888_0565_2pixels 4, 5, 1, 1
+ .else
+ src_x888_0565_1pixel 4, 1
+ .endif
+ .if numbytes == 16
+ pixst , numbytes, 0, DST
+ .else
+ pixst , numbytes, 1, DST
+ .endif
+ .unreq WK4
+ .unreq WK5
+ .unreq WK6
+ .unreq WK7
+.endm
+
+generate_composite_function \
+ pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+ 3, /* prefetch distance */ \
+ src_x888_0565_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ src_x888_0565_process_head, \
+ src_x888_0565_process_tail
+
+/******************************************************************************/
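
A C model of the per-pixel conversion annotated above (helper name illustrative): r5g6b5 keeps the top 5, 6, and 5 bits of the 8-bit red, green, and blue channels.

    #include <stdint.h>

    static uint16_t
    x888_to_0565 (uint32_t s)   /* s is x8r8g8b8 */
    {
        return ((s >> 8) & 0xf800) |   /* rrrrr----------- */
               ((s >> 5) & 0x07e0) |   /* -----gggggg----- */
               ((s >> 3) & 0x001f);    /* -----------bbbbb */
    }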
+
.macro add_8_8_8pixels cond, dst1, dst2
uqadd8&cond WK&dst1, WK&dst1, MASK
uqadd8&cond WK&dst2, WK&dst2, STRIDE_M
@@ -611,3 +689,491 @@ generate_composite_function \
/******************************************************************************/
+.macro over_reverse_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET]
+ ldr MASK, =0x00800080
+ /* Split source pixel into RB/AG parts */
+ uxtb16 STRIDE_S, SRC
+ uxtb16 STRIDE_M, SRC, ror #8
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+ line_saved_regs STRIDE_D, ORIG_W
+.endm
+
+.macro over_reverse_n_8888_newline
+ mov STRIDE_D, #0xFF
+.endm
+
+.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_reverse_n_8888_1pixel d, is_only
+ teq WK&d, #0
+ beq 8f /* replace with source */
+ bics ORIG_W, STRIDE_D, WK&d, lsr #24
+ .if is_only == 1
+ beq 49f /* skip store */
+ .else
+ beq 9f /* write same value back */
+ .endif
+ mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
+ mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */
+ uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+ uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
+ mov SCRATCH, SCRATCH, ror #8
+ sel ORIG_W, SCRATCH, ORIG_W
+ uqadd8 WK&d, WK&d, ORIG_W
+ b 9f
+8: mov WK&d, SRC
+9:
+.endm
+
+.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4
+ .if numbytes == 4
+ over_reverse_n_8888_1pixel reg1, 1
+ .else
+ and SCRATCH, WK&reg1, WK&reg2
+ .if numbytes == 16
+ and SCRATCH, SCRATCH, WK&reg3
+ and SCRATCH, SCRATCH, WK&reg4
+ .endif
+ mvns SCRATCH, SCRATCH, asr #24
+ beq 49f /* skip store if all opaque */
+ over_reverse_n_8888_1pixel reg1, 0
+ over_reverse_n_8888_1pixel reg2, 0
+ .if numbytes == 16
+ over_reverse_n_8888_1pixel reg3, 0
+ over_reverse_n_8888_1pixel reg4, 0
+ .endif
+ .endif
+ pixst , numbytes, reg1, DST
+49:
+.endm
+
+.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg
+ over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+ 3, /* prefetch distance */ \
+ over_reverse_n_8888_init, \
+ over_reverse_n_8888_newline, \
+ nop_macro, /* cleanup */ \
+ over_reverse_n_8888_process_head, \
+ over_reverse_n_8888_process_tail
+
+/******************************************************************************/
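
over_reverse composites the destination over a solid source: each pixel becomes dest + src * (255 - dest_alpha). The bics above computes 255 - dest_alpha (for 8-bit values, 255 & ~a equals 255 - a), and the two early branches shortcut fully transparent and fully opaque destinations. A C model of one channel, using pixman's rounded multiply:

    #include <stdint.h>

    /* pixman's rounded x * a / 255 (the MUL_UN8 pattern). */
    static uint8_t
    mul_un8 (uint8_t x, uint8_t a)
    {
        uint16_t t = x * a + 0x80;
        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    /* OVER_REVERSE with a solid source, one channel:
     * result = dest + src * (255 - dest_alpha), saturating. */
    static uint8_t
    over_reverse_channel (uint8_t d, uint8_t s, uint8_t da)
    {
        uint32_t t = d + mul_un8 (s, 255 - da);
        return (uint8_t)(t > 255 ? 255 : t);
    }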
+
+.macro over_white_8888_8888_ca_init
+ HALF .req SRC
+ TMP0 .req STRIDE_D
+ TMP1 .req STRIDE_S
+ TMP2 .req STRIDE_M
+ TMP3 .req ORIG_W
+ WK4 .req SCRATCH
+ line_saved_regs STRIDE_D, STRIDE_M, ORIG_W
+ ldr SCRATCH, =0x800080
+ mov HALF, #0x80
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, SCRATCH, SCRATCH
+ .set DST_PRELOAD_BIAS, 8
+.endm
+
+.macro over_white_8888_8888_ca_cleanup
+ .set DST_PRELOAD_BIAS, 0
+ .unreq HALF
+ .unreq TMP0
+ .unreq TMP1
+ .unreq TMP2
+ .unreq TMP3
+ .unreq WK4
+.endm
+
+.macro over_white_8888_8888_ca_combine m, d
+ uxtb16 TMP1, TMP0 /* rb_notmask */
+ uxtb16 TMP2, d /* rb_dest; 1 stall follows */
+ smlatt TMP3, TMP2, TMP1, HALF /* red */
+ smlabb TMP2, TMP2, TMP1, HALF /* blue */
+ uxtb16 TMP0, TMP0, ror #8 /* ag_notmask */
+ uxtb16 TMP1, d, ror #8 /* ag_dest; 1 stall follows */
+ smlatt d, TMP1, TMP0, HALF /* alpha */
+ smlabb TMP1, TMP1, TMP0, HALF /* green */
+ pkhbt TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
+ pkhbt TMP1, TMP1, d, lsl #16 /* ag */
+ uxtab16 TMP0, TMP0, TMP0, ror #8
+ uxtab16 TMP1, TMP1, TMP1, ror #8
+ mov TMP0, TMP0, ror #8
+ sel d, TMP0, TMP1
+ uqadd8 d, d, m /* d is a late result */
+.endm
+
+.macro over_white_8888_8888_ca_1pixel_head
+ pixld , 4, 1, MASK, 0
+ pixld , 4, 3, DST, 0
+.endm
+
+.macro over_white_8888_8888_ca_1pixel_tail
+ mvn TMP0, WK1
+ teq WK1, WK1, asr #32
+ bne 01f
+ bcc 03f
+ mov WK3, WK1
+ b 02f
+01: over_white_8888_8888_ca_combine WK1, WK3
+02: pixst , 4, 3, DST
+03:
+.endm
+
+.macro over_white_8888_8888_ca_2pixels_head
+ pixld , 8, 1, MASK, 0
+.endm
+
+.macro over_white_8888_8888_ca_2pixels_tail
+ pixld , 8, 3, DST
+ mvn TMP0, WK1
+ teq WK1, WK1, asr #32
+ bne 01f
+ movcs WK3, WK1
+ bcs 02f
+ teq WK2, #0
+ beq 05f
+ b 02f
+01: over_white_8888_8888_ca_combine WK1, WK3
+02: mvn TMP0, WK2
+ teq WK2, WK2, asr #32
+ bne 03f
+ movcs WK4, WK2
+ b 04f
+03: over_white_8888_8888_ca_combine WK2, WK4
+04: pixst , 8, 3, DST
+05:
+.endm
+
+.macro over_white_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ .if numbytes == 4
+ over_white_8888_8888_ca_1pixel_head
+ .else
+ .if numbytes == 16
+ over_white_8888_8888_ca_2pixels_head
+ over_white_8888_8888_ca_2pixels_tail
+ .endif
+ over_white_8888_8888_ca_2pixels_head
+ .endif
+.endm
+
+.macro over_white_8888_8888_ca_process_tail cond, numbytes, firstreg
+ .if numbytes == 4
+ over_white_8888_8888_ca_1pixel_tail
+ .else
+ over_white_8888_8888_ca_2pixels_tail
+ .endif
+.endm
+
+generate_composite_function \
+ pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+ 2, /* prefetch distance */ \
+ over_white_8888_8888_ca_init, \
+ nop_macro, /* newline */ \
+ over_white_8888_8888_ca_cleanup, \
+ over_white_8888_8888_ca_process_head, \
+ over_white_8888_8888_ca_process_tail
+
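
With a solid white source, component-alpha OVER collapses nicely: src * mask is just mask, and src_alpha * mask is also mask, so each destination channel becomes m + d * (255 - m). That is what over_white_8888_8888_ca_combine computes with its paired smlatt/smlabb multiplies; in C, with mul_un8 as sketched after over_reverse above:

    /* Component-alpha OVER with a solid white source, one channel. */
    static uint8_t
    over_white_channel (uint8_t d, uint8_t m)
    {
        return (uint8_t)(m + mul_un8 (d, 255 - m));
    }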
+
+.macro over_n_8888_8888_ca_init
+ /* Set up constants. RB_SRC and AG_SRC are in registers;
+ * RB_FLDS, A_SRC, and the two HALF values need to go on the
+ * stack (and the full SRC value is already there) */
+ ldr SCRATCH, [sp, #ARGS_STACK_OFFSET]
+ mov WK0, #0x00FF0000
+ orr WK0, WK0, #0xFF /* RB_FLDS (0x00FF00FF) */
+ mov WK1, #0x80 /* HALF default value */
+ mov WK2, SCRATCH, lsr #24 /* A_SRC */
+ orr WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */
+ push {WK0-WK3}
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16
+ uxtb16 SRC, SCRATCH
+ uxtb16 STRIDE_S, SCRATCH, ror #8
+
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, WK3, WK3
+
+ .unreq WK0
+ .unreq WK1
+ .unreq WK2
+ .unreq WK3
+ WK0 .req Y
+ WK1 .req STRIDE_D
+ RB_SRC .req SRC
+ AG_SRC .req STRIDE_S
+ WK2 .req STRIDE_M
+ RB_FLDS .req r8 /* the reloaded constants have to be at consecutive registers starting at an even one */
+ A_SRC .req r8
+ HALF .req r9
+ WK3 .req r10
+ WK4 .req r11
+ WK5 .req SCRATCH
+ WK6 .req ORIG_W
+
+ line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
+.endm
+
+.macro over_n_8888_8888_ca_cleanup
+ add sp, sp, #16
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16
+
+ .unreq WK0
+ .unreq WK1
+ .unreq RB_SRC
+ .unreq AG_SRC
+ .unreq WK2
+ .unreq RB_FLDS
+ .unreq A_SRC
+ .unreq HALF
+ .unreq WK3
+ .unreq WK4
+ .unreq WK5
+ .unreq WK6
+ WK0 .req r8
+ WK1 .req r9
+ WK2 .req r10
+ WK3 .req r11
+.endm
+
+.macro over_n_8888_8888_ca_1pixel_head
+ pixld , 4, 6, MASK, 0
+ pixld , 4, 0, DST, 0
+.endm
+
+.macro over_n_8888_8888_ca_1pixel_tail
+ ldrd A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8]
+ uxtb16 WK1, WK6 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */
+ teq WK6, WK6, asr #32 /* Zc if transparent, ZC if opaque */
+ bne 20f
+ bcc 40f
+ /* Mask is fully opaque (all channels) */
+ ldr WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */
+ eors A_SRC, A_SRC, #0xFF
+ bne 10f
+ /* Source is also opaque - same as src_8888_8888 */
+ mov WK0, WK6
+ b 30f
+10: /* Same as over_8888_8888 */
+ mul_8888_8 WK0, A_SRC, WK5, HALF
+ uqadd8 WK0, WK0, WK6
+ b 30f
+20: /* No simplifications possible - do it the hard way */
+ uxtb16 WK2, WK6, ror #8 /* ag_mask */
+ mla WK3, WK1, A_SRC, HALF /* rb_mul; 2 cycles */
+ mla WK4, WK2, A_SRC, HALF /* ag_mul; 2 cycles */
+ ldrd RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET]
+ uxtb16 WK5, WK0 /* rb_dest */
+ uxtab16 WK3, WK3, WK3, ror #8
+ uxtb16 WK6, WK0, ror #8 /* ag_dest */
+ uxtab16 WK4, WK4, WK4, ror #8
+ smlatt WK0, RB_SRC, WK1, HALF /* red1 */
+ smlabb WK1, RB_SRC, WK1, HALF /* blue1 */
+ bic WK3, RB_FLDS, WK3, lsr #8
+ bic WK4, RB_FLDS, WK4, lsr #8
+ pkhbt WK1, WK1, WK0, lsl #16 /* rb1 */
+ smlatt WK0, WK5, WK3, HALF /* red2 */
+ smlabb WK3, WK5, WK3, HALF /* blue2 */
+ uxtab16 WK1, WK1, WK1, ror #8
+ smlatt WK5, AG_SRC, WK2, HALF /* alpha1 */
+ pkhbt WK3, WK3, WK0, lsl #16 /* rb2 */
+ smlabb WK0, AG_SRC, WK2, HALF /* green1 */
+ smlatt WK2, WK6, WK4, HALF /* alpha2 */
+ smlabb WK4, WK6, WK4, HALF /* green2 */
+ pkhbt WK0, WK0, WK5, lsl #16 /* ag1 */
+ uxtab16 WK3, WK3, WK3, ror #8
+ pkhbt WK4, WK4, WK2, lsl #16 /* ag2 */
+ uxtab16 WK0, WK0, WK0, ror #8
+ uxtab16 WK4, WK4, WK4, ror #8
+ mov WK1, WK1, ror #8
+ mov WK3, WK3, ror #8
+ sel WK2, WK1, WK0 /* recombine source*mask */
+ sel WK1, WK3, WK4 /* recombine dest*(1-source_alpha*mask) */
+ uqadd8 WK0, WK1, WK2 /* followed by 1 stall */
+30: /* The destination buffer is already in the L1 cache, so
+ * there's little point in amalgamating writes */
+ pixst , 4, 0, DST
+40:
+.endm
+
+.macro over_n_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ .rept (numbytes / 4) - 1
+ over_n_8888_8888_ca_1pixel_head
+ over_n_8888_8888_ca_1pixel_tail
+ .endr
+ over_n_8888_8888_ca_1pixel_head
+.endm
+
+.macro over_n_8888_8888_ca_process_tail cond, numbytes, firstreg
+ over_n_8888_8888_ca_1pixel_tail
+.endm
+
+pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
+ ldr ip, [sp]
+ cmp ip, #-1
+ beq pixman_composite_over_white_8888_8888_ca_asm_armv6
+ /* else drop through... */
+ .endfunc
+generate_composite_function \
+ pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0, \
+ 2, /* prefetch distance */ \
+ over_n_8888_8888_ca_init, \
+ nop_macro, /* newline */ \
+ over_n_8888_8888_ca_cleanup, \
+ over_n_8888_8888_ca_process_head, \
+ over_n_8888_8888_ca_process_tail
+
+/******************************************************************************/
+
+.macro in_reverse_8888_8888_init
+ /* Hold loop invariant in MASK */
+ ldr MASK, =0x00800080
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+ /* Offset the source pointer: we only need the alpha bytes */
+ add SRC, SRC, #3
+ line_saved_regs ORIG_W
+.endm
+
+.macro in_reverse_8888_8888_head numbytes, reg1, reg2, reg3
+ ldrb ORIG_W, [SRC], #4
+ .if numbytes >= 8
+ ldrb WK&reg1, [SRC], #4
+ .if numbytes == 16
+ ldrb WK&reg2, [SRC], #4
+ ldrb WK&reg3, [SRC], #4
+ .endif
+ .endif
+ add DST, DST, #numbytes
+.endm
+
+.macro in_reverse_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ in_reverse_8888_8888_head numbytes, firstreg, %(firstreg+1), %(firstreg+2)
+.endm
+
+.macro in_reverse_8888_8888_1pixel s, d, offset, is_only
+ .if is_only != 1
+ movs s, ORIG_W
+ .if offset != 0
+ ldrb ORIG_W, [SRC, #offset]
+ .endif
+ beq 01f
+ teq STRIDE_M, #0xFF
+ beq 02f
+ .endif
+ uxtb16 SCRATCH, d /* rb_dest */
+ uxtb16 d, d, ror #8 /* ag_dest */
+ mla SCRATCH, SCRATCH, s, MASK
+ mla d, d, s, MASK
+ uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+ uxtab16 d, d, d, ror #8
+ mov SCRATCH, SCRATCH, ror #8
+ sel d, SCRATCH, d
+ b 02f
+ .if offset == 0
+48: /* Last mov d,#0 of the set - used as part of shortcut for
+ * source values all 0 */
+ .endif
+01: mov d, #0
+02:
+.endm
+
+.macro in_reverse_8888_8888_tail numbytes, reg1, reg2, reg3, reg4
+ .if numbytes == 4
+ teq ORIG_W, ORIG_W, asr #32
+ ldrne WK&reg1, [DST, #-4]
+ .elseif numbytes == 8
+ teq ORIG_W, WK&reg1
+ teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
+ ldmnedb DST, {WK&reg1-WK&reg2}
+ .else
+ teq ORIG_W, WK&reg1
+ teqeq ORIG_W, WK&reg2
+ teqeq ORIG_W, WK&reg3
+ teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
+ ldmnedb DST, {WK&reg1-WK&reg4}
+ .endif
+ cmnne DST, #0 /* clear C if NE */
+ bcs 49f /* no writes to dest if source all -1 */
+ beq 48f /* set dest to all 0 if source all 0 */
+ .if numbytes == 4
+ in_reverse_8888_8888_1pixel ORIG_W, WK&reg1, 0, 1
+ str WK&reg1, [DST, #-4]
+ .elseif numbytes == 8
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -4, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, 0, 0
+ stmdb DST, {WK&reg1-WK&reg2}
+ .else
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -12, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, -8, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg3, -4, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK&reg4, 0, 0
+ stmdb DST, {WK&reg1-WK&reg4}
+ .endif
+49:
+.endm
+
+.macro in_reverse_8888_8888_process_tail cond, numbytes, firstreg
+ in_reverse_8888_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+ pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST, \
+ 2, /* prefetch distance */ \
+ in_reverse_8888_8888_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ in_reverse_8888_8888_process_head, \
+ in_reverse_8888_8888_process_tail
+
+/******************************************************************************/
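
in_reverse scales every destination channel by the source alpha, which is why the code above only ever loads the alpha byte of each source pixel (the add SRC, SRC, #3 offset) and can skip work entirely when a run of source alphas is all 0 or all 255. A C model of one pixel, doing the two-lanes-at-a-time rounded multiply that the uxtb16/mla/uxtab16/sel sequence implements:

    #include <stdint.h>

    /* IN_REVERSE: dest * src_alpha, all four channels,
     * two 8-bit lanes per multiply (the SWAR trick above). */
    static uint32_t
    in_reverse_pixel (uint32_t d, uint8_t srca)
    {
        uint32_t rb = d & 0x00ff00ffu;          /* red, blue lanes  */
        uint32_t ag = (d >> 8) & 0x00ff00ffu;   /* alpha, green     */

        rb = rb * srca + 0x00800080u;           /* x*a + 128 per lane */
        rb = ((rb + ((rb >> 8) & 0x00ff00ffu)) >> 8) & 0x00ff00ffu;

        ag = ag * srca + 0x00800080u;
        ag = (ag + ((ag >> 8) & 0x00ff00ffu)) & 0xff00ff00u;

        return ag | rb;
    }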
+
+.macro over_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET]
+ /* Hold loop invariant in MASK */
+ ldr MASK, =0x00800080
+ /* Hold multiplier for destination in STRIDE_M */
+ mov STRIDE_M, #255
+ sub STRIDE_M, STRIDE_M, SRC, lsr #24
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_n_8888_1pixel dst
+ mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK
+ uqadd8 WK&dst, WK&dst, SRC
+.endm
+
+.macro over_n_8888_process_tail cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+ over_n_8888_1pixel %(PROCESS_REG)
+ .set PROCESS_REG, PROCESS_REG+1
+ .endr
+ pixst , numbytes, firstreg, DST
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_armv6, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE, \
+ 2, /* prefetch distance */ \
+ over_n_8888_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ over_n_8888_process_head, \
+ over_n_8888_process_tail
+
+/******************************************************************************/
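
Finally, over_n_8888 is plain OVER with a solid (premultiplied) source: the init macro precomputes 255 - src_alpha into STRIDE_M, each destination pixel is scaled by it via mul_8888_8, and the source is added with a saturating uqadd8. Per channel, again with mul_un8 as sketched earlier:

    /* OVER with a solid premultiplied source, one channel:
     * dest = src + dest * (255 - src_alpha), saturating. */
    static uint8_t
    over_n_channel (uint8_t d, uint8_t s, uint8_t srca)
    {
        uint32_t t = s + mul_un8 (d, 255 - srca);
        return (uint8_t)(t > 255 ? 255 : t);
    }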
diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
index 496e37e309..da153c3f58 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h
@@ -76,6 +76,16 @@
.set FLAG_SPILL_LINE_VARS, 48
.set FLAG_PROCESS_CORRUPTS_SCRATCH, 0
.set FLAG_PROCESS_PRESERVES_SCRATCH, 64
+.set FLAG_PROCESS_PRESERVES_WK0, 0
+.set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
+.set FLAG_PRELOAD_DST, 0
+.set FLAG_NO_PRELOAD_DST, 256
+
+/*
+ * Number of bytes by which to adjust preload offset of destination
+ * buffer (allows preload instruction to be moved before the load(s))
+ */
+.set DST_PRELOAD_BIAS, 0
/*
* Offset into stack where mask and source pointer/stride can be accessed.
@@ -87,6 +97,11 @@
#endif
/*
+ * Offset into stack where space allocated during init macro can be accessed.
+ */
+.set LOCALS_STACK_OFFSET, 0
+
+/*
* Constants for selecting preferable prefetch type.
*/
.set PREFETCH_TYPE_NONE, 0
@@ -196,8 +211,8 @@
PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift
PF and, SCRATCH, SCRATCH, #31
PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift
- PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */
- PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */
+ PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */
+ PF movs, SCRATCH, SCRATCH, lsl #32-6 /* so this sets NC / nc / Nc */
PF bcs, 61f
PF bpl, 60f
PF pld, [ptr, #32*(prefetch_distance+2)]
@@ -359,23 +374,41 @@
.macro test_bits_1_0_ptr
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ movs SCRATCH, X, lsl #32-1 /* C,N = bits 1,0 of DST */
+ .else
movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */
+ .endif
.endm
.macro test_bits_3_2_ptr
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ movs SCRATCH, X, lsl #32-3 /* C,N = bits 3, 2 of DST */
+ .else
movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */
+ .endif
.endm
.macro leading_15bytes process_head, process_tail
/* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
+ .set DECREMENT_X, 1
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ .set DECREMENT_X, 0
+ sub X, X, WK0, lsr #dst_bpp_shift
+ str X, [sp, #LINE_SAVED_REG_COUNT*4]
+ mov X, WK0
+ .endif
/* Use unaligned loads in all cases for simplicity */
.if dst_w_bpp == 8
- conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1
+ conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X
.elseif dst_w_bpp == 16
test_bits_1_0_ptr
- conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, 1
+ conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X
+ .endif
+ conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ ldr X, [sp, #LINE_SAVED_REG_COUNT*4]
.endif
- conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1
.endm
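[Note: the rewritten alignment computation in leading_15bytes is equivalent to the old one for unaligned pointers, but keeps the full count in WK0 so X can be pre-decremented when FLAG_PROCESS_CORRUPTS_WK0 is set. A quick C check of the equivalence — a sketch, not part of the patch:

#include <assert.h>
#include <stdint.h>

static unsigned
leading_bytes_model (uintptr_t dst)
{
    unsigned old_form = (unsigned) (-dst) & 15;      /* rsb WK0, DST, #0; bits 0-3  */
    unsigned new_form = 16 - (unsigned) (dst & 15);  /* ands WK0, DST, #15; rsb #16 */

    /* the aligned case branches to 154f/164f first, so dst & 15 != 0 here */
    assert ((dst & 15) == 0 || old_form == new_form);
    return new_form;
}
]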
.macro test_bits_3_2_pix
@@ -414,7 +447,7 @@
preload_middle src_bpp, SRC, 0
preload_middle mask_bpp, MASK, 0
.endif
- .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
+ .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
/* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
* destination prefetches are 32-byte aligned. It's also the easiest channel to offset
* preloads for, to achieve staggered prefetches for multiple channels, because there are
@@ -437,11 +470,11 @@
.if dst_r_bpp > 0
tst DST, #16
bne 111f
- process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16
+ process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS
b 112f
111:
.endif
- process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0
+ process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS
112:
/* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
.if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
@@ -449,7 +482,9 @@
.endif
preload_trailing src_bpp, src_bpp_shift, SRC
preload_trailing mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_trailing dst_r_bpp, dst_bpp_shift, DST
+ .endif
add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
/* The remainder of the line is handled identically to the medium case */
medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
@@ -561,13 +596,7 @@
process_tail, \
process_inner_loop
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
+ pixman_asm_function fname
/*
* Make some macro arguments globally visible and accessible
@@ -679,16 +708,12 @@
SCRATCH .req r12
ORIG_W .req r14 /* width (pixels) */
-fname:
- .fnstart
- .save {r4-r11, lr}
push {r4-r11, lr} /* save all registers */
subs Y, Y, #1
blo 199f
#ifdef DEBUG_PARAMS
- .pad #9*4
sub sp, sp, #9*4
#endif
@@ -708,6 +733,13 @@ fname:
#endif
init
+
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ /* Reserve a word in which to store X during leading pixels */
+ sub sp, sp, #4
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4
+ .endif
lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */
sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
@@ -737,42 +769,49 @@ fname:
.if (flags) & FLAG_SPILL_LINE_VARS_WIDE
/* This is stmdb sp!,{} */
.word 0xE92D0000 | LINE_SAVED_REGS
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.endif
151: /* New line */
newline
preload_leading_step1 src_bpp, WK1, SRC
preload_leading_step1 mask_bpp, WK2, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step1 dst_r_bpp, WK3, DST
+ .endif
- tst DST, #15
+ ands WK0, DST, #15
beq 154f
- rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
- .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp)
- PF and, WK0, WK0, #15
- .endif
+ rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC
preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST
+ .endif
leading_15bytes process_head, process_tail
154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
- .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+ .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
and SCRATCH, SRC, #31
rsb SCRATCH, SCRATCH, #32*prefetch_distance
- .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+ .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
and SCRATCH, MASK, #31
rsb SCRATCH, SCRATCH, #32*prefetch_distance
- .endif
- .ifc "process_inner_loop",""
+ .endif
+ .ifc "process_inner_loop",""
switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
- .else
+ .else
switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
- .endif
+ .endif
157: /* Check for another line */
end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b
+ .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+ .endif
.endif
.ltorg
@@ -782,17 +821,21 @@ fname:
.if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
/* This is stmdb sp!,{} */
.word 0xE92D0000 | LINE_SAVED_REGS
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4
.endif
161: /* New line */
newline
preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 0, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 0, dst_r_bpp, dst_bpp_shift, DST
+ .endif
sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
- tst DST, #15
+ ands WK0, DST, #15
beq 164f
- rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
+ rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */
leading_15bytes process_head, process_tail
@@ -816,7 +859,9 @@ fname:
newline
preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 1, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 1, dst_r_bpp, dst_bpp_shift, DST
+ .endif
.if dst_w_bpp == 8
tst DST, #3
@@ -847,12 +892,22 @@ fname:
177: /* Check for another line */
end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one
+ .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4
+ .endif
197:
.if (flags) & FLAG_SPILL_LINE_VARS
add sp, sp, #LINE_SAVED_REG_COUNT*4
.endif
198:
+ .if (flags) & FLAG_PROCESS_CORRUPTS_WK0
+ .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4
+ .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4
+ add sp, sp, #4
+ .endif
+
cleanup
#ifdef DEBUG_PARAMS
@@ -860,7 +915,6 @@ fname:
#endif
199:
pop {r4-r11, pc} /* exit */
- .fnend
.ltorg
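[Note: a pattern worth spelling out in the hunks above: ARGS_STACK_OFFSET and LOCALS_STACK_OFFSET are assembler-time constants that must always equal the distance from the live sp to the saved arguments and init-time locals, so every sp adjustment (the 4-byte WK0 spill, the line-variable stmdb) bumps them and every matching release undoes it. A C model of the invariant, with illustrative names and an example register count:

/* Offsets grow with every push below the saved slots and shrink with
 * every pop, keeping [sp, #ARGS_STACK_OFFSET] pointed at the args. */
enum { LINE_SAVED_REG_COUNT = 4 };            /* example value */

static unsigned args_stack_offset, locals_stack_offset;

static void push_bytes (unsigned n) { args_stack_offset += n; locals_stack_offset += n; }
static void pop_bytes  (unsigned n) { args_stack_offset -= n; locals_stack_offset -= n; }

static void
line_loop_model (void)
{
    push_bytes (4);                           /* sub sp, sp, #4 (X spill word) */
    push_bytes (LINE_SAVED_REG_COUNT * 4);    /* stmdb sp!, {line variables}   */
    /* ... per-line processing: loads use sp + args_stack_offset ...           */
    pop_bytes (LINE_SAVED_REG_COUNT * 4);     /* end_of_line pops line vars    */
    pop_bytes (4);                            /* add sp, sp, #4 at label 198   */
}
]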
diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd.c b/gfx/cairo/libpixman/src/pixman-arm-simd.c
index af062e19dc..f0d14540bc 100644
--- a/gfx/cairo/libpixman/src/pixman-arm-simd.c
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd.c
@@ -41,11 +41,20 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565,
+ uint32_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
+ uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -53,6 +62,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
uint16_t, uint16_t)
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
@@ -216,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565),
+
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
@@ -225,6 +242,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
+
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
@@ -232,15 +256,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+ PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888),
+ PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888),
+ PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888),
+ PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888),
+
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
{ PIXMAN_OP_NONE },
};
diff --git a/gfx/cairo/libpixman/src/pixman-arm.c b/gfx/cairo/libpixman/src/pixman-arm.c
index 358372e50a..4a2ae85393 100644
--- a/gfx/cairo/libpixman/src/pixman-arm.c
+++ b/gfx/cairo/libpixman/src/pixman-arm.c
@@ -1,20 +1,19 @@
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
- * Copyright © 2021 Moonchild Productions
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
- * documentation, and that the names of the authors not be used in advertising or
+ * documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
- * written prior permission. The authors make no representations about the
+ * written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
- * THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHORS
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
@@ -95,6 +94,35 @@ detect_cpu_features (void)
return features;
}
+#elif defined(__ANDROID__) || defined(ANDROID) /* Android */
+
+#include <cpu-features.h>
+
+static arm_cpu_features_t
+detect_cpu_features (void)
+{
+ arm_cpu_features_t features = 0;
+ AndroidCpuFamily cpu_family;
+ uint64_t cpu_features;
+
+ cpu_family = android_getCpuFamily();
+ cpu_features = android_getCpuFeatures();
+
+ if (cpu_family == ANDROID_CPU_FAMILY_ARM)
+ {
+ if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7)
+ features |= ARM_V7;
+
+ if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3)
+ features |= ARM_VFP;
+
+ if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)
+ features |= ARM_NEON;
+ }
+
+ return features;
+}
+
#elif defined (__linux__) /* linux ELF */
#include <unistd.h>
@@ -148,6 +176,31 @@ detect_cpu_features (void)
return features;
}
+#elif defined (_3DS) /* 3DS homebrew (devkitARM) */
+
+static arm_cpu_features_t
+detect_cpu_features (void)
+{
+ arm_cpu_features_t features = 0;
+
+ features |= ARM_V6;
+
+ return features;
+}
+
+#elif defined (PSP2) || defined (__SWITCH__)
+/* Vita (VitaSDK) or Switch (devkitA64) homebrew */
+
+static arm_cpu_features_t
+detect_cpu_features (void)
+{
+ arm_cpu_features_t features = 0;
+
+ features |= ARM_NEON;
+
+ return features;
+}
+
#else /* Unknown */
static arm_cpu_features_t
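[Note: the new platform branches all feed the same dispatch: whichever detect_cpu_features variant is compiled in, the resulting feature bits choose the implementation chain. A simplified sketch of that use — the real _pixman_arm_get_implementations additionally honours the USE_ARM_* build guards and the PIXMAN_DISABLE environment knobs:

static pixman_implementation_t *
arm_get_implementations_sketch (pixman_implementation_t *imp)
{
    arm_cpu_features_t features = detect_cpu_features ();

    if (features & ARM_V6)
        imp = _pixman_implementation_create_arm_simd (imp);

    if (features & ARM_NEON)
        imp = _pixman_implementation_create_arm_neon (imp);

    return imp;
}
]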
diff --git a/gfx/cairo/libpixman/src/pixman-bits-image.c b/gfx/cairo/libpixman/src/pixman-bits-image.c
index e9d2fb69c6..4cfabe318a 100644
--- a/gfx/cairo/libpixman/src/pixman-bits-image.c
+++ b/gfx/cairo/libpixman/src/pixman-bits-image.c
@@ -35,44 +35,47 @@
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
+#include "dither/blue-noise-64x64.h"
-static uint32_t *
-_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
- const uint32_t *mask)
-{
- pixman_iter_get_scanline_t fetch_32 = iter->data;
- uint32_t *buffer = iter->buffer;
-
- fetch_32 (iter, NULL);
+/* Fetch functions */
- pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+static force_inline void
+fetch_pixel_no_alpha_32 (bits_image_t *image,
+ int x, int y, pixman_bool_t check_bounds,
+ void *out)
+{
+ uint32_t *ret = out;
- return iter->buffer;
+ if (check_bounds &&
+ (x < 0 || x >= image->width || y < 0 || y >= image->height))
+ *ret = 0;
+ else
+ *ret = image->fetch_pixel_32 (image, x, y);
}
-/* Fetch functions */
-
-static force_inline uint32_t
-fetch_pixel_no_alpha (bits_image_t *image,
- int x, int y, pixman_bool_t check_bounds)
+static force_inline void
+fetch_pixel_no_alpha_float (bits_image_t *image,
+ int x, int y, pixman_bool_t check_bounds,
+ void *out)
{
+ argb_t *ret = out;
+
if (check_bounds &&
(x < 0 || x >= image->width || y < 0 || y >= image->height))
- {
- return 0;
- }
-
- return image->fetch_pixel_32 (image, x, y);
+ ret->a = ret->r = ret->g = ret->b = 0.f;
+ else
+ *ret = image->fetch_pixel_float (image, x, y);
}
-typedef uint32_t (* get_pixel_t) (bits_image_t *image,
- int x, int y, pixman_bool_t check_bounds);
+typedef void (* get_pixel_t) (bits_image_t *image,
+ int x, int y, pixman_bool_t check_bounds, void *out);
-static force_inline uint32_t
+static force_inline void
bits_image_fetch_pixel_nearest (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
- get_pixel_t get_pixel)
+ get_pixel_t get_pixel,
+ void *out)
{
int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
int y0 = pixman_fixed_to_int (y - pixman_fixed_e);
@@ -82,19 +85,20 @@ bits_image_fetch_pixel_nearest (bits_image_t *image,
repeat (image->common.repeat, &x0, image->width);
repeat (image->common.repeat, &y0, image->height);
- return get_pixel (image, x0, y0, FALSE);
+ get_pixel (image, x0, y0, FALSE, out);
}
else
{
- return get_pixel (image, x0, y0, TRUE);
+ get_pixel (image, x0, y0, TRUE, out);
}
}
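[Note: the pixman_fixed_e subtraction in bits_image_fetch_pixel_nearest is what makes sample points at pixel centres land in the right pixel. A one-line model, as a hypothetical helper:

#include <stdint.h>

/* Nearest sampling: subtracting pixman_fixed_e (1/65536) before the
 * floor makes a coordinate of N + 0.5 select pixel N, while an exact
 * boundary N.0 falls into pixel N - 1. */
static int
nearest_index_model (int32_t x)               /* 16.16 fixed point */
{
    return (x - 1) >> 16;                     /* pixman_fixed_to_int (x - pixman_fixed_e) */
}
]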
-static force_inline uint32_t
-bits_image_fetch_pixel_bilinear (bits_image_t *image,
- pixman_fixed_t x,
- pixman_fixed_t y,
- get_pixel_t get_pixel)
+static force_inline void
+bits_image_fetch_pixel_bilinear_32 (bits_image_t *image,
+ pixman_fixed_t x,
+ pixman_fixed_t y,
+ get_pixel_t get_pixel,
+ void *out)
{
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
@@ -102,6 +106,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
int x1, y1, x2, y2;
uint32_t tl, tr, bl, br;
int32_t distx, disty;
+ uint32_t *ret = out;
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
@@ -121,242 +126,142 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
repeat (repeat_mode, &x2, width);
repeat (repeat_mode, &y2, height);
- tl = get_pixel (image, x1, y1, FALSE);
- bl = get_pixel (image, x1, y2, FALSE);
- tr = get_pixel (image, x2, y1, FALSE);
- br = get_pixel (image, x2, y2, FALSE);
+ get_pixel (image, x1, y1, FALSE, &tl);
+ get_pixel (image, x2, y1, FALSE, &tr);
+ get_pixel (image, x1, y2, FALSE, &bl);
+ get_pixel (image, x2, y2, FALSE, &br);
}
else
{
- tl = get_pixel (image, x1, y1, TRUE);
- tr = get_pixel (image, x2, y1, TRUE);
- bl = get_pixel (image, x1, y2, TRUE);
- br = get_pixel (image, x2, y2, TRUE);
+ get_pixel (image, x1, y1, TRUE, &tl);
+ get_pixel (image, x2, y1, TRUE, &tr);
+ get_pixel (image, x1, y2, TRUE, &bl);
+ get_pixel (image, x2, y2, TRUE, &br);
}
- return bilinear_interpolation (tl, tr, bl, br, distx, disty);
+ *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
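[Note: for reference, bilinear_interpolation blends the 2x2 neighbourhood with the distx/disty weights computed above. A per-channel sketch assuming 8-bit weights — pixman's BILINEAR_INTERPOLATION_BITS is 7 or 8 depending on platform, and the real helper works on packed channel pairs rather than one channel at a time:

#include <stdint.h>

/* Per-channel reference: distx/disty in [0, 256), result truncated. */
static uint32_t
bilerp_channel_model (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br,
                      uint32_t distx, uint32_t disty)
{
    uint32_t top    = tl * (256 - distx) + tr * distx;
    uint32_t bottom = bl * (256 - distx) + br * distx;

    return (top * (256 - disty) + bottom * disty) >> 16;
}
]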
-static uint32_t *
-bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
- const uint32_t *mask)
+static force_inline void
+bits_image_fetch_pixel_bilinear_float (bits_image_t *image,
+ pixman_fixed_t x,
+ pixman_fixed_t y,
+ get_pixel_t get_pixel,
+ void *out)
{
+ pixman_repeat_t repeat_mode = image->common.repeat;
+ int width = image->width;
+ int height = image->height;
+ int x1, y1, x2, y2;
+ argb_t tl, tr, bl, br;
+ float distx, disty;
+ argb_t *ret = out;
- pixman_image_t * ima = iter->image;
- int offset = iter->x;
- int line = iter->y++;
- int width = iter->width;
- uint32_t * buffer = iter->buffer;
-
- bits_image_t *bits = &ima->bits;
- pixman_fixed_t x_top, x_bottom, x;
- pixman_fixed_t ux_top, ux_bottom, ux;
- pixman_vector_t v;
- uint32_t top_mask, bottom_mask;
- uint32_t *top_row;
- uint32_t *bottom_row;
- uint32_t *end;
- uint32_t zero[2] = { 0, 0 };
- uint32_t one = 1;
- int y, y1, y2;
- int disty;
- int mask_inc;
- int w;
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (bits->common.transform, &v))
- return iter->buffer;
-
- ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
- x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
+ x1 = x - pixman_fixed_1 / 2;
+ y1 = y - pixman_fixed_1 / 2;
- y = v.vector[1] - pixman_fixed_1/2;
- disty = pixman_fixed_to_bilinear_weight (y);
+ distx = ((float)pixman_fixed_fraction(x1)) / 65536.f;
+ disty = ((float)pixman_fixed_fraction(y1)) / 65536.f;
- /* Load the pointers to the first and second lines from the source
- * image that bilinear code must read.
- *
- * The main trick in this code is about the check if any line are
- * outside of the image;
- *
- * When I realize that a line (any one) is outside, I change
- * the pointer to a dummy area with zeros. Once I change this, I
- * must be sure the pointer will not change, so I set the
- * variables to each pointer increments inside the loop.
- */
- y1 = pixman_fixed_to_int (y);
+ x1 = pixman_fixed_to_int (x1);
+ y1 = pixman_fixed_to_int (y1);
+ x2 = x1 + 1;
y2 = y1 + 1;
- if (y1 < 0 || y1 >= bits->height)
- {
- top_row = zero;
- x_top = 0;
- ux_top = 0;
- }
- else
- {
- top_row = bits->bits + y1 * bits->rowstride;
- x_top = x;
- ux_top = ux;
- }
-
- if (y2 < 0 || y2 >= bits->height)
- {
- bottom_row = zero;
- x_bottom = 0;
- ux_bottom = 0;
- }
- else
- {
- bottom_row = bits->bits + y2 * bits->rowstride;
- x_bottom = x;
- ux_bottom = ux;
- }
-
- /* Instead of checking whether the operation uses the mast in
- * each loop iteration, verify this only once and prepare the
- * variables to make the code smaller inside the loop.
- */
- if (!mask)
- {
- mask_inc = 0;
- mask = &one;
- }
- else
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
{
- /* If have a mask, prepare the variables to check it */
- mask_inc = 1;
- }
+ repeat (repeat_mode, &x1, width);
+ repeat (repeat_mode, &y1, height);
+ repeat (repeat_mode, &x2, width);
+ repeat (repeat_mode, &y2, height);
- /* If both are zero, then the whole thing is zero */
- if (top_row == zero && bottom_row == zero)
- {
- memset (buffer, 0, width * sizeof (uint32_t));
- return iter->buffer;
- }
- else if (bits->format == PIXMAN_x8r8g8b8)
- {
- if (top_row == zero)
- {
- top_mask = 0;
- bottom_mask = 0xff000000;
- }
- else if (bottom_row == zero)
- {
- top_mask = 0xff000000;
- bottom_mask = 0;
- }
- else
- {
- top_mask = 0xff000000;
- bottom_mask = 0xff000000;
- }
+ get_pixel (image, x1, y1, FALSE, &tl);
+ get_pixel (image, x2, y1, FALSE, &tr);
+ get_pixel (image, x1, y2, FALSE, &bl);
+ get_pixel (image, x2, y2, FALSE, &br);
}
else
{
- top_mask = 0;
- bottom_mask = 0;
- }
-
- end = buffer + width;
-
- /* Zero fill to the left of the image */
- while (buffer < end && x < pixman_fixed_minus_1)
- {
- *buffer++ = 0;
- x += ux;
- x_top += ux_top;
- x_bottom += ux_bottom;
- mask += mask_inc;
- }
-
- /* Left edge
- */
- while (buffer < end && x < 0)
- {
- uint32_t tr, br;
- int32_t distx;
-
- tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
- br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
-
- distx = pixman_fixed_to_bilinear_weight (x);
-
- *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
-
- x += ux;
- x_top += ux_top;
- x_bottom += ux_bottom;
- mask += mask_inc;
+ get_pixel (image, x1, y1, TRUE, &tl);
+ get_pixel (image, x2, y1, TRUE, &tr);
+ get_pixel (image, x1, y2, TRUE, &bl);
+ get_pixel (image, x2, y2, TRUE, &br);
}
- /* Main part */
- w = pixman_int_to_fixed (bits->width - 1);
+ *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty);
+}
- while (buffer < end && x < w)
- {
- if (*mask)
- {
- uint32_t tl, tr, bl, br;
- int32_t distx;
+static force_inline void accum_32(unsigned int *satot, unsigned int *srtot,
+ unsigned int *sgtot, unsigned int *sbtot,
+ const void *p, pixman_fixed_t f)
+{
+ uint32_t pixel = *(uint32_t *)p;
- tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
- tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
- bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
- br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+ *srtot += (int)RED_8 (pixel) * f;
+ *sgtot += (int)GREEN_8 (pixel) * f;
+ *sbtot += (int)BLUE_8 (pixel) * f;
+ *satot += (int)ALPHA_8 (pixel) * f;
+}
- distx = pixman_fixed_to_bilinear_weight (x);
+static force_inline void reduce_32(unsigned int satot, unsigned int srtot,
+ unsigned int sgtot, unsigned int sbtot,
+ void *p)
+{
+ uint32_t *ret = p;
- *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
- }
+ satot = (satot + 0x8000) >> 16;
+ srtot = (srtot + 0x8000) >> 16;
+ sgtot = (sgtot + 0x8000) >> 16;
+ sbtot = (sbtot + 0x8000) >> 16;
- buffer++;
- x += ux;
- x_top += ux_top;
- x_bottom += ux_bottom;
- mask += mask_inc;
- }
+ satot = CLIP (satot, 0, 0xff);
+ srtot = CLIP (srtot, 0, 0xff);
+ sgtot = CLIP (sgtot, 0, 0xff);
+ sbtot = CLIP (sbtot, 0, 0xff);
- /* Right Edge */
- w = pixman_int_to_fixed (bits->width);
- while (buffer < end && x < w)
- {
- if (*mask)
- {
- uint32_t tl, bl;
- int32_t distx;
+ *ret = ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
+}
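[Note: the accumulate/reduce split keeps the old fixed-point arithmetic for the 32-bit path: filter weights are 16.16, so the channel totals are rounded by 0x8000 and shifted before clipping. A worked example:

/* A channel total of 0x00ff8000 is 255.5 in 16.16 fixed point;
 * rounding gives 0x100, which CLIP then pins to 0xff. */
static unsigned int
reduce_channel_example (void)
{
    unsigned int total   = 0x00ff8000;
    unsigned int rounded = (total + 0x8000) >> 16;   /* 0x100 */

    return rounded > 0xff ? 0xff : rounded;          /* 0xff  */
}
]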
- tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
- bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+static force_inline void accum_float(unsigned int *satot, unsigned int *srtot,
+ unsigned int *sgtot, unsigned int *sbtot,
+ const void *p, pixman_fixed_t f)
+{
+ const argb_t *pixel = p;
- distx = pixman_fixed_to_bilinear_weight (x);
+ *satot += pixel->a * f;
+ *srtot += pixel->r * f;
+ *sgtot += pixel->g * f;
+ *sbtot += pixel->b * f;
+}
- *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
- }
+static force_inline void reduce_float(unsigned int satot, unsigned int srtot,
+ unsigned int sgtot, unsigned int sbtot,
+ void *p)
+{
+ argb_t *ret = p;
- buffer++;
- x += ux;
- x_top += ux_top;
- x_bottom += ux_bottom;
- mask += mask_inc;
- }
+ ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
+ ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
+ ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
+ ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
+}
- /* Zero fill to the left of the image */
- while (buffer < end)
- *buffer++ = 0;
+typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot,
+ unsigned int *sgtot, unsigned int *sbtot,
+ const void *pixel, pixman_fixed_t f);
- return iter->buffer;
-}
+typedef void (* reduce_pixel_t) (unsigned int satot, unsigned int srtot,
+ unsigned int sgtot, unsigned int sbtot,
+ void *out);
-static force_inline uint32_t
+static force_inline void
bits_image_fetch_pixel_convolution (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
- get_pixel_t get_pixel)
+ get_pixel_t get_pixel,
+ void *out,
+ accumulate_pixel_t accum,
+ reduce_pixel_t reduce)
{
pixman_fixed_t *params = image->common.filter_params;
int x_off = (params[0] - pixman_fixed_1) >> 1;
@@ -367,7 +272,7 @@ bits_image_fetch_pixel_convolution (bits_image_t *image,
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
- int srtot, sgtot, sbtot, satot;
+ unsigned int srtot, sgtot, sbtot, satot;
params += 2;
@@ -389,48 +294,39 @@ bits_image_fetch_pixel_convolution (bits_image_t *image,
if (f)
{
- uint32_t pixel;
+ /* Must be big enough to hold an argb_t */
+ argb_t pixel;
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, width);
repeat (repeat_mode, &ry, height);
- pixel = get_pixel (image, rx, ry, FALSE);
+ get_pixel (image, rx, ry, FALSE, &pixel);
}
else
{
- pixel = get_pixel (image, rx, ry, TRUE);
+ get_pixel (image, rx, ry, TRUE, &pixel);
}
- srtot += (int)RED_8 (pixel) * f;
- sgtot += (int)GREEN_8 (pixel) * f;
- sbtot += (int)BLUE_8 (pixel) * f;
- satot += (int)ALPHA_8 (pixel) * f;
+ accum (&satot, &srtot, &sgtot, &sbtot, &pixel, f);
}
params++;
}
}
- satot = (satot + 0x8000) >> 16;
- srtot = (srtot + 0x8000) >> 16;
- sgtot = (sgtot + 0x8000) >> 16;
- sbtot = (sbtot + 0x8000) >> 16;
-
- satot = CLIP (satot, 0, 0xff);
- srtot = CLIP (srtot, 0, 0xff);
- sgtot = CLIP (sgtot, 0, 0xff);
- sbtot = CLIP (sbtot, 0, 0xff);
-
- return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
+ reduce (satot, srtot, sgtot, sbtot, out);
}
-static uint32_t
-bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
- pixman_fixed_t x,
- pixman_fixed_t y,
- get_pixel_t get_pixel)
+static void
+bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
+ pixman_fixed_t x,
+ pixman_fixed_t y,
+ get_pixel_t get_pixel,
+ void *out,
+ accumulate_pixel_t accum,
+ reduce_pixel_t reduce)
{
pixman_fixed_t *params = image->common.filter_params;
pixman_repeat_t repeat_mode = image->common.repeat;
@@ -445,7 +341,7 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
pixman_fixed_t *y_params;
- int srtot, sgtot, sbtot, satot;
+ unsigned int srtot, sgtot, sbtot, satot;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;
@@ -485,82 +381,100 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
if (fx)
{
+ /* Must be big enough to hold an argb_t */
+ argb_t pixel;
pixman_fixed_t f;
- uint32_t pixel;
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, width);
repeat (repeat_mode, &ry, height);
- pixel = get_pixel (image, rx, ry, FALSE);
+ get_pixel (image, rx, ry, FALSE, &pixel);
}
else
{
- pixel = get_pixel (image, rx, ry, TRUE);
+ get_pixel (image, rx, ry, TRUE, &pixel);
}
f = (fy * fx + 0x8000) >> 16;
- srtot += (int)RED_8 (pixel) * f;
- sgtot += (int)GREEN_8 (pixel) * f;
- sbtot += (int)BLUE_8 (pixel) * f;
- satot += (int)ALPHA_8 (pixel) * f;
+ accum(&satot, &srtot, &sgtot, &sbtot, &pixel, f);
}
}
}
}
- satot = (satot + 0x8000) >> 16;
- srtot = (srtot + 0x8000) >> 16;
- sgtot = (sgtot + 0x8000) >> 16;
- sbtot = (sbtot + 0x8000) >> 16;
-
- satot = CLIP (satot, 0, 0xff);
- srtot = CLIP (srtot, 0, 0xff);
- sgtot = CLIP (sgtot, 0, 0xff);
- sbtot = CLIP (sbtot, 0, 0xff);
- return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
+ reduce(satot, srtot, sgtot, sbtot, out);
}
-static force_inline uint32_t
-bits_image_fetch_pixel_filtered (bits_image_t *image,
+static force_inline void
+bits_image_fetch_pixel_filtered (bits_image_t *image,
+ pixman_bool_t wide,
pixman_fixed_t x,
pixman_fixed_t y,
- get_pixel_t get_pixel)
+ get_pixel_t get_pixel,
+ void *out)
{
switch (image->common.filter)
{
case PIXMAN_FILTER_NEAREST:
case PIXMAN_FILTER_FAST:
- return bits_image_fetch_pixel_nearest (image, x, y, get_pixel);
+ bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out);
break;
case PIXMAN_FILTER_BILINEAR:
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
- return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel);
+ if (wide)
+ bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out);
+ else
+ bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out);
break;
case PIXMAN_FILTER_CONVOLUTION:
- return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
+ if (wide)
+ {
+ bits_image_fetch_pixel_convolution (image, x, y,
+ get_pixel, out,
+ accum_float,
+ reduce_float);
+ }
+ else
+ {
+ bits_image_fetch_pixel_convolution (image, x, y,
+ get_pixel, out,
+ accum_32, reduce_32);
+ }
break;
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
- return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
+ if (wide)
+ {
+ bits_image_fetch_pixel_separable_convolution (image, x, y,
+ get_pixel, out,
+ accum_float,
+ reduce_float);
+ }
+ else
+ {
+ bits_image_fetch_pixel_separable_convolution (image, x, y,
+ get_pixel, out,
+ accum_32, reduce_32);
+ }
break;
default:
+ assert (0);
break;
}
-
- return 0;
}
static uint32_t *
-bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
- const uint32_t * mask)
+__bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
+ pixman_bool_t wide,
+ const uint32_t * mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
@@ -572,6 +486,8 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
pixman_fixed_t ux, uy;
pixman_vector_t v;
int i;
+ get_pixel_t get_pixel =
+ wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32;
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
@@ -599,27 +515,45 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
{
if (!mask || mask[i])
{
- buffer[i] = bits_image_fetch_pixel_filtered (
- &image->bits, x, y, fetch_pixel_no_alpha);
+ bits_image_fetch_pixel_filtered (
+ &image->bits, wide, x, y, get_pixel, buffer);
}
x += ux;
y += uy;
+ buffer += wide ? 4 : 1;
}
- return buffer;
+ return iter->buffer;
+}
+
+static uint32_t *
+bits_image_fetch_affine_no_alpha_32 (pixman_iter_t *iter,
+ const uint32_t *mask)
+{
+ return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask);
+}
+
+static uint32_t *
+bits_image_fetch_affine_no_alpha_float (pixman_iter_t *iter,
+ const uint32_t *mask)
+{
+ return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask);
}
/* General fetcher */
-static force_inline uint32_t
-fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds)
+static force_inline void
+fetch_pixel_general_32 (bits_image_t *image,
+ int x, int y, pixman_bool_t check_bounds,
+ void *out)
{
- uint32_t pixel;
+ uint32_t pixel, *ret = out;
if (check_bounds &&
(x < 0 || x >= image->width || y < 0 || y >= image->height))
{
- return 0;
+ *ret = 0;
+ return;
}
pixel = image->fetch_pixel_32 (image, x, y);
@@ -648,18 +582,59 @@ fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun
pixel |= (pixel_a << 24);
}
- return pixel;
+ *ret = pixel;
+}
+
+static force_inline void
+fetch_pixel_general_float (bits_image_t *image,
+ int x, int y, pixman_bool_t check_bounds,
+ void *out)
+{
+ argb_t *ret = out;
+
+ if (check_bounds &&
+ (x < 0 || x >= image->width || y < 0 || y >= image->height))
+ {
+ ret->a = ret->r = ret->g = ret->b = 0;
+ return;
+ }
+
+ *ret = image->fetch_pixel_float (image, x, y);
+
+ if (image->common.alpha_map)
+ {
+ x -= image->common.alpha_origin_x;
+ y -= image->common.alpha_origin_y;
+
+ if (x < 0 || x >= image->common.alpha_map->width ||
+ y < 0 || y >= image->common.alpha_map->height)
+ {
+ ret->a = 0.f;
+ }
+ else
+ {
+ argb_t alpha;
+
+ alpha = image->common.alpha_map->fetch_pixel_float (
+ image->common.alpha_map, x, y);
+
+ ret->a = alpha.a;
+ }
+ }
}
static uint32_t *
-bits_image_fetch_general (pixman_iter_t *iter,
- const uint32_t *mask)
+__bits_image_fetch_general (pixman_iter_t *iter,
+ pixman_bool_t wide,
+ const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
+ get_pixel_t get_pixel =
+ wide ? fetch_pixel_general_float : fetch_pixel_general_32;
pixman_fixed_t x, y, w;
pixman_fixed_t ux, uy, uw;
@@ -699,8 +674,8 @@ bits_image_fetch_general (pixman_iter_t *iter,
{
if (w != 0)
{
- x0 = ((pixman_fixed_48_16_t)x << 16) / w;
- y0 = ((pixman_fixed_48_16_t)y << 16) / w;
+ x0 = ((uint64_t)x << 16) / w;
+ y0 = ((uint64_t)y << 16) / w;
}
else
{
@@ -708,484 +683,33 @@ bits_image_fetch_general (pixman_iter_t *iter,
y0 = 0;
}
- buffer[i] = bits_image_fetch_pixel_filtered (
- &image->bits, x0, y0, fetch_pixel_general);
+ bits_image_fetch_pixel_filtered (
+ &image->bits, wide, x0, y0, get_pixel, buffer);
}
x += ux;
y += uy;
w += uw;
+ buffer += wide ? 4 : 1;
}
- return buffer;
-}
-
-typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
-
-static force_inline void
-bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
- int offset,
- int line,
- int width,
- uint32_t * buffer,
- const uint32_t * mask,
-
- convert_pixel_t convert_pixel,
- pixman_format_code_t format,
- pixman_repeat_t repeat_mode)
-{
- bits_image_t *bits = &image->bits;
- pixman_fixed_t *params = image->common.filter_params;
- int cwidth = pixman_fixed_to_int (params[0]);
- int cheight = pixman_fixed_to_int (params[1]);
- int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
- int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
- int x_phase_bits = pixman_fixed_to_int (params[2]);
- int y_phase_bits = pixman_fixed_to_int (params[3]);
- int x_phase_shift = 16 - x_phase_bits;
- int y_phase_shift = 16 - y_phase_bits;
- pixman_fixed_t vx, vy;
- pixman_fixed_t ux, uy;
- pixman_vector_t v;
- int k;
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (image->common.transform, &v))
- return;
-
- ux = image->common.transform->matrix[0][0];
- uy = image->common.transform->matrix[1][0];
-
- vx = v.vector[0];
- vy = v.vector[1];
-
- for (k = 0; k < width; ++k)
- {
- pixman_fixed_t *y_params;
- int satot, srtot, sgtot, sbtot;
- pixman_fixed_t x, y;
- int32_t x1, x2, y1, y2;
- int32_t px, py;
- int i, j;
-
- if (mask && !mask[k])
- goto next;
-
- /* Round x and y to the middle of the closest phase before continuing. This
- * ensures that the convolution matrix is aligned right, since it was
- * positioned relative to a particular phase (and not relative to whatever
- * exact fraction we happen to get here).
- */
- x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
- y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
-
- px = (x & 0xffff) >> x_phase_shift;
- py = (y & 0xffff) >> y_phase_shift;
-
- x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
- y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
- x2 = x1 + cwidth;
- y2 = y1 + cheight;
-
- satot = srtot = sgtot = sbtot = 0;
-
- y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
-
- for (i = y1; i < y2; ++i)
- {
- pixman_fixed_t fy = *y_params++;
-
- if (fy)
- {
- pixman_fixed_t *x_params = params + 4 + px * cwidth;
-
- for (j = x1; j < x2; ++j)
- {
- pixman_fixed_t fx = *x_params++;
- int rx = j;
- int ry = i;
-
- if (fx)
- {
- pixman_fixed_t f;
- uint32_t pixel, mask;
- uint8_t *row;
-
- mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
- if (repeat_mode != PIXMAN_REPEAT_NONE)
- {
- repeat (repeat_mode, &rx, bits->width);
- repeat (repeat_mode, &ry, bits->height);
-
- row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
- pixel = convert_pixel (row, rx) | mask;
- }
- else
- {
- if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
- {
- pixel = 0;
- }
- else
- {
- row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
- pixel = convert_pixel (row, rx) | mask;
- }
- }
-
- f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
- srtot += (int)RED_8 (pixel) * f;
- sgtot += (int)GREEN_8 (pixel) * f;
- sbtot += (int)BLUE_8 (pixel) * f;
- satot += (int)ALPHA_8 (pixel) * f;
- }
- }
- }
- }
-
- satot = (satot + 0x8000) >> 16;
- srtot = (srtot + 0x8000) >> 16;
- sgtot = (sgtot + 0x8000) >> 16;
- sbtot = (sbtot + 0x8000) >> 16;
-
- satot = CLIP (satot, 0, 0xff);
- srtot = CLIP (srtot, 0, 0xff);
- sgtot = CLIP (sgtot, 0, 0xff);
- sbtot = CLIP (sbtot, 0, 0xff);
-
- buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
-
- next:
- vx += ux;
- vy += uy;
- }
-}
-
-static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static force_inline void
-bits_image_fetch_bilinear_affine (pixman_image_t * image,
- int offset,
- int line,
- int width,
- uint32_t * buffer,
- const uint32_t * mask,
-
- convert_pixel_t convert_pixel,
- pixman_format_code_t format,
- pixman_repeat_t repeat_mode)
-{
- pixman_fixed_t x, y;
- pixman_fixed_t ux, uy;
- pixman_vector_t v;
- bits_image_t *bits = &image->bits;
- int i;
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (image->common.transform, &v))
- return;
-
- ux = image->common.transform->matrix[0][0];
- uy = image->common.transform->matrix[1][0];
-
- x = v.vector[0];
- y = v.vector[1];
-
- for (i = 0; i < width; ++i)
- {
- int x1, y1, x2, y2;
- uint32_t tl, tr, bl, br;
- int32_t distx, disty;
- int width = image->bits.width;
- int height = image->bits.height;
- const uint8_t *row1;
- const uint8_t *row2;
-
- if (mask && !mask[i])
- goto next;
-
- x1 = x - pixman_fixed_1 / 2;
- y1 = y - pixman_fixed_1 / 2;
-
- distx = pixman_fixed_to_bilinear_weight (x1);
- disty = pixman_fixed_to_bilinear_weight (y1);
-
- y1 = pixman_fixed_to_int (y1);
- y2 = y1 + 1;
- x1 = pixman_fixed_to_int (x1);
- x2 = x1 + 1;
-
- if (repeat_mode != PIXMAN_REPEAT_NONE)
- {
- uint32_t mask;
-
- mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
- repeat (repeat_mode, &x1, width);
- repeat (repeat_mode, &y1, height);
- repeat (repeat_mode, &x2, width);
- repeat (repeat_mode, &y2, height);
-
- row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
- row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
-
- tl = convert_pixel (row1, x1) | mask;
- tr = convert_pixel (row1, x2) | mask;
- bl = convert_pixel (row2, x1) | mask;
- br = convert_pixel (row2, x2) | mask;
- }
- else
- {
- uint32_t mask1, mask2;
- int bpp;
-
- /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
- * which means if you use it in expressions, those
- * expressions become unsigned themselves. Since
- * the variables below can be negative in some cases,
- * that will lead to crashes on 64 bit architectures.
- *
- * So this line makes sure bpp is signed
- */
- bpp = PIXMAN_FORMAT_BPP (format);
-
- if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
- {
- buffer[i] = 0;
- goto next;
- }
-
- if (y2 == 0)
- {
- row1 = zero;
- mask1 = 0;
- }
- else
- {
- row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
- row1 += bpp / 8 * x1;
-
- mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
- }
-
- if (y1 == height - 1)
- {
- row2 = zero;
- mask2 = 0;
- }
- else
- {
- row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
- row2 += bpp / 8 * x1;
-
- mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
- }
-
- if (x2 == 0)
- {
- tl = 0;
- bl = 0;
- }
- else
- {
- tl = convert_pixel (row1, 0) | mask1;
- bl = convert_pixel (row2, 0) | mask2;
- }
-
- if (x1 == width - 1)
- {
- tr = 0;
- br = 0;
- }
- else
- {
- tr = convert_pixel (row1, 1) | mask1;
- br = convert_pixel (row2, 1) | mask2;
- }
- }
-
- buffer[i] = bilinear_interpolation (
- tl, tr, bl, br, distx, disty);
-
- next:
- x += ux;
- y += uy;
- }
-}
-
-static force_inline void
-bits_image_fetch_nearest_affine (pixman_image_t * image,
- int offset,
- int line,
- int width,
- uint32_t * buffer,
- const uint32_t * mask,
-
- convert_pixel_t convert_pixel,
- pixman_format_code_t format,
- pixman_repeat_t repeat_mode)
-{
- pixman_fixed_t x, y;
- pixman_fixed_t ux, uy;
- pixman_vector_t v;
- bits_image_t *bits = &image->bits;
- int i;
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (image->common.transform, &v))
- return;
-
- ux = image->common.transform->matrix[0][0];
- uy = image->common.transform->matrix[1][0];
-
- x = v.vector[0];
- y = v.vector[1];
-
- for (i = 0; i < width; ++i)
- {
- int width, height, x0, y0;
- const uint8_t *row;
-
- if (mask && !mask[i])
- goto next;
-
- width = image->bits.width;
- height = image->bits.height;
- x0 = pixman_fixed_to_int (x - pixman_fixed_e);
- y0 = pixman_fixed_to_int (y - pixman_fixed_e);
-
- if (repeat_mode == PIXMAN_REPEAT_NONE &&
- (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
- {
- buffer[i] = 0;
- }
- else
- {
- uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
- if (repeat_mode != PIXMAN_REPEAT_NONE)
- {
- repeat (repeat_mode, &x0, width);
- repeat (repeat_mode, &y0, height);
- }
-
- row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0;
-
- buffer[i] = convert_pixel (row, x0) | mask;
- }
-
- next:
- x += ux;
- y += uy;
- }
-}
-
-static force_inline uint32_t
-convert_a8r8g8b8 (const uint8_t *row, int x)
-{
- return *(((uint32_t *)row) + x);
-}
-
-static force_inline uint32_t
-convert_x8r8g8b8 (const uint8_t *row, int x)
-{
- return *(((uint32_t *)row) + x);
+ return iter->buffer;
}
-static force_inline uint32_t
-convert_a8 (const uint8_t *row, int x)
+static uint32_t *
+bits_image_fetch_general_32 (pixman_iter_t *iter,
+ const uint32_t *mask)
{
- return *(row + x) << 24;
+ return __bits_image_fetch_general(iter, FALSE, mask);
}
-static force_inline uint32_t
-convert_r5g6b5 (const uint8_t *row, int x)
+static uint32_t *
+bits_image_fetch_general_float (pixman_iter_t *iter,
+ const uint32_t *mask)
{
- return convert_0565_to_0888 (*((uint16_t *)row + x));
+ return __bits_image_fetch_general(iter, TRUE, mask);
}
-#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \
- static uint32_t * \
- bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \
- const uint32_t * mask) \
- { \
- bits_image_fetch_separable_convolution_affine ( \
- iter->image, \
- iter->x, iter->y++, \
- iter->width, \
- iter->buffer, mask, \
- convert_ ## format, \
- PIXMAN_ ## format, \
- repeat_mode); \
- \
- return iter->buffer; \
- }
-
-#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
- static uint32_t * \
- bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \
- const uint32_t * mask) \
- { \
- bits_image_fetch_bilinear_affine (iter->image, \
- iter->x, iter->y++, \
- iter->width, \
- iter->buffer, mask, \
- convert_ ## format, \
- PIXMAN_ ## format, \
- repeat_mode); \
- return iter->buffer; \
- }
-
-#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \
- static uint32_t * \
- bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \
- const uint32_t * mask) \
- { \
- bits_image_fetch_nearest_affine (iter->image, \
- iter->x, iter->y++, \
- iter->width, \
- iter->buffer, mask, \
- convert_ ## format, \
- PIXMAN_ ## format, \
- repeat_mode); \
- return iter->buffer; \
- }
-
-#define MAKE_FETCHERS(name, format, repeat_mode) \
- MAKE_NEAREST_FETCHER (name, format, repeat_mode) \
- MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \
- MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
-
-MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL)
-MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD)
-MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE)
-MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT)
-MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL)
-
static void
replicate_pixel_32 (bits_image_t * bits,
int x,
@@ -1253,9 +777,9 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
w = MIN (width, image->width - x);
if (wide)
- image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
+ image->fetch_scanline_float (image, x, y, w, buffer, NULL);
else
- image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
+ image->fetch_scanline_32 (image, x, y, w, buffer, NULL);
width -= w;
buffer += w * (wide? 4 : 1);
@@ -1301,9 +825,9 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
w = MIN (width, image->width - x);
if (wide)
- image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
+ image->fetch_scanline_float (image, x, y, w, buffer, NULL);
else
- image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
+ image->fetch_scanline_32 (image, x, y, w, buffer, NULL);
buffer += w * (wide? 4 : 1);
x += w;
@@ -1381,104 +905,18 @@ static const fetcher_info_t fetcher_info[] =
bits_image_fetch_untransformed_float
},
-#define FAST_BILINEAR_FLAGS \
- (FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_HAS_TRANSFORM | \
- FAST_PATH_AFFINE_TRANSFORM | \
- FAST_PATH_X_UNIT_POSITIVE | \
- FAST_PATH_Y_UNIT_ZERO | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_BILINEAR_FILTER)
-
- { PIXMAN_a8r8g8b8,
- FAST_BILINEAR_FLAGS,
- bits_image_fetch_bilinear_no_repeat_8888,
- _pixman_image_get_scanline_generic_float
- },
-
- { PIXMAN_x8r8g8b8,
- FAST_BILINEAR_FLAGS,
- bits_image_fetch_bilinear_no_repeat_8888,
- _pixman_image_get_scanline_generic_float
- },
-
-#define GENERAL_BILINEAR_FLAGS \
- (FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_HAS_TRANSFORM | \
- FAST_PATH_AFFINE_TRANSFORM | \
- FAST_PATH_BILINEAR_FILTER)
-
-#define GENERAL_NEAREST_FLAGS \
- (FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_HAS_TRANSFORM | \
- FAST_PATH_AFFINE_TRANSFORM | \
- FAST_PATH_NEAREST_FILTER)
-
-#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \
- (FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_HAS_TRANSFORM | \
- FAST_PATH_AFFINE_TRANSFORM | \
- FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
-
-#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
- { PIXMAN_ ## format, \
- GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
- bits_image_fetch_separable_convolution_affine_ ## name, \
- _pixman_image_get_scanline_generic_float \
- },
-
-#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
- { PIXMAN_ ## format, \
- GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
- bits_image_fetch_bilinear_affine_ ## name, \
- _pixman_image_get_scanline_generic_float \
- },
-
-#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
- { PIXMAN_ ## format, \
- GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
- bits_image_fetch_nearest_affine_ ## name, \
- _pixman_image_get_scanline_generic_float \
- },
-
-#define AFFINE_FAST_PATHS(name, format, repeat) \
- SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
- BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
- NEAREST_AFFINE_FAST_PATH(name, format, repeat)
-
- AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
- AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
- AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
- AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
- AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
- AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
- AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
- AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
- AFFINE_FAST_PATHS (pad_a8, a8, PAD)
- AFFINE_FAST_PATHS (none_a8, a8, NONE)
- AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
- AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
- AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
- AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
- AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
- AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
-
/* Affine, no alpha */
{ PIXMAN_any,
(FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
- bits_image_fetch_affine_no_alpha,
- _pixman_image_get_scanline_generic_float
+ bits_image_fetch_affine_no_alpha_32,
+ bits_image_fetch_affine_no_alpha_float,
},
/* General */
{ PIXMAN_any,
0,
- bits_image_fetch_general,
- _pixman_image_get_scanline_generic_float
+ bits_image_fetch_general_32,
+ bits_image_fetch_general_float,
},
{ PIXMAN_null },
@@ -1508,7 +946,6 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
}
else
{
- iter->data = info->get_scanline_32;
iter->get_scanline = info->get_scanline_float;
}
return;
@@ -1520,20 +957,6 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
}
static uint32_t *
-dest_get_scanline_16 (pixman_iter_t *iter, const uint32_t *mask)
-{
- pixman_image_t *image = iter->image;
- int x = iter->x;
- int y = iter->y;
- int width = iter->width;
- uint32_t * buffer = iter->buffer;
-
- image->bits.fetch_scanline_16 (image, x, y, width, buffer, mask);
-
- return iter->buffer;
-}
-
-static uint32_t *
dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
pixman_image_t *image = iter->image;
@@ -1542,7 +965,7 @@ dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
int width = iter->width;
uint32_t * buffer = iter->buffer;
- image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask);
+ image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask);
if (image->common.alpha_map)
{
uint32_t *alpha;
@@ -1555,8 +978,7 @@ dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
y -= image->common.alpha_origin_y;
image->common.alpha_map->fetch_scanline_32 (
- (pixman_image_t *)image->common.alpha_map,
- x, y, width, alpha, mask);
+ image->common.alpha_map, x, y, width, alpha, mask);
for (i = 0; i < width; ++i)
{
@@ -1581,7 +1003,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
argb_t * buffer = (argb_t *)iter->buffer;
image->fetch_scanline_float (
- (pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask);
+ image, x, y, width, (uint32_t *)buffer, mask);
if (image->common.alpha_map)
{
argb_t *alpha;
@@ -1594,8 +1016,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
y -= image->common.alpha_origin_y;
image->common.alpha_map->fetch_scanline_float (
- (pixman_image_t *)image->common.alpha_map,
- x, y, width, (uint32_t *)alpha, mask);
+ image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask);
for (i = 0; i < width; ++i)
buffer[i].a = alpha[i].a;
@@ -1608,20 +1029,6 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
}
static void
-dest_write_back_16 (pixman_iter_t *iter)
-{
- bits_image_t * image = &iter->image->bits;
- int x = iter->x;
- int y = iter->y;
- int width = iter->width;
- const uint32_t *buffer = iter->buffer;
-
- image->store_scanline_16 (image, x, y, width, buffer);
-
- iter->y++;
-}
-
-static void
dest_write_back_narrow (pixman_iter_t *iter)
{
bits_image_t * image = &iter->image->bits;
@@ -1644,6 +1051,119 @@ dest_write_back_narrow (pixman_iter_t *iter)
iter->y++;
}
+static float
+dither_factor_blue_noise_64 (int x, int y)
+{
+ float m = dither_blue_noise_64x64[((y & 0x3f) << 6) | (x & 0x3f)];
+ return m * (1. / 4096.f) + (1. / 8192.f);
+}
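+
+/* Sanity check (illustrative): assuming the 64x64 table is a permutation
+ * of 0..4095, as generated by dither/make-blue-noise.c, the factor above
+ * ranges from 1/8192 (m = 0) to 8191/8192 (m = 4095), so 0 < d < 1 and
+ * exactly representable values are never perturbed.
+ */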
+
+static float
+dither_factor_bayer_8 (int x, int y)
+{
+ uint32_t m;
+
+ y ^= x;
+
+ /* Compute reverse(interleave(xor(x mod n, y mod n), x mod n))
+ * Here n = 8 and `mod n` is the bottom 3 bits.
+ */
+ m = ((y & 0x1) << 5) | ((x & 0x1) << 4) |
+ ((y & 0x2) << 2) | ((x & 0x2) << 1) |
+ ((y & 0x4) >> 1) | ((x & 0x4) >> 2);
+
+ /* m is in range [0, 63]. We scale it to [0, 63.0f/64.0f], then
+ * shift it to [1.0f/128.0f, 127.0f/128.0f] so that 0 < d < 1.
+ * This ensures exact values are not changed by dithering.
+ */
+ return (float)(m) * (1 / 64.0f) + (1.0f / 128.0f);
+}
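+
+/* Worked example (illustrative): for (x, y) = (3, 5), y ^= x leaves
+ * y = 6 (110b) and x = 3 (011b), so
+ *
+ * m = (0 << 5) | (1 << 4) | (2 << 2) | (2 << 1) | (4 >> 1) | (0 >> 2)
+ *   = 16 | 8 | 4 | 2 = 30
+ *
+ * and the factor is 30/64 + 1/128 = 61/128, about 0.477.
+ */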
+
+typedef float (* dither_factor_t)(int x, int y);
+
+static force_inline float
+dither_apply_channel (float f, float d, float s)
+{
+ /* float_to_unorm splits the [0, 1] segment into (1 << n_bits)
+ * subsections of equal length; however unorm_to_float does not
+ * map to the center of those sections. In fact, pixel value u is
+ * mapped to:
+ *
+ *       u              u              u              1
+ * -------------- = ---------- + -------------- * ----------
+ *  2^n_bits - 1     2^n_bits     2^n_bits - 1     2^n_bits
+ *
+ * Hence if f = u / (2^n_bits - 1) is exactly representable on a
+ * n_bits palette, all the numbers between
+ *
+ * u
+ *   ---------- = f - f / 2^n_bits = f + (0 - f) / 2^n_bits
+ * 2^n_bits
+ *
+ * and
+ *
+ * u + 1
+ *   ---------- = f - (f - 1) / 2^n_bits = f + (1 - f) / 2^n_bits
+ * 2^n_bits
+ *
+ * are also mapped back to u.
+ *
+ * Hence the following calculation ensures that we add as much
+ * noise as possible without perturbing values which are exactly
+ * representable in the target colorspace. Note that this corresponds to
+ * mixing the original color with noise in a ratio of `1 / 2^n_bits`.
+ */
+ return f + (d - f) * s;
+}
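+
+/* Worked example (illustrative): with n_bits = 2 the palette is u/3 for
+ * u in {0..3} and the scale is s = 1/4. For f = 1/3 (u = 1) and any
+ * d in (0, 1), f + (d - f) / 4 stays strictly between 1/4 and 2/4, i.e.
+ * inside float_to_unorm's section for u = 1, so the exact value 1/3
+ * survives dithering no matter what noise d supplies.
+ */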
+
+static force_inline float
+dither_compute_scale (int n_bits)
+{
+ /* No dithering for wide formats */
+ if (n_bits == 0 || n_bits >= 32)
+ return 0.f;
+
+ return 1.f / (float)(1 << n_bits);
+}
+
+static const uint32_t *
+dither_apply_ordered (pixman_iter_t *iter, dither_factor_t factor)
+{
+ bits_image_t *image = &iter->image->bits;
+ int x = iter->x + image->dither_offset_x;
+ int y = iter->y + image->dither_offset_y;
+ int width = iter->width;
+ argb_t *buffer = (argb_t *)iter->buffer;
+
+ pixman_format_code_t format = image->format;
+ int a_size = PIXMAN_FORMAT_A (format);
+ int r_size = PIXMAN_FORMAT_R (format);
+ int g_size = PIXMAN_FORMAT_G (format);
+ int b_size = PIXMAN_FORMAT_B (format);
+
+ float a_scale = dither_compute_scale (a_size);
+ float r_scale = dither_compute_scale (r_size);
+ float g_scale = dither_compute_scale (g_size);
+ float b_scale = dither_compute_scale (b_size);
+
+ int i;
+ float d;
+
+ for (i = 0; i < width; ++i)
+ {
+ d = factor (x + i, y);
+
+ buffer->a = dither_apply_channel (buffer->a, d, a_scale);
+ buffer->r = dither_apply_channel (buffer->r, d, r_scale);
+ buffer->g = dither_apply_channel (buffer->g, d, g_scale);
+ buffer->b = dither_apply_channel (buffer->b, d, b_scale);
+
+ buffer++;
+ }
+
+ return iter->buffer;
+}
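+
+/* E.g. for PIXMAN_r5g6b5 this computes r_scale = b_scale = 1/32 and
+ * g_scale = 1/64, while a_scale = 0 (no alpha bits), so the buffer's
+ * alpha channel passes through undithered.
+ */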
+
static void
dest_write_back_wide (pixman_iter_t *iter)
{
@@ -1653,6 +1173,23 @@ dest_write_back_wide (pixman_iter_t *iter)
int width = iter->width;
const uint32_t *buffer = iter->buffer;
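+
+ /* image->dither stays PIXMAN_DITHER_NONE unless the client opts in;
+ * usage sketch (assuming the public setters that accompany this code):
+ *
+ * pixman_image_set_dither (dest, PIXMAN_DITHER_BEST);
+ * pixman_image_set_dither_offset (dest, offset_x, offset_y);
+ */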
+ switch (image->dither)
+ {
+ case PIXMAN_DITHER_NONE:
+ break;
+
+ case PIXMAN_DITHER_GOOD:
+ case PIXMAN_DITHER_BEST:
+ case PIXMAN_DITHER_ORDERED_BLUE_NOISE_64:
+ buffer = dither_apply_ordered (iter, dither_factor_blue_noise_64);
+ break;
+
+ case PIXMAN_DITHER_FAST:
+ case PIXMAN_DITHER_ORDERED_BAYER_8:
+ buffer = dither_apply_ordered (iter, dither_factor_bayer_8);
+ break;
+ }
+
image->store_scanline_float (image, x, y, width, buffer);
if (image->common.alpha_map)
@@ -1670,20 +1207,7 @@ dest_write_back_wide (pixman_iter_t *iter)
void
_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
- if (iter->iter_flags & ITER_16)
- {
- if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
- (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
- {
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- }
- else
- {
- iter->get_scanline = dest_get_scanline_16;
- }
- iter->write_back = dest_write_back_16;
- }
- else if (iter->iter_flags & ITER_NARROW)
+ if (iter->iter_flags & ITER_NARROW)
{
if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
@@ -1694,7 +1218,7 @@ _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
iter->get_scanline = dest_get_scanline_narrow;
}
-
+
iter->write_back = dest_write_back_narrow;
}
else
@@ -1758,6 +1282,9 @@ _pixman_bits_image_init (pixman_image_t * image,
{
uint32_t *free_me = NULL;
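+
+ /* A 128 bpp pixel is one argb_t, i.e. four uint32_t, and rowstride is
+ * counted in uint32_t units, so each row must hold a whole number of
+ * pixels.
+ */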
+ if (PIXMAN_FORMAT_BPP (format) == 128)
+ return_val_if_fail (!(rowstride % 4), FALSE);
+
if (!bits && width && height)
{
int rowstride_bytes;
@@ -1778,6 +1305,9 @@ _pixman_bits_image_init (pixman_image_t * image,
image->bits.height = height;
image->bits.bits = bits;
image->bits.free_me = free_me;
+ image->bits.dither = PIXMAN_DITHER_NONE;
+ image->bits.dither_offset_x = 0;
+ image->bits.dither_offset_y = 0;
image->bits.read_func = NULL;
image->bits.write_func = NULL;
image->bits.rowstride = rowstride;
diff --git a/gfx/cairo/libpixman/src/pixman-combine-float.c b/gfx/cairo/libpixman/src/pixman-combine-float.c
index 06ce2037ee..f5145bc9d7 100644
--- a/gfx/cairo/libpixman/src/pixman-combine-float.c
+++ b/gfx/cairo/libpixman/src/pixman-combine-float.c
@@ -42,8 +42,6 @@
#define force_inline __inline__
#endif
-#define IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
-
typedef float (* combine_channel_t) (float sa, float s, float da, float d);
static force_inline void
@@ -203,56 +201,56 @@ get_factor (combine_factor_t factor, float sa, float da)
break;
case SA_OVER_DA:
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP (sa / da);
break;
case DA_OVER_SA:
- if (IS_ZERO (sa))
+ if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP (da / sa);
break;
case INV_SA_OVER_DA:
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP ((1.0f - sa) / da);
break;
case INV_DA_OVER_SA:
- if (IS_ZERO (sa))
+ if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP ((1.0f - da) / sa);
break;
case ONE_MINUS_SA_OVER_DA:
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - sa / da);
break;
case ONE_MINUS_DA_OVER_SA:
- if (IS_ZERO (sa))
+ if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - da / sa);
break;
case ONE_MINUS_INV_DA_OVER_SA:
- if (IS_ZERO (sa))
+ if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - da) / sa);
break;
case ONE_MINUS_INV_SA_OVER_DA:
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - sa) / da);
@@ -321,23 +319,44 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
*
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
+ *
+ * http://www.adobe.com/devnet/pdf/pdf_reference.html
+ *
+ * The specific documents of interest are the PDF spec itself:
+ *
+ * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+ *
+ * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
+ * 9.1 and Reader 9.1:
+ *
+ * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
+ *
+ * that clarifies the specifications for blend modes ColorDodge and
+ * ColorBurn.
+ *
* The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ *
+ * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ *
+ * where B() is the blend function. When B(Cb, Cs) = Cs, this formula
+ * reduces to the regular OVER operator.
+ *
+ * Cs and Cb are not premultiplied, so in our implementation we instead
+ * use:
+ *
+ * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs)
+ *
+ * where cr, cs, and cb are premultiplied colors, and where the
+ *
+ * αb × αs × B(cb/αb, cs/αs)
+ *
+ * part is first arithmetically simplified under the assumption that αb
+ * and αs are not 0, and then updated to produce a meaningful result when
+ * they are.
+ *
+ * For all the blend mode operators, the alpha channel is given by
+ *
+ * αr = αs + αb – αb × αs
*/
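+
+/* Quick check (illustrative): substituting B(Cb, Cs) = Cs above gives
+ *
+ * αr × Cr = (1 – αs) × αb × Cb + αs × Cs = (1 – αs) × cb + cs
+ *
+ * which is exactly premultiplied OVER, as claimed.
+ */
+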
#define MAKE_SEPARABLE_PDF_COMBINERS(name) \
@@ -357,18 +376,55 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
\
MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
+/*
+ * Multiply
+ *
+ * ad * as * B(d / ad, s / as)
+ * = ad * as * d/ad * s/as
+ * = d * s
+ *
+ */
static force_inline float
blend_multiply (float sa, float s, float da, float d)
{
return d * s;
}
+/*
+ * Screen
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * (d/ad + s/as - s/as * d/ad)
+ * = ad * s + as * d - s * d
+ */
static force_inline float
blend_screen (float sa, float s, float da, float d)
{
return d * sa + s * da - s * d;
}
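+
+/* Spot check (illustrative): with fully opaque layers (sa = da = 1)
+ * this reduces to d + s - s * d, the textbook screen formula.
+ */
+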
+/*
+ * Overlay
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * Hardlight (s, d)
+ * = if (d / ad < 0.5)
+ * as * ad * Multiply (s/as, 2 * d/ad)
+ * else
+ * as * ad * Screen (s/as, 2 * d / ad - 1)
+ * = if (d < 0.5 * ad)
+ * as * ad * s/as * 2 * d / ad
+ * else
+ * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
+ * = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
+ * = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * as * ad - 2 * (ad - d) * (as - s)
+ */
static force_inline float
blend_overlay (float sa, float s, float da, float d)
{
@@ -378,6 +434,13 @@ blend_overlay (float sa, float s, float da, float d)
return sa * da - 2 * (da - d) * (sa - s);
}
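+
+/* Spot check (illustrative): sa = da = 1, s = 0.75, d = 0.25 takes the
+ * 2 * d < da branch and yields 2 * s * d = 0.375, matching the
+ * unpremultiplied Multiply (s, 2 * d) = 0.75 * 0.5.
+ */
+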
+/*
+ * Darken
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * MIN(d/ad, s/as)
+ * = MIN (as * d, ad * s)
+ */
static force_inline float
blend_darken (float sa, float s, float da, float d)
{
@@ -390,6 +453,13 @@ blend_darken (float sa, float s, float da, float d)
return s;
}
+/*
+ * Lighten
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * MAX(d/ad, s/as)
+ * = MAX (as * d, ad * s)
+ */
static force_inline float
blend_lighten (float sa, float s, float da, float d)
{
@@ -402,19 +472,57 @@ blend_lighten (float sa, float s, float da, float d)
return d;
}
+/*
+ * Color dodge
+ *
+ * ad * as * B(d/ad, s/as)
+ * = if d/ad = 0
+ * ad * as * 0
+ * else if d/ad >= (1 - s/as)
+ * ad * as * 1
+ * else
+ * ad * as * ((d/ad) / (1 - s/as))
+ * = if d = 0
+ * 0
+ * elif as * d >= ad * (as - s)
+ * ad * as
+ * else
+ * as * (as * d / (as - s))
+ *
+ */
static force_inline float
blend_color_dodge (float sa, float s, float da, float d)
{
- if (IS_ZERO (d))
+ if (FLOAT_IS_ZERO (d))
return 0.0f;
else if (d * sa >= sa * da - s * da)
return sa * da;
- else if (IS_ZERO (sa - s))
+ else if (FLOAT_IS_ZERO (sa - s))
return sa * da;
else
return sa * sa * d / (sa - s);
}
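+
+/* Spot check (illustrative): sa = da = 1, s = 0.5, d = 0.25 falls
+ * through to the last branch: 1 * 1 * 0.25 / (1 - 0.5) = 0.5, matching
+ * the unpremultiplied dodge d / (1 - s) = 0.25 / 0.5.
+ */
+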
+/*
+ * Color burn
+ *
+ * We modify the first clause "if d = 1" to "if d >= 1" since with
+ * premultiplied colors d > 1 can actually happen.
+ *
+ * ad * as * B(d/ad, s/as)
+ * = if d/ad >= 1
+ * ad * as * 1
+ * elif (1 - d/ad) >= s/as
+ * ad * as * 0
+ * else
+ * ad * as * (1 - ((1 - d/ad) / (s/as)))
+ * = if d >= ad
+ * ad * as
+ * elif as * ad - as * d >= ad * s
+ * 0
+ * else
+ * ad * as - as * as * (ad - d) / s
+ */
static force_inline float
blend_color_burn (float sa, float s, float da, float d)
{
@@ -422,12 +530,29 @@ blend_color_burn (float sa, float s, float da, float d)
return sa * da;
else if (sa * (da - d) >= s * da)
return 0.0f;
- else if (IS_ZERO (s))
+ else if (FLOAT_IS_ZERO (s))
return 0.0f;
else
return sa * (da - sa * (da - d) / s);
}
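+
+/* Spot check (illustrative): sa = da = 1, s = 0.5, d = 0.75 reaches the
+ * final branch: 1 * (1 - 1 * (1 - 0.75) / 0.5) = 0.5, matching the
+ * unpremultiplied burn 1 - (1 - d) / s = 1 - 0.25 / 0.5.
+ */
+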
+/*
+ * Hard light
+ *
+ * ad * as * B(d/ad, s/as)
+ * = if (s/as <= 0.5)
+ * ad * as * Multiply (d/ad, 2 * s/as)
+ * else
+ * ad * as * Screen (d/ad, 2 * s/as - 1)
+ * = if 2 * s <= as
+ * ad * as * d/ad * 2 * s / as
+ * else
+ * ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1))
+ * = if 2 * s <= as
+ * 2 * s * d
+ * else
+ * as * ad - 2 * (ad - d) * (as - s)
+ */
static force_inline float
blend_hard_light (float sa, float s, float da, float d)
{
@@ -437,21 +562,38 @@ blend_hard_light (float sa, float s, float da, float d)
return sa * da - 2 * (da - d) * (sa - s);
}
+/*
+ * Soft light
+ *
+ * ad * as * B(d/ad, s/as)
+ * = if (s/as <= 0.5)
+ * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad))
+ * else if (d/ad <= 0.25)
+ * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad))
+ * else
+ * ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad))
+ * = if (2 * s <= as)
+ * d * as - d * (ad - d) * (as - 2 * s) / ad;
+ * else if (4 * d <= ad)
+ * d * as + (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3);
+ * else
+ * d * as + (sqrt (d * ad) - d) * (2 * s - as);
+ */
static force_inline float
blend_soft_light (float sa, float s, float da, float d)
{
- if (2 * s < sa)
+ if (2 * s <= sa)
{
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
return d * sa;
else
return d * sa - d * (da - d) * (sa - 2 * s) / da;
}
else
{
- if (IS_ZERO (da))
+ if (FLOAT_IS_ZERO (da))
{
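+ /* For premultiplied pixels d <= da, so this mirrors the
+ * FLOAT_IS_ZERO (da) case of the 2 * s <= sa branch above and the
+ * two halves of the blend agree when da is (nearly) zero.
+ */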
- return 0.0f;
+ return d * sa;
}
else
{
@@ -463,6 +605,20 @@ blend_soft_light (float sa, float s, float da, float d)
}
}
+/*
+ * Difference
+ *
+ * ad * as * B(s/as, d/ad)
+ * = ad * as * abs (s/as - d/ad)
+ * = if (s/as <= d/ad)
+ * ad * as * (d/ad - s/as)
+ * else
+ * ad * as * (s/as - d/ad)
+ * = if (ad * s <= as * d)
+ * as * d - ad * s
+ * else
+ * ad * s - as * d
+ */
static force_inline float
blend_difference (float sa, float s, float da, float d)
{
@@ -475,6 +631,13 @@ blend_difference (float sa, float s, float da, float d)
return sda - dsa;
}
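+
+/* Spot check (illustrative): with sa = da = 1 this is |s - d|; e.g.
+ * s = 0.3, d = 0.8 gives 0.5.
+ */
+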
+/*
+ * Exclusion
+ *
+ * ad * as * B(s/as, d/ad)
+ * = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
+ * = as * d + ad * s - 2 * s * d
+ */
static force_inline float
blend_exclusion (float sa, float s, float da, float d)
{
@@ -494,116 +657,79 @@ MAKE_SEPARABLE_PDF_COMBINERS (difference)
MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
/*
- * PDF nonseperable blend modes.
- *
- * These are implemented using the following functions to operate in Hsl
- * space, with Cmax, Cmid, Cmin referring to the max, mid and min value
- * of the red, green and blue components.
+ * PDF nonseparable blend modes are implemented using the following functions
+ * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
+ * and min value of the red, green and blue components.
*
* LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
*
* clip_color (C):
- * l = LUM (C)
- * min = Cmin
- * max = Cmax
- * if n < 0.0
- * C = l + (((C – l) × l) ⁄ (l – min))
- * if x > 1.0
- * C = l + (((C – l) × (1 – l)) (max – l))
- * return C
+ * l = LUM (C)
+ * min = Cmin
+ * max = Cmax
+ * if min < 0.0
+ * C = l + (((C – l) × l) ⁄ (l – min))
+ * if max > 1.0
+ * C = l + (((C – l) × (1 – l)) ⁄ (max – l))
+ * return C
*
* set_lum (C, l):
- * d = l – LUM (C)
- * C += d
- * return clip_color (C)
+ * d = l – LUM (C)
+ * C += d
+ * return clip_color (C)
*
* SAT (C) = CH_MAX (C) - CH_MIN (C)
*
* set_sat (C, s):
- * if Cmax > Cmin
- * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- * Cmax = s
- * else
- * Cmid = Cmax = 0.0
- * Cmin = 0.0
- * return C
+ * if Cmax > Cmin
+ * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
+ * Cmax = s
+ * else
+ * Cmid = Cmax = 0.0
+ * Cmin = 0.0
+ * return C
*/
/* For premultiplied colors, we need to know what happens when C is
* multiplied by a real number. LUM and SAT are linear:
*
- * LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C)
+ * LUM (r * C) = r * LUM (C)        SAT (r * C) = r * SAT (C)
*
* If we extend clip_color with an extra argument a and change
*
- * if x >= 1.0
+ * if max > 1.0
*
* into
*
- * if x >= a
+ * if max > a
*
* then clip_color is also linear:
*
- * r * clip_color (C, a) = clip_color (r_c, ra);
+ * r * clip_color (C, a) = clip_color (r * C, r * a);
*
* for positive r.
*
* Similarly, we can extend set_lum with an extra argument that is just passed
* on to clip_color:
*
- * r × set_lum ( C, l, a)
+ * r * set_lum (C, l, a)
*
- * = r × clip_color ( C + l - LUM (C), a)
+ * = r * clip_color (C + l - LUM (C), a)
*
- * = clip_color ( r * C + r × l - LUM (r × C), r * a)
+ * = clip_color (r * C + r * l - r * LUM (C), r * a)
*
- * = set_lum ( r * C, r * l, r * a)
+ * = set_lum (r * C, r * l, r * a)
*
* Finally, set_sat:
*
- * r * set_sat (C, s) = set_sat (x * C, r * s)
+ * r * set_sat (C, s) = set_sat (x * C, r * s)
*
- * The above holds for all non-zero x because they x'es in the fraction for
+ * The above holds for all non-zero x, because the x'es in the fraction for
* C_mid cancel out. Specifically, it holds for x = r:
*
- * r * set_sat (C, s) = set_sat (r_c, rs)
- *
- *
- *
- *
- * So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied:
- *
- * Color:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- * Luminosity:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- * Saturation:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- * a_s * LUM (D), a_s * a_d)
- * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
- *
- * Hue:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
+ * r * set_sat (C, s) = set_sat (r * C, r * s)
*
*/
-
typedef struct
{
float r;
@@ -658,7 +784,7 @@ clip_color (rgb_t *color, float a)
if (n < 0.0f)
{
t = l - n;
- if (IS_ZERO (t))
+ if (FLOAT_IS_ZERO (t))
{
color->r = 0.0f;
color->g = 0.0f;
@@ -674,7 +800,7 @@ clip_color (rgb_t *color, float a)
if (x > a)
{
t = x - l;
- if (IS_ZERO (t))
+ if (FLOAT_IS_ZERO (t))
{
color->r = a;
color->g = a;
@@ -758,7 +884,7 @@ set_sat (rgb_t *src, float sat)
t = *max - *min;
- if (IS_ZERO (t))
+ if (FLOAT_IS_ZERO (t))
{
*mid = *max = 0.0f;
}
@@ -771,9 +897,12 @@ set_sat (rgb_t *src, float sat)
*min = 0.0f;
}
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
+/*
+ * Hue
+ *
+ * as * ad * B(s/as, d/ad)
+ * = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1)
+ * = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad)
+ *
*/
static force_inline void
blend_hsl_hue (rgb_t *res,
@@ -788,9 +917,14 @@ blend_hsl_hue (rgb_t *res,
set_lum (res, sa * da, get_lum (dest) * sa);
}
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
+/*
+ * Saturation
+ *
+ * as * ad * B(s/as, d/ad)
+ * = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1)
+ * = set_lum (as * ad * set_sat (d/ad, SAT (s/as)),
+ * as * LUM (d), as * ad)
+ * = set_lum (set_sat (as * d, ad * SAT (s)), as * LUM (d), as * ad)
*/
static force_inline void
blend_hsl_saturation (rgb_t *res,
@@ -805,9 +939,12 @@ blend_hsl_saturation (rgb_t *res,
set_lum (res, sa * da, get_lum (dest) * sa);
}
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
+/*
+ * Color
+ *
+ * as * ad * B(s/as, d/ad)
+ * = as * ad * set_lum (s/as, LUM (d/ad), 1)
+ * = set_lum (s * ad, as * LUM (d), as * ad)
*/
static force_inline void
blend_hsl_color (rgb_t *res,
@@ -822,8 +959,11 @@ blend_hsl_color (rgb_t *res,
}
/*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
+ * Luminosity
+ *
+ * as * ad * B(s/as, d/ad)
+ * = as * ad * set_lum (d/ad, LUM (s/as), 1)
+ * = set_lum (as * d, ad * LUM (s), as * ad)
*/
static force_inline void
blend_hsl_luminosity (rgb_t *res,
diff --git a/gfx/cairo/libpixman/src/pixman-combine.c.template b/gfx/cairo/libpixman/src/pixman-combine.c.template
deleted file mode 100644
index cd008d9673..0000000000
--- a/gfx/cairo/libpixman/src/pixman-combine.c.template
+++ /dev/null
@@ -1,2461 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-
-#include "pixman-combine.h"
-
-/*** per channel helper functions ***/
-
-static void
-combine_mask_ca (comp4_t *src, comp4_t *mask)
-{
- comp4_t a = *mask;
-
- comp4_t x;
- comp2_t xa;
-
- if (!a)
- {
- *(src) = 0;
- return;
- }
-
- x = *(src);
- if (a == ~0)
- {
- x = x >> A_SHIFT;
- x |= x << G_SHIFT;
- x |= x << R_SHIFT;
- *(mask) = x;
- return;
- }
-
- xa = x >> A_SHIFT;
- UNcx4_MUL_UNcx4 (x, a);
- *(src) = x;
-
- UNcx4_MUL_UNc (a, xa);
- *(mask) = a;
-}
-
-static void
-combine_mask_value_ca (comp4_t *src, const comp4_t *mask)
-{
- comp4_t a = *mask;
- comp4_t x;
-
- if (!a)
- {
- *(src) = 0;
- return;
- }
-
- if (a == ~0)
- return;
-
- x = *(src);
- UNcx4_MUL_UNcx4 (x, a);
- *(src) = x;
-}
-
-static void
-combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask)
-{
- comp4_t a = *(mask);
- comp4_t x;
-
- if (!a)
- return;
-
- x = *(src) >> A_SHIFT;
- if (x == MASK)
- return;
-
- if (a == ~0)
- {
- x |= x << G_SHIFT;
- x |= x << R_SHIFT;
- *(mask) = x;
- return;
- }
-
- UNcx4_MUL_UNc (a, x);
- *(mask) = a;
-}
-
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions. The unified alpha version has a 'U' at the end of the name,
- * the component version has a 'C'. Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-/*
- * All of the composing functions
- */
-
-static force_inline comp4_t
-combine_mask (const comp4_t *src, const comp4_t *mask, int i)
-{
- comp4_t s, m;
-
- if (mask)
- {
- m = *(mask + i) >> A_SHIFT;
-
- if (!m)
- return 0;
- }
-
- s = *(src + i);
-
- if (mask)
- UNcx4_MUL_UNc (s, m);
-
- return s;
-}
-
-static void
-combine_clear (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- memset (dest, 0, width * sizeof(comp4_t));
-}
-
-static void
-combine_dst (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- return;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- if (!mask)
- memcpy (dest, src, width * sizeof (comp4_t));
- else
- {
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
-
- *(dest + i) = s;
- }
- }
-}
-
-/* if the Src is opaque, call combine_src_u */
-static void
-combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t ia = ALPHA_c (~s);
-
- UNcx4_MUL_UNc_ADD_UNcx4 (d, ia, s);
- *(dest + i) = d;
- }
-}
-
-/* if the Dst is opaque, this is a noop */
-static void
-combine_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t ia = ALPHA_c (~*(dest + i));
- UNcx4_MUL_UNc_ADD_UNcx4 (s, ia, d);
- *(dest + i) = s;
- }
-}
-
-/* if the Dst is opaque, call combine_src_u */
-static void
-combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t a = ALPHA_c (*(dest + i));
- UNcx4_MUL_UNc (s, a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, this is a noop */
-static void
-combine_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t a = ALPHA_c (s);
- UNcx4_MUL_UNc (d, a);
- *(dest + i) = d;
- }
-}
-
-/* if the Dst is opaque, call combine_clear */
-static void
-combine_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t a = ALPHA_c (~*(dest + i));
- UNcx4_MUL_UNc (s, a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_clear */
-static void
-combine_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t a = ALPHA_c (~s);
- UNcx4_MUL_UNc (d, a);
- *(dest + i) = d;
- }
-}
-
-/* if the Src is opaque, call combine_in_u */
-/* if the Dst is opaque, call combine_over_u */
-/* if both the Src and Dst are opaque, call combine_src_u */
-static void
-combine_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t dest_a = ALPHA_c (d);
- comp4_t src_ia = ALPHA_c (~s);
-
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_a, d, src_ia);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_over_reverse_u */
-/* if the Dst is opaque, call combine_in_reverse_u */
-/* if both the Src and Dst are opaque, call combine_dst_u */
-static void
-combine_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t src_a = ALPHA_c (s);
- comp4_t dest_ia = ALPHA_c (~d);
-
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_over_u */
-/* if the Dst is opaque, call combine_over_reverse_u */
-/* if both the Src and Dst are opaque, call combine_clear */
-static void
-combine_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t src_ia = ALPHA_c (~s);
- comp4_t dest_ia = ALPHA_c (~d);
-
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_ia);
- *(dest + i) = s;
- }
-}
-
-static void
-combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- UNcx4_ADD_UNcx4 (d, s);
- *(dest + i) = d;
- }
-}
-
-/* if the Src is opaque, call combine_add_u */
-/* if the Dst is opaque, call combine_add_u */
-/* if both the Src and Dst are opaque, call combine_add_u */
-static void
-combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp2_t sa, da;
-
- sa = s >> A_SHIFT;
- da = ~d >> A_SHIFT;
- if (sa > da)
- {
- sa = DIV_UNc (da, sa);
- UNcx4_MUL_UNc (s, sa);
- }
- ;
- UNcx4_ADD_UNcx4 (d, s);
- *(dest + i) = d;
- }
-}
-
-/*
- * PDF blend modes:
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
- * The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
- */
-
-/*
- * Multiply
- * B(Dca, ad, Sca, as) = Dca.Sca
- */
-
-static void
-combine_multiply_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t ss = s;
- comp4_t src_ia = ALPHA_c (~s);
- comp4_t dest_ia = ALPHA_c (~d);
-
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (ss, dest_ia, d, src_ia);
- UNcx4_MUL_UNcx4 (d, s);
- UNcx4_ADD_UNcx4 (d, ss);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_multiply_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t m = *(mask + i);
- comp4_t s = *(src + i);
- comp4_t d = *(dest + i);
- comp4_t r = d;
- comp4_t dest_ia = ALPHA_c (~d);
-
- combine_mask_value_ca (&s, &m);
-
- UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (r, ~m, s, dest_ia);
- UNcx4_MUL_UNcx4 (d, s);
- UNcx4_ADD_UNcx4 (r, d);
-
- *(dest + i) = r;
- }
-}
-
-#define PDF_SEPARABLE_BLEND_MODE(name) \
- static void \
- combine_ ## name ## _u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- comp4_t * dest, \
- const comp4_t * src, \
- const comp4_t * mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) { \
- comp4_t s = combine_mask (src, mask, i); \
- comp4_t d = *(dest + i); \
- comp1_t sa = ALPHA_c (s); \
- comp1_t isa = ~sa; \
- comp1_t da = ALPHA_c (d); \
- comp1_t ida = ~da; \
- comp4_t result; \
- \
- result = d; \
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \
- \
- *(dest + i) = result + \
- (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \
- (blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \
- (blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa)); \
- } \
- } \
- \
- static void \
- combine_ ## name ## _ca (pixman_implementation_t *imp, \
- pixman_op_t op, \
- comp4_t * dest, \
- const comp4_t * src, \
- const comp4_t * mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) { \
- comp4_t m = *(mask + i); \
- comp4_t s = *(src + i); \
- comp4_t d = *(dest + i); \
- comp1_t da = ALPHA_c (d); \
- comp1_t ida = ~da; \
- comp4_t result; \
- \
- combine_mask_value_ca (&s, &m); \
- \
- result = d; \
- UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida); \
- \
- result += \
- (DIV_ONE_UNc (ALPHA_c (m) * (comp4_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \
- (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \
- (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \
- \
- *(dest + i) = result; \
- } \
- }
-
-/*
- * Screen
- * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
- */
-static inline comp4_t
-blend_screen (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- return DIV_ONE_UNc (sca * da + dca * sa - sca * dca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (screen)
-
-/*
- * Overlay
- * B(Dca, Da, Sca, Sa) =
- * if 2.Dca < Da
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline comp4_t
-blend_overlay (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- comp4_t rca;
-
- if (2 * dca < da)
- rca = 2 * sca * dca;
- else
- rca = sa * da - 2 * (da - dca) * (sa - sca);
- return DIV_ONE_UNc (rca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (overlay)
-
-/*
- * Darken
- * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
- */
-static inline comp4_t
-blend_darken (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- comp4_t s, d;
-
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UNc (s > d ? d : s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (darken)
-
-/*
- * Lighten
- * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
- */
-static inline comp4_t
-blend_lighten (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- comp4_t s, d;
-
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UNc (s > d ? s : d);
-}
-
-PDF_SEPARABLE_BLEND_MODE (lighten)
-
-/*
- * Color dodge
- * B(Dca, Da, Sca, Sa) =
- * if Dca == 0
- * 0
- * if Sca == Sa
- * Sa.Da
- * otherwise
- * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
- */
-static inline comp4_t
-blend_color_dodge (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- if (sca >= sa)
- {
- return dca == 0 ? 0 : DIV_ONE_UNc (sa * da);
- }
- else
- {
- comp4_t rca = dca * sa / (sa - sca);
- return DIV_ONE_UNc (sa * MIN (rca, da));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_dodge)
-
-/*
- * Color burn
- * B(Dca, Da, Sca, Sa) =
- * if Dca == Da
- * Sa.Da
- * if Sca == 0
- * 0
- * otherwise
- * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
- */
-static inline comp4_t
-blend_color_burn (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- if (sca == 0)
- {
- return dca < da ? 0 : DIV_ONE_UNc (sa * da);
- }
- else
- {
- comp4_t rca = (da - dca) * sa / sca;
- return DIV_ONE_UNc (sa * (MAX (rca, da) - rca));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_burn)
-
-/*
- * Hard light
- * B(Dca, Da, Sca, Sa) =
- * if 2.Sca < Sa
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline comp4_t
-blend_hard_light (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- if (2 * sca < sa)
- return DIV_ONE_UNc (2 * sca * dca);
- else
- return DIV_ONE_UNc (sa * da - 2 * (da - dca) * (sa - sca));
-}
-
-PDF_SEPARABLE_BLEND_MODE (hard_light)
-
-/*
- * Soft light
- * B(Dca, Da, Sca, Sa) =
- * if (2.Sca <= Sa)
- * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
- * otherwise if Dca.4 <= Da
- * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
- * otherwise
- * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
- */
-static inline comp4_t
-blend_soft_light (comp4_t dca_org,
- comp4_t da_org,
- comp4_t sca_org,
- comp4_t sa_org)
-{
- double dca = dca_org * (1.0 / MASK);
- double da = da_org * (1.0 / MASK);
- double sca = sca_org * (1.0 / MASK);
- double sa = sa_org * (1.0 / MASK);
- double rca;
-
- if (2 * sca < sa)
- {
- if (da == 0)
- rca = dca * sa;
- else
- rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
- }
- else if (da == 0)
- {
- rca = 0;
- }
- else if (4 * dca <= da)
- {
- rca = dca * sa +
- (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
- }
- else
- {
- rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
- }
- return rca * MASK + 0.5;
-}
-
-PDF_SEPARABLE_BLEND_MODE (soft_light)
-
-/*
- * Difference
- * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
- */
-static inline comp4_t
-blend_difference (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- comp4_t dcasa = dca * sa;
- comp4_t scada = sca * da;
-
- if (scada < dcasa)
- return DIV_ONE_UNc (dcasa - scada);
- else
- return DIV_ONE_UNc (scada - dcasa);
-}
-
-PDF_SEPARABLE_BLEND_MODE (difference)
-
-/*
- * Exclusion
- * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
- */
-
-/* This can be made faster by writing it directly and not using
- * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-
-static inline comp4_t
-blend_exclusion (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
-{
- return DIV_ONE_UNc (sca * da + dca * sa - 2 * dca * sca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (exclusion)
-
-#undef PDF_SEPARABLE_BLEND_MODE
-
-/*
- * PDF nonseperable blend modes are implemented using the following functions
- * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min value of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- * l = LUM (C)
- * min = Cmin
- * max = Cmax
- * if n < 0.0
- * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
- * if x > 1.0
- * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
- * return C
- *
- * set_lum (C, l):
- * d = l – LUM (C)
- * C += d
- * return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- * if Cmax > Cmin
- * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- * Cmax = s
- * else
- * Cmid = Cmax = 0.0
- * Cmin = 0.0
- * return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- * if x >= 1.0
- *
- * into
- *
- * if x >= a
- *
- * then clip_color is also linear:
- *
- * r * clip_color (C, a) = clip_color (r_c, ra);
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- * r * set_lum ( C, l, a)
- *
- * = r × clip_color ( C + l - LUM (C), a)
- *
- * = clip_color ( r * C + r × l - r * LUM (C), r * a)
- *
- * = set_lum ( r * C, r * l, r * a)
- *
- * Finally, set_sat:
- *
- * r * set_sat (C, s) = set_sat (x * C, r * s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- * r * set_sat (C, s) = set_sat (r_c, rs)
- *
- */
-
-/* So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied:
- *
- * Color:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- * Luminosity:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- * Saturation:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- * a_s * LUM (D), a_s * a_d)
- * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
- *
- * Hue:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
- *
- */
-
-#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
-#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
-#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
-#define SAT(c) (CH_MAX (c) - CH_MIN (c))
-
-#define PDF_NON_SEPARABLE_BLEND_MODE(name) \
- static void \
- combine_ ## name ## _u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- comp4_t *dest, \
- const comp4_t *src, \
- const comp4_t *mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) \
- { \
- comp4_t s = combine_mask (src, mask, i); \
- comp4_t d = *(dest + i); \
- comp1_t sa = ALPHA_c (s); \
- comp1_t isa = ~sa; \
- comp1_t da = ALPHA_c (d); \
- comp1_t ida = ~da; \
- comp4_t result; \
- comp4_t sc[3], dc[3], c[3]; \
- \
- result = d; \
- UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \
- dc[0] = RED_c (d); \
- sc[0] = RED_c (s); \
- dc[1] = GREEN_c (d); \
- sc[1] = GREEN_c (s); \
- dc[2] = BLUE_c (d); \
- sc[2] = BLUE_c (s); \
- blend_ ## name (c, dc, da, sc, sa); \
- \
- *(dest + i) = result + \
- (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \
- (DIV_ONE_UNc (c[0]) << R_SHIFT) + \
- (DIV_ONE_UNc (c[1]) << G_SHIFT) + \
- (DIV_ONE_UNc (c[2])); \
- } \
- }
-
-static void
-set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum)
-{
- double a, l, min, max;
- double tmp[3];
-
- a = sa * (1.0 / MASK);
-
- l = lum * (1.0 / MASK);
- tmp[0] = src[0] * (1.0 / MASK);
- tmp[1] = src[1] * (1.0 / MASK);
- tmp[2] = src[2] * (1.0 / MASK);
-
- l = l - LUM (tmp);
- tmp[0] += l;
- tmp[1] += l;
- tmp[2] += l;
-
- /* clip_color */
- l = LUM (tmp);
- min = CH_MIN (tmp);
- max = CH_MAX (tmp);
-
- if (min < 0)
- {
- if (l - min == 0.0)
- {
- tmp[0] = 0;
- tmp[1] = 0;
- tmp[2] = 0;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * l / (l - min);
- tmp[1] = l + (tmp[1] - l) * l / (l - min);
- tmp[2] = l + (tmp[2] - l) * l / (l - min);
- }
- }
- if (max > a)
- {
- if (max - l == 0.0)
- {
- tmp[0] = a;
- tmp[1] = a;
- tmp[2] = a;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
- tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
- tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
- }
- }
-
- dest[0] = tmp[0] * MASK + 0.5;
- dest[1] = tmp[1] * MASK + 0.5;
- dest[2] = tmp[2] * MASK + 0.5;
-}
-
-static void
-set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat)
-{
- int id[3];
- comp4_t min, max;
-
- if (src[0] > src[1])
- {
- if (src[0] > src[2])
- {
- id[0] = 0;
- if (src[1] > src[2])
- {
- id[1] = 1;
- id[2] = 2;
- }
- else
- {
- id[1] = 2;
- id[2] = 1;
- }
- }
- else
- {
- id[0] = 2;
- id[1] = 0;
- id[2] = 1;
- }
- }
- else
- {
- if (src[0] > src[2])
- {
- id[0] = 1;
- id[1] = 0;
- id[2] = 2;
- }
- else
- {
- id[2] = 0;
- if (src[1] > src[2])
- {
- id[0] = 1;
- id[1] = 2;
- }
- else
- {
- id[0] = 2;
- id[1] = 1;
- }
- }
- }
-
- max = dest[id[0]];
- min = dest[id[2]];
- if (max > min)
- {
- dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
- dest[id[0]] = sat;
- dest[id[2]] = 0;
- }
- else
- {
- dest[0] = dest[1] = dest[2] = 0;
- }
-}
-
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
- */
-static inline void
-blend_hsl_hue (comp4_t c[3],
- comp4_t dc[3],
- comp4_t da,
- comp4_t sc[3],
- comp4_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_sat (c, c, SAT (dc) * sa);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
-
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
- */
-static inline void
-blend_hsl_saturation (comp4_t c[3],
- comp4_t dc[3],
- comp4_t da,
- comp4_t sc[3],
- comp4_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_sat (c, c, SAT (sc) * da);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
-
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
- */
-static inline void
-blend_hsl_color (comp4_t c[3],
- comp4_t dc[3],
- comp4_t da,
- comp4_t sc[3],
- comp4_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
-
-/*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
- */
-static inline void
-blend_hsl_luminosity (comp4_t c[3],
- comp4_t dc[3],
- comp4_t da,
- comp4_t sc[3],
- comp4_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_lum (c, c, sa * da, LUM (sc) * da);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
-
-#undef SAT
-#undef LUM
-#undef CH_MAX
-#undef CH_MIN
-#undef PDF_NON_SEPARABLE_BLEND_MODE
-
-/* All of the disjoint/conjoint composing functions
- *
- * The four entries in the first column indicate what source contributions
- * come from each of the four areas of the picture -- areas covered by neither
- * A nor B, areas covered only by A, areas covered only by B and finally
- * areas covered by both A and B.
- *
- * Disjoint Conjoint
- * Fa Fb Fa Fb
- * (0,0,0,0) 0 0 0 0
- * (0,A,0,A) 1 0 1 0
- * (0,0,B,B) 0 1 0 1
- * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0)
- * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1
- * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0
- * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1)
- * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0
- * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0)
- * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0)
- * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b)
- * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0)
- *
- * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more
- * information about these operators.
- */
-
-#define COMBINE_A_OUT 1
-#define COMBINE_A_IN 2
-#define COMBINE_B_OUT 4
-#define COMBINE_B_IN 8
-
-#define COMBINE_CLEAR 0
-#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN)
-#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN)
-#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT)
-
-/* portion covered by a but not b */
-static comp1_t
-combine_disjoint_out_part (comp1_t a, comp1_t b)
-{
- /* min (1, (1-b) / a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return MASK; /* 1 */
- return DIV_UNc (b, a); /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static comp1_t
-combine_disjoint_in_part (comp1_t a, comp1_t b)
-{
- /* max (1-(1-b)/a,0) */
- /* = - min ((1-b)/a - 1, 0) */
- /* = 1 - min (1, (1-b)/a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return 0; /* 1 - 1 */
- return ~DIV_UNc(b, a); /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b */
-static comp1_t
-combine_conjoint_out_part (comp1_t a, comp1_t b)
-{
- /* max (1-b/a,0) */
- /* = 1-min(b/a,1) */
-
- /* min (1, (1-b) / a) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return 0x00; /* 0 */
- return ~DIV_UNc(b, a); /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static comp1_t
-combine_conjoint_in_part (comp1_t a, comp1_t b)
-{
- /* min (1,b/a) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return MASK; /* 1 */
- return DIV_UNc (b, a); /* b/a */
-}
-
-#define GET_COMP(v, i) ((comp2_t) (comp1_t) ((v) >> i))
-
-#define ADD(x, y, i, t) \
- ((t) = GET_COMP (x, i) + GET_COMP (y, i), \
- (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-#define GENERIC(x, y, i, ax, ay, t, u, v) \
- ((t) = (MUL_UNc (GET_COMP (y, i), ay, (u)) + \
- MUL_UNc (GET_COMP (x, i), ax, (v))), \
- (comp4_t) ((comp1_t) ((t) | \
- (0 - ((t) >> G_SHIFT)))) << (i))
-
-static void
-combine_disjoint_general_u (comp4_t * dest,
- const comp4_t *src,
- const comp4_t *mask,
- int width,
- comp1_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t m, n, o, p;
- comp2_t Fa, Fb, t, u, v;
- comp1_t sa = s >> A_SHIFT;
- comp1_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_disjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_disjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_disjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_disjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
- s = m | n | o | p;
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp2_t a = s >> A_SHIFT;
-
- if (s != 0x00)
- {
- comp4_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_disjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_u (comp4_t * dest,
- const comp4_t *src,
- const comp4_t *mask,
- int width,
- comp1_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = combine_mask (src, mask, i);
- comp4_t d = *(dest + i);
- comp4_t m, n, o, p;
- comp2_t Fa, Fb, t, u, v;
- comp1_t sa = s >> A_SHIFT;
- comp1_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_conjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_conjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_conjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_conjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
-
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-/************************************************************************/
-/*********************** Per Channel functions **************************/
-/************************************************************************/
-
-static void
-combine_clear_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- memset (dest, 0, width * sizeof(comp4_t));
-}
-
-static void
-combine_src_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
-
- combine_mask_value_ca (&s, &m);
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t a;
-
- combine_mask_ca (&s, &m);
-
- a = ~m;
- if (a)
- {
- comp4_t d = *(dest + i);
- UNcx4_MUL_UNcx4_ADD_UNcx4 (d, a, s);
- s = d;
- }
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp4_t a = ~d >> A_SHIFT;
-
- if (a)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
-
- UNcx4_MUL_UNcx4 (s, m);
- UNcx4_MUL_UNc_ADD_UNcx4 (s, a, d);
-
- *(dest + i) = s;
- }
- }
-}
-
-static void
-combine_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp2_t a = d >> A_SHIFT;
- comp4_t s = 0;
-
- if (a)
- {
- comp4_t m = *(mask + i);
-
- s = *(src + i);
- combine_mask_value_ca (&s, &m);
-
- if (a != MASK)
- UNcx4_MUL_UNc (s, a);
- }
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t a;
-
- combine_mask_alpha_ca (&s, &m);
-
- a = m;
- if (a != ~0)
- {
- comp4_t d = 0;
-
- if (a)
- {
- d = *(dest + i);
- UNcx4_MUL_UNcx4 (d, a);
- }
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp2_t a = ~d >> A_SHIFT;
- comp4_t s = 0;
-
- if (a)
- {
- comp4_t m = *(mask + i);
-
- s = *(src + i);
- combine_mask_value_ca (&s, &m);
-
- if (a != MASK)
- UNcx4_MUL_UNc (s, a);
- }
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t a;
-
- combine_mask_alpha_ca (&s, &m);
-
- a = ~m;
- if (a != ~0)
- {
- comp4_t d = 0;
-
- if (a)
- {
- d = *(dest + i);
- UNcx4_MUL_UNcx4 (d, a);
- }
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t ad;
- comp2_t as = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = ~m;
-
- UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t ad;
- comp2_t as = ~d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = m;
-
- UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t d = *(dest + i);
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t ad;
- comp2_t as = ~d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = ~m;
-
- UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_add_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s = *(src + i);
- comp4_t m = *(mask + i);
- comp4_t d = *(dest + i);
-
- combine_mask_value_ca (&s, &m);
-
- UNcx4_ADD_UNcx4 (d, s);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_saturate_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s, d;
- comp2_t sa, sr, sg, sb, da;
- comp2_t t, u, v;
- comp4_t m, n, o, p;
-
- d = *(dest + i);
- s = *(src + i);
- m = *(mask + i);
-
- combine_mask_ca (&s, &m);
-
- sa = (m >> A_SHIFT);
- sr = (m >> R_SHIFT) & MASK;
- sg = (m >> G_SHIFT) & MASK;
- sb = m & MASK;
- da = ~d >> A_SHIFT;
-
- if (sb <= da)
- m = ADD (s, d, 0, t);
- else
- m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
- if (sg <= da)
- n = ADD (s, d, G_SHIFT, t);
- else
- n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
- if (sr <= da)
- o = ADD (s, d, R_SHIFT, t);
- else
- o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
- if (sa <= da)
- p = ADD (s, d, A_SHIFT, t);
- else
- p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
- *(dest + i) = m | n | o | p;
- }
-}
-
-static void
-combine_disjoint_general_ca (comp4_t * dest,
- const comp4_t *src,
- const comp4_t *mask,
- int width,
- comp1_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s, d;
- comp4_t m, n, o, p;
- comp4_t Fa, Fb;
- comp2_t t, u, v;
- comp4_t sa;
- comp1_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da);
- n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da);
- n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0));
- n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0));
- n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_disjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_ca (comp4_t * dest,
- const comp4_t *src,
- const comp4_t *mask,
- int width,
- comp1_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- comp4_t s, d;
- comp4_t m, n, o, p;
- comp4_t Fa, Fb;
- comp2_t t, u, v;
- comp4_t sa;
- comp1_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da);
- n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da);
- n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0));
- n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0));
- n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- comp4_t * dest,
- const comp4_t * src,
- const comp4_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-void
-_pixman_setup_combiner_functions_width (pixman_implementation_t *imp)
-{
- /* Unified alpha */
- imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear;
- imp->combine_width[PIXMAN_OP_SRC] = combine_src_u;
- imp->combine_width[PIXMAN_OP_DST] = combine_dst;
- imp->combine_width[PIXMAN_OP_OVER] = combine_over_u;
- imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
- imp->combine_width[PIXMAN_OP_IN] = combine_in_u;
- imp->combine_width[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
- imp->combine_width[PIXMAN_OP_OUT] = combine_out_u;
- imp->combine_width[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
- imp->combine_width[PIXMAN_OP_ATOP] = combine_atop_u;
- imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
- imp->combine_width[PIXMAN_OP_XOR] = combine_xor_u;
- imp->combine_width[PIXMAN_OP_ADD] = combine_add_u;
- imp->combine_width[PIXMAN_OP_SATURATE] = combine_saturate_u;
-
- /* Disjoint, unified */
- imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
- imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
- imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
-
- /* Conjoint, unified */
- imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
- imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
- imp->combine_width[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
-
- imp->combine_width[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
- imp->combine_width[PIXMAN_OP_SCREEN] = combine_screen_u;
- imp->combine_width[PIXMAN_OP_OVERLAY] = combine_overlay_u;
- imp->combine_width[PIXMAN_OP_DARKEN] = combine_darken_u;
- imp->combine_width[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
- imp->combine_width[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
- imp->combine_width[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
- imp->combine_width[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
- imp->combine_width[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
- imp->combine_width[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
- imp->combine_width[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
- imp->combine_width[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
- imp->combine_width[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
- imp->combine_width[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
- imp->combine_width[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
-
- /* Component alpha combiners */
- imp->combine_width_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
- imp->combine_width_ca[PIXMAN_OP_SRC] = combine_src_ca;
- /* dest */
- imp->combine_width_ca[PIXMAN_OP_OVER] = combine_over_ca;
- imp->combine_width_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_IN] = combine_in_ca;
- imp->combine_width_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_OUT] = combine_out_ca;
- imp->combine_width_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
- imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_XOR] = combine_xor_ca;
- imp->combine_width_ca[PIXMAN_OP_ADD] = combine_add_ca;
- imp->combine_width_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
-
- /* Disjoint CA */
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
-
- /* Conjoint CA */
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
- imp->combine_width_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
-
- imp->combine_width_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
- imp->combine_width_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
- imp->combine_width_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
- imp->combine_width_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
- imp->combine_width_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
- imp->combine_width_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
- imp->combine_width_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
- imp->combine_width_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
- imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
- imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
- imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-
- /* It is not clear that these make sense, so make them noops for now */
- imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
- imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
- imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
- imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
-}
-
diff --git a/gfx/cairo/libpixman/src/pixman-combine.h.template b/gfx/cairo/libpixman/src/pixman-combine.h.template
deleted file mode 100644
index 20f784b5b4..0000000000
--- a/gfx/cairo/libpixman/src/pixman-combine.h.template
+++ /dev/null
@@ -1,226 +0,0 @@
-
-#define COMPONENT_SIZE
-#define MASK
-#define ONE_HALF
-
-#define A_SHIFT
-#define R_SHIFT
-#define G_SHIFT
-#define A_MASK
-#define R_MASK
-#define G_MASK
-
-#define RB_MASK
-#define AG_MASK
-#define RB_ONE_HALF
-#define RB_MASK_PLUS_ONE
-
-#define ALPHA_c(x) ((x) >> A_SHIFT)
-#define RED_c(x) (((x) >> R_SHIFT) & MASK)
-#define GREEN_c(x) (((x) >> G_SHIFT) & MASK)
-#define BLUE_c(x) ((x) & MASK)
-
-/*
- * Helper macros.
- */
-
-#define MUL_UNc(a, b, t) \
- ((t) = (a) * (comp2_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
-
-#define DIV_UNc(a, b) \
- (((comp2_t) (a) * MASK + ((b) / 2)) / (b))
-
-#define ADD_UNc(x, y, t) \
- ((t) = (x) + (y), \
- (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT))))
-
-#define DIV_ONE_UNc(x) \
- (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
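As a sanity check on the two macros above: with the 8-bit instantiation of this template (G_SHIFT = 8, ONE_HALF = 0x80, MASK = 0xff), the shift-and-add in MUL_UNc is an exact rounded division by 255, and DIV_ONE_UNc applies the same step to an arbitrary in-range sum. A standalone brute-force verification (illustrative, not part of the template):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    unsigned a, b;

    for (a = 0; a <= 255; a++)
    {
        for (b = 0; b <= 255; b++)
        {
            uint32_t t = a * b + 0x80;                         /* MUL_UNc, expanded */
            uint32_t fast = ((t >> 8) + t) >> 8;
            uint32_t exact = (uint32_t) (a * b / 255.0 + 0.5); /* round (a*b / 255) */

            assert (fast == exact);
        }
    }
    return 0;
}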
-
-/*
- * The methods below use some tricks to be able to do two color
- * components at the same time.
- */
-
-/*
- * x_rb = (x_rb * a) / 255
- */
-#define UNc_rb_MUL_UNc(x, a, t) \
- do \
- { \
- t = ((x) & RB_MASK) * (a); \
- t += RB_ONE_HALF; \
- x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
- x &= RB_MASK; \
- } while (0)
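Filled in for the 8-bit case (RB_MASK = 0x00ff00ff, RB_ONE_HALF = 0x00800080, G_SHIFT = 8), the macro above multiplies the red and blue channels in one pass: the 8 zero bits between the two packed channels absorb the carries, so each half behaves like an independent MUL_UNc. The UNcx4_* macros further below run the same step twice, once on red/blue and once on the word shifted down by G_SHIFT for alpha/green. A standalone illustration (mul_un8 is a made-up reference helper):

#include <assert.h>
#include <stdint.h>

static uint8_t
mul_un8 (uint8_t c, uint8_t a)          /* scalar MUL_UNc for reference */
{
    uint32_t t = c * a + 0x80;
    return ((t >> 8) + t) >> 8;
}

int
main (void)
{
    uint32_t x = 0x00C00030;    /* red = 0xc0 in bits 16-23, blue = 0x30 in bits 0-7 */
    uint32_t a = 0x55;
    uint32_t t;

    /* UNc_rb_MUL_UNc, expanded for the 8-bit case */
    t = (x & 0x00ff00ff) * a;
    t += 0x00800080;
    x = (t + ((t >> 8) & 0x00ff00ff)) >> 8;
    x &= 0x00ff00ff;

    assert (((x >> 16) & 0xff) == mul_un8 (0xC0, 0x55));
    assert ((x & 0xff) == mul_un8 (0x30, 0x55));
    return 0;
}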
-
-/*
- * x_rb = min (x_rb + y_rb, 255)
- */
-#define UNc_rb_ADD_UNc_rb(x, y, t) \
- do \
- { \
- t = ((x) + (y)); \
- t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
- x = (t & RB_MASK); \
- } while (0)
-
-/*
- * x_rb = (x_rb * a_rb) / 255
- */
-#define UNc_rb_MUL_UNc_rb(x, a, t) \
- do \
- { \
- t = (x & MASK) * (a & MASK); \
- t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
- t += RB_ONE_HALF; \
- t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
- x = t & RB_MASK; \
- } while (0)
-
-/*
- * x_c = (x_c * a) / 255
- */
-#define UNcx4_MUL_UNc(x, a) \
- do \
- { \
- comp4_t r1__, r2__, t__; \
- \
- r1__ = (x); \
- UNc_rb_MUL_UNc (r1__, (a), t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- UNc_rb_MUL_UNc (r2__, (a), t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a) / 255 + y_c
- */
-#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (y) & RB_MASK; \
- UNc_rb_MUL_UNc (r1__, (a), t__); \
- UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UNc_rb_MUL_UNc (r2__, (a), t__); \
- UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a + y_c * b) / 255
- */
-#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (y); \
- UNc_rb_MUL_UNc (r1__, (a), t__); \
- UNc_rb_MUL_UNc (r2__, (b), t__); \
- UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT); \
- r3__ = ((y) >> G_SHIFT); \
- UNc_rb_MUL_UNc (r2__, (a), t__); \
- UNc_rb_MUL_UNc (r3__, (b), t__); \
- UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255
- */
-#define UNcx4_MUL_UNcx4(x, a) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = (a) >> G_SHIFT; \
- UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c) / 255 + y_c
- */
-#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
- r2__ = (y) & RB_MASK; \
- UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT); \
- r3__ = ((a) >> G_SHIFT); \
- UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c + y_c * b) / 255
- */
-#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \
- r2__ = (y); \
- UNc_rb_MUL_UNc (r2__, (b), t__); \
- UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = (a) >> G_SHIFT; \
- UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \
- r3__ = (y) >> G_SHIFT; \
- UNc_rb_MUL_UNc (r3__, (b), t__); \
- UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
- \
- x = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- x_c = min(x_c + y_c, 255)
-*/
-#define UNcx4_ADD_UNcx4(x, y) \
- do \
- { \
- comp4_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x) & RB_MASK; \
- r2__ = (y) & RB_MASK; \
- UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT) & RB_MASK; \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \
- \
- x = r1__ | (r2__ << G_SHIFT); \
- } while (0)
diff --git a/gfx/cairo/libpixman/src/pixman-combine16.c b/gfx/cairo/libpixman/src/pixman-combine16.c
deleted file mode 100644
index 9ba439fe5b..0000000000
--- a/gfx/cairo/libpixman/src/pixman-combine16.c
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-
-#include "pixman-combine32.h"
-
-static force_inline uint32_t
-combine_mask (const uint32_t src, const uint32_t mask)
-{
- uint32_t s, m;
-
- m = mask >> A_SHIFT;
-
- if (!m)
- return 0;
- s = src;
-
- UN8x4_MUL_UN8 (s, m);
-
- return s;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int i;
-
- if (!mask)
- memcpy (dest, src, width * sizeof (uint16_t));
- else
- {
- uint16_t *d = (uint16_t*)dest;
- uint16_t *src16 = (uint16_t*)src;
- for (i = 0; i < width; ++i)
- {
- if ((*mask & 0xff000000) == 0xff000000) {
- // it's likely worth special casing
- // fully opaque because it avoids
- // the cost of conversion as well as the multiplication
- *(d + i) = *src16;
- } else {
- // the mask is still 32 bits
- uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask);
- *(d + i) = convert_8888_to_0565(s);
- }
- mask++;
- src16++;
- }
- }
-
-}
-
-static void
-combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int i;
-
- if (!mask)
- memcpy (dest, src, width * sizeof (uint16_t));
- else
- {
- uint16_t *d = (uint16_t*)dest;
- uint16_t *src16 = (uint16_t*)src;
- for (i = 0; i < width; ++i)
- {
- if ((*mask & 0xff000000) == 0xff000000) {
- // it's likely worth special casing
- // fully opaque because it avoids
- // the cost of conversion as well as the multiplication
- *(d + i) = *src16;
- } else if ((*mask & 0xff000000) == 0x00000000) {
- // keep the dest the same
- } else {
- // the mask is still 32 bits
- uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask);
- uint32_t ia = ALPHA_8 (~s);
- uint32_t d32 = convert_0565_to_8888(*(d + i));
- UN8x4_MUL_UN8_ADD_UN8x4 (d32, ia, s);
- *(d + i) = convert_8888_to_0565(d32);
- }
- mask++;
- src16++;
- }
- }
-
-}
-
-
-void
-_pixman_setup_combiner_functions_16 (pixman_implementation_t *imp)
-{
- int i;
- for (i = 0; i < PIXMAN_N_OPERATORS; i++) {
- imp->combine_16[i] = NULL;
- }
- imp->combine_16[PIXMAN_OP_SRC] = combine_src_u;
- imp->combine_16[PIXMAN_OP_OVER] = combine_over_u;
-}
-
diff --git a/gfx/cairo/libpixman/src/pixman-combine32.c b/gfx/cairo/libpixman/src/pixman-combine32.c
index 3ac7576bdc..4a89384d9c 100644
--- a/gfx/cairo/libpixman/src/pixman-combine32.c
+++ b/gfx/cairo/libpixman/src/pixman-combine32.c
@@ -142,12 +142,12 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i)
static void
combine_clear (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
- memset (dest, 0, width * sizeof(uint32_t));
+ memset (dest, 0, width * sizeof (uint32_t));
}
static void
@@ -155,7 +155,7 @@ combine_dst (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
- const uint32_t * mask,
+ const uint32_t * mask,
int width)
{
return;
@@ -164,9 +164,9 @@ combine_dst (pixman_implementation_t *imp,
static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -189,9 +189,9 @@ combine_src_u (pixman_implementation_t *imp,
static void
combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -254,9 +254,9 @@ combine_over_u (pixman_implementation_t *imp,
static void
combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -274,9 +274,9 @@ combine_over_reverse_u (pixman_implementation_t *imp,
static void
combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -293,9 +293,9 @@ combine_in_u (pixman_implementation_t *imp,
static void
combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -313,9 +313,9 @@ combine_in_reverse_u (pixman_implementation_t *imp,
static void
combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -332,9 +332,9 @@ combine_out_u (pixman_implementation_t *imp,
static void
combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -352,9 +352,9 @@ combine_out_reverse_u (pixman_implementation_t *imp,
static void
combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -374,9 +374,9 @@ combine_atop_u (pixman_implementation_t *imp,
static void
combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -396,9 +396,9 @@ combine_atop_reverse_u (pixman_implementation_t *imp,
static void
combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -418,9 +418,9 @@ combine_xor_u (pixman_implementation_t *imp,
static void
combine_add_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -434,68 +434,65 @@ combine_add_u (pixman_implementation_t *imp,
}
}
-static void
-combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s = combine_mask (src, mask, i);
- uint32_t d = *(dest + i);
- uint16_t sa, da;
-
- sa = s >> A_SHIFT;
- da = ~d >> A_SHIFT;
- if (sa > da)
- {
- sa = DIV_UN8 (da, sa);
- UN8x4_MUL_UN8 (s, sa);
- }
- ;
- UN8x4_ADD_UN8x4 (d, s);
- *(dest + i) = d;
- }
-}
-
/*
* PDF blend modes:
+ *
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
+ *
+ * http://www.adobe.com/devnet/pdf/pdf_reference.html
+ *
+ * The specific documents of interest are the PDF spec itself:
+ *
+ * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+ *
+ * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
+ * 9.1 and Reader 9.1:
+ *
+ * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
+ *
+ * that clarifies the specifications for blend modes ColorDodge and
+ * ColorBurn.
+ *
* The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
*
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
+ * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ *
+ * with B() being the blend function. When B(Cb, Cs) = Cs, this formula
+ * reduces to the regular OVER operator.
+ *
+ * Cs and Cb are not premultiplied, so in our implementation we instead
+ * use:
+ *
+ * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs)
*
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ * where cr, cs, and cb are premultiplied colors, and where the
+ *
+ * αb × αs × B(cb/αb, cs/αs)
+ *
+ * part is first arithmetically simplified under the assumption that αb
+ * and αs are not 0, and then updated to produce a meaningful result when
+ * they are.
+ *
+ * For all the blend mode operators, the alpha channel is given by
+ *
+ * αr = αs + αb – αb × αs
*/
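The reduction to OVER mentioned above is quick to verify: substituting B (cb/αb, cs/αs) = cs/αs into the premultiplied formula gives cr = (1 – αs) × cb + (1 – αb) × cs + αb × cs = cs + (1 – αs) × cb. A standalone float spot-check (values arbitrary):

#include <assert.h>
#include <math.h>

int
main (void)
{
    double ab = 0.75, as = 0.40;   /* arbitrary non-zero alphas */
    double cb = 0.30, cs = 0.25;   /* premultiplied colors, <= their alphas */

    double blend = (1 - as) * cb + (1 - ab) * cs + ab * as * (cs / as);
    double over  = cs + (1 - as) * cb;

    assert (fabs (blend - over) < 1e-12);
    return 0;
}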
/*
* Multiply
- * B(Dca, ad, Sca, as) = Dca.Sca
+ *
+ * ad * as * B(d / ad, s / as)
+ * = ad * as * d/ad * s/as
+ * = d * s
+ *
*/
static void
combine_multiply_u (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -519,9 +516,9 @@ combine_multiply_u (pixman_implementation_t *imp,
static void
combine_multiply_ca (pixman_implementation_t *imp,
pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
int width)
{
int i;
@@ -544,1087 +541,271 @@ combine_multiply_ca (pixman_implementation_t *imp,
}
}
+#define CLAMP(v, low, high) \
+ do \
+ { \
+ if (v < (low)) \
+ v = (low); \
+ if (v > (high)) \
+ v = (high); \
+ } while (0)
+
#define PDF_SEPARABLE_BLEND_MODE(name) \
static void \
combine_ ## name ## _u (pixman_implementation_t *imp, \
pixman_op_t op, \
- uint32_t * dest, \
- const uint32_t * src, \
- const uint32_t * mask, \
+ uint32_t * dest, \
+ const uint32_t * src, \
+ const uint32_t * mask, \
int width) \
{ \
int i; \
- for (i = 0; i < width; ++i) { \
+ for (i = 0; i < width; ++i) \
+ { \
uint32_t s = combine_mask (src, mask, i); \
uint32_t d = *(dest + i); \
uint8_t sa = ALPHA_8 (s); \
uint8_t isa = ~sa; \
uint8_t da = ALPHA_8 (d); \
uint8_t ida = ~da; \
- uint32_t result; \
- \
- result = d; \
- UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \
+ uint32_t ra, rr, rg, rb; \
\
- *(dest + i) = result + \
- (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
- (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
- (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \
+ ra = da * 0xff + sa * 0xff - sa * da; \
+ rr = isa * RED_8 (d) + ida * RED_8 (s); \
+ rg = isa * GREEN_8 (d) + ida * GREEN_8 (s); \
+ rb = isa * BLUE_8 (d) + ida * BLUE_8 (s); \
+ \
+ rr += blend_ ## name (RED_8 (d), da, RED_8 (s), sa); \
+ rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa); \
+ rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa); \
+ \
+ CLAMP (ra, 0, 255 * 255); \
+ CLAMP (rr, 0, 255 * 255); \
+ CLAMP (rg, 0, 255 * 255); \
+ CLAMP (rb, 0, 255 * 255); \
+ \
+ ra = DIV_ONE_UN8 (ra); \
+ rr = DIV_ONE_UN8 (rr); \
+ rg = DIV_ONE_UN8 (rg); \
+ rb = DIV_ONE_UN8 (rb); \
+ \
+ *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \
} \
} \
\
static void \
combine_ ## name ## _ca (pixman_implementation_t *imp, \
pixman_op_t op, \
- uint32_t * dest, \
- const uint32_t * src, \
- const uint32_t * mask, \
- int width) \
+ uint32_t * dest, \
+ const uint32_t * src, \
+ const uint32_t * mask, \
+ int width) \
{ \
int i; \
- for (i = 0; i < width; ++i) { \
+ for (i = 0; i < width; ++i) \
+ { \
uint32_t m = *(mask + i); \
uint32_t s = *(src + i); \
uint32_t d = *(dest + i); \
uint8_t da = ALPHA_8 (d); \
uint8_t ida = ~da; \
- uint32_t result; \
- \
+ uint32_t ra, rr, rg, rb; \
+ uint8_t ira, iga, iba; \
+ \
combine_mask_ca (&s, &m); \
- \
- result = d; \
- UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \
- \
- result += \
- (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
- (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
- (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
\
- *(dest + i) = result; \
+ ira = ~RED_8 (m); \
+ iga = ~GREEN_8 (m); \
+ iba = ~BLUE_8 (m); \
+ \
+ ra = da * 0xff + ALPHA_8 (s) * 0xff - ALPHA_8 (s) * da; \
+ rr = ira * RED_8 (d) + ida * RED_8 (s); \
+ rg = iga * GREEN_8 (d) + ida * GREEN_8 (s); \
+ rb = iba * BLUE_8 (d) + ida * BLUE_8 (s); \
+ \
+ rr += blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)); \
+ rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \
+ rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \
+ \
+ CLAMP (ra, 0, 255 * 255); \
+ CLAMP (rr, 0, 255 * 255); \
+ CLAMP (rg, 0, 255 * 255); \
+ CLAMP (rb, 0, 255 * 255); \
+ \
+ ra = DIV_ONE_UN8 (ra); \
+ rr = DIV_ONE_UN8 (rr); \
+ rg = DIV_ONE_UN8 (rg); \
+ rb = DIV_ONE_UN8 (rb); \
+ \
+ *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \
} \
}
/*
* Screen
- * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * (d/ad + s/as - s/as * d/ad)
+ * = ad * s + as * d - s * d
*/
-static inline uint32_t
-blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_screen (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca);
+ return s * ad + d * as - s * d;
}
PDF_SEPARABLE_BLEND_MODE (screen)
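For fully opaque pixels (αs = αd = 1) the combiner generated above reduces each color channel to DIV_ONE_UN8 (blend_screen (d, 0xff, s, 0xff)), and the integer form 255·(s + d) − s·d stays within the exact range of the division trick, so the result should match the textbook unpremultiplied screen 1 − (1 − s)(1 − d) rounded to 8 bits. A standalone brute-force check:

#include <assert.h>
#include <stdint.h>

static uint32_t
div_one_un8 (uint32_t x)        /* DIV_ONE_UN8: rounded x / 255 */
{
    return (x + 0x80 + ((x + 0x80) >> 8)) >> 8;
}

int
main (void)
{
    unsigned s, d;

    for (s = 0; s <= 255; s++)
    {
        for (d = 0; d <= 255; d++)
        {
            uint32_t fixed = 255 * (s + d) - s * d;     /* blend_screen, opaque */
            uint32_t ref = (uint32_t)
                (255.0 * (1.0 - (1.0 - s / 255.0) * (1.0 - d / 255.0)) + 0.5);

            assert (div_one_un8 (fixed) == ref);
        }
    }
    return 0;
}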
/*
* Overlay
- * B(Dca, Da, Sca, Sa) =
- * if 2.Dca < Da
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * Hardlight (s, d)
+ * = if (d / ad < 0.5)
+ * as * ad * Multiply (s/as, 2 * d/ad)
+ * else
+ * as * ad * Screen (s/as, 2 * d / ad - 1)
+ * = if (d < 0.5 * ad)
+ * as * ad * s/as * 2 * d /ad
+ * else
+ * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
+ * = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
+ * = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * as * ad - 2 * (ad - d) * (as - s)
*/
-static inline uint32_t
-blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_overlay (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- uint32_t rca;
+ uint32_t r;
- if (2 * dca < da)
- rca = 2 * sca * dca;
+ if (2 * d < ad)
+ r = 2 * s * d;
else
- rca = sa * da - 2 * (da - dca) * (sa - sca);
- return DIV_ONE_UN8 (rca);
+ r = as * ad - 2 * (ad - d) * (as - s);
+
+ return r;
}
PDF_SEPARABLE_BLEND_MODE (overlay)
/*
* Darken
- * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * MIN(d/ad, s/as)
+ * = MIN (as * d, ad * s)
*/
-static inline uint32_t
-blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_darken (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- uint32_t s, d;
+ s = ad * s;
+ d = as * d;
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UN8 (s > d ? d : s);
+ return s > d ? d : s;
}
PDF_SEPARABLE_BLEND_MODE (darken)
/*
* Lighten
- * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
+ *
+ * ad * as * B(d/ad, s/as)
+ * = ad * as * MAX(d/ad, s/as)
+ * = MAX (as * d, ad * s)
*/
-static inline uint32_t
-blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_lighten (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- uint32_t s, d;
-
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UN8 (s > d ? s : d);
+ s = ad * s;
+ d = as * d;
+
+ return s > d ? s : d;
}
PDF_SEPARABLE_BLEND_MODE (lighten)
/*
- * Color dodge
- * B(Dca, Da, Sca, Sa) =
- * if Dca == 0
- * 0
- * if Sca == Sa
- * Sa.Da
- * otherwise
- * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
- */
-static inline uint32_t
-blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
-{
- if (sca >= sa)
- {
- return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da);
- }
- else
- {
- uint32_t rca = dca * sa / (sa - sca);
- return DIV_ONE_UN8 (sa * MIN (rca, da));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_dodge)
-
-/*
- * Color burn
- * B(Dca, Da, Sca, Sa) =
- * if Dca == Da
- * Sa.Da
- * if Sca == 0
- * 0
- * otherwise
- * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
- */
-static inline uint32_t
-blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
-{
- if (sca == 0)
- {
- return dca < da ? 0 : DIV_ONE_UN8 (sa * da);
- }
- else
- {
- uint32_t rca = (da - dca) * sa / sca;
- return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_burn)
-
-/*
* Hard light
- * B(Dca, Da, Sca, Sa) =
- * if 2.Sca < Sa
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ *
+ * ad * as * B(d/ad, s/as)
+ * = if (s/as <= 0.5)
+ * ad * as * Multiply (d/ad, 2 * s/as)
+ * else
+ * ad * as * Screen (d/ad, 2 * s/as - 1)
+ * = if 2 * s <= as
+ * ad * as * d/ad * 2 * s / as
+ * else
+ * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1))
+ * = if 2 * s <= as
+ * 2 * s * d
+ * else
+ * as * ad - 2 * (ad - d) * (as - s)
*/
-static inline uint32_t
-blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_hard_light (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- if (2 * sca < sa)
- return DIV_ONE_UN8 (2 * sca * dca);
+ if (2 * s < as)
+ return 2 * s * d;
else
- return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
+ return as * ad - 2 * (ad - d) * (as - s);
}
PDF_SEPARABLE_BLEND_MODE (hard_light)
/*
- * Soft light
- * B(Dca, Da, Sca, Sa) =
- * if (2.Sca <= Sa)
- * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
- * otherwise if Dca.4 <= Da
- * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
- * otherwise
- * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
- */
-static inline uint32_t
-blend_soft_light (uint32_t dca_org,
- uint32_t da_org,
- uint32_t sca_org,
- uint32_t sa_org)
-{
- double dca = dca_org * (1.0 / MASK);
- double da = da_org * (1.0 / MASK);
- double sca = sca_org * (1.0 / MASK);
- double sa = sa_org * (1.0 / MASK);
- double rca;
-
- if (2 * sca < sa)
- {
- if (da == 0)
- rca = dca * sa;
- else
- rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
- }
- else if (da == 0)
- {
- rca = 0;
- }
- else if (4 * dca <= da)
- {
- rca = dca * sa +
- (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
- }
- else
- {
- rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
- }
- return rca * MASK + 0.5;
-}
-
-PDF_SEPARABLE_BLEND_MODE (soft_light)
-
-/*
* Difference
- * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
+ *
+ * ad * as * B(s/as, d/ad)
+ * = ad * as * abs (s/as - d/ad)
+ * = if (s/as <= d/ad)
+ * ad * as * (d/ad - s/as)
+ * else
+ * ad * as * (s/as - d/ad)
+ * = if (ad * s <= as * d)
+ * as * d - ad * s
+ * else
+ * ad * s - as * d
*/
-static inline uint32_t
-blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_difference (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- uint32_t dcasa = dca * sa;
- uint32_t scada = sca * da;
+ int32_t das = d * as;
+ int32_t sad = s * ad;
- if (scada < dcasa)
- return DIV_ONE_UN8 (dcasa - scada);
+ if (sad < das)
+ return das - sad;
else
- return DIV_ONE_UN8 (scada - dcasa);
+ return sad - das;
}
PDF_SEPARABLE_BLEND_MODE (difference)
/*
* Exclusion
- * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
+ *
+ * ad * as * B(s/as, d/ad)
+ * = ad * as * (d/ad + s/as - 2 * d/ad * s/as)
+ * = as * d + ad * s - 2 * s * d
*/
/* This can be made faster by writing it directly and not using
* PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-static inline uint32_t
-blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+static inline int32_t
+blend_exclusion (int32_t d, int32_t ad, int32_t s, int32_t as)
{
- return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca);
+ return s * ad + d * as - 2 * d * s;
}
PDF_SEPARABLE_BLEND_MODE (exclusion)
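A sketch of the direct version the comment above alludes to, not upstream code: expanding the macro for exclusion, each color channel works out to isa·d + ida·s + (ad·s + as·d − 2·s·d) = 255·(s + d) − 2·s·d independently of the alphas, and the alpha channel to 255·(s + d) − s·d. Both are already in [0, 255·255], so the CLAMPs become no-ops and one rounded division per channel suffices. This assumes the helpers combine_mask, DIV_ONE_UN8, MASK, G_SHIFT and A_SHIFT from this file:

static void
combine_exclusion_direct_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    int i;

    for (i = 0; i < width; ++i)
    {
        uint32_t s = combine_mask (src, mask, i);
        uint32_t d = *(dest + i);
        uint32_t r = 0;
        int shift;

        for (shift = 0; shift <= A_SHIFT; shift += G_SHIFT)
        {
            uint32_t sc = (s >> shift) & MASK;
            uint32_t dc = (d >> shift) & MASK;
            uint32_t c;

            if (shift == A_SHIFT)       /* alpha: as + ad - as*ad/255 */
                c = DIV_ONE_UN8 (255 * (sc + dc) - sc * dc);
            else                        /* color: s + d - 2*s*d/255 */
                c = DIV_ONE_UN8 (255 * (sc + dc) - 2 * sc * dc);

            r |= c << shift;
        }

        *(dest + i) = r;
    }
}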
#undef PDF_SEPARABLE_BLEND_MODE
-/*
- * PDF non-separable blend modes are implemented using the following functions
- * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min values of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- * l = LUM (C)
- * min = Cmin
- * max = Cmax
- * if min < 0.0
- * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
- * if max > 1.0
- * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
- * return C
- *
- * set_lum (C, l):
- * d = l – LUM (C)
- * C += d
- * return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- * if Cmax > Cmin
- * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- * Cmax = s
- * else
- * Cmid = Cmax = 0.0
- * Cmin = 0.0
- * return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- * if max > 1.0
- *
- * into
- *
- * if max > a
- *
- * then clip_color is also linear:
- *
- * r * clip_color (C, a) = clip_color (r * C, r * a)
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- * r * set_lum ( C, l, a)
- *
- * = r × clip_color ( C + l - LUM (C), a)
- *
- * = clip_color ( r * C + r × l - r * LUM (C), r * a)
- *
- * = set_lum ( r * C, r * l, r * a)
- *
- * Finally, set_sat:
- *
- * r * set_sat (C, s) = set_sat (x * C, r * s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- * r * set_sat (C, s) = set_sat (r * C, r * s)
- *
- */
-
-/* So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied):
- *
- * Color:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- * Luminosity:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- * Saturation:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- * a_s * LUM (D), a_s * a_d)
- * = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
- *
- * Hue:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
- *
- */
-
-#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
-#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
-#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
-#define SAT(c) (CH_MAX (c) - CH_MIN (c))
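A few fixed points of the integer LUM and SAT above (the 30/59/11 weights are the pseudocode's 0.3/0.59/0.11 scaled by 100, with truncating integer division). Standalone, assuming the four macro definitions above are in scope:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t white[3] = { 255, 255, 255 };      /* r, g, b */
    uint32_t red[3]   = { 255, 0, 0 };
    uint32_t gray[3]  = { 128, 128, 128 };

    assert (LUM (white) == 255);    /* weights sum to 100% */
    assert (LUM (red) == 76);       /* 255 * 30 / 100, truncated */
    assert (SAT (gray) == 0);       /* no spread between channels */
    assert (SAT (red) == 255);
    return 0;
}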
-
-#define PDF_NON_SEPARABLE_BLEND_MODE(name) \
- static void \
- combine_ ## name ## _u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint32_t *dest, \
- const uint32_t *src, \
- const uint32_t *mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) \
- { \
- uint32_t s = combine_mask (src, mask, i); \
- uint32_t d = *(dest + i); \
- uint8_t sa = ALPHA_8 (s); \
- uint8_t isa = ~sa; \
- uint8_t da = ALPHA_8 (d); \
- uint8_t ida = ~da; \
- uint32_t result; \
- uint32_t sc[3], dc[3], c[3]; \
- \
- result = d; \
- UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \
- dc[0] = RED_8 (d); \
- sc[0] = RED_8 (s); \
- dc[1] = GREEN_8 (d); \
- sc[1] = GREEN_8 (s); \
- dc[2] = BLUE_8 (d); \
- sc[2] = BLUE_8 (s); \
- blend_ ## name (c, dc, da, sc, sa); \
- \
- *(dest + i) = result + \
- (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
- (DIV_ONE_UN8 (c[0]) << R_SHIFT) + \
- (DIV_ONE_UN8 (c[1]) << G_SHIFT) + \
- (DIV_ONE_UN8 (c[2])); \
- } \
- }
-
-static void
-set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
-{
- double a, l, min, max;
- double tmp[3];
-
- a = sa * (1.0 / MASK);
-
- l = lum * (1.0 / MASK);
- tmp[0] = src[0] * (1.0 / MASK);
- tmp[1] = src[1] * (1.0 / MASK);
- tmp[2] = src[2] * (1.0 / MASK);
-
- l = l - LUM (tmp);
- tmp[0] += l;
- tmp[1] += l;
- tmp[2] += l;
-
- /* clip_color */
- l = LUM (tmp);
- min = CH_MIN (tmp);
- max = CH_MAX (tmp);
-
- if (min < 0)
- {
- if (l - min == 0.0)
- {
- tmp[0] = 0;
- tmp[1] = 0;
- tmp[2] = 0;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * l / (l - min);
- tmp[1] = l + (tmp[1] - l) * l / (l - min);
- tmp[2] = l + (tmp[2] - l) * l / (l - min);
- }
- }
- if (max > a)
- {
- if (max - l == 0.0)
- {
- tmp[0] = a;
- tmp[1] = a;
- tmp[2] = a;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
- tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
- tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
- }
- }
-
- dest[0] = tmp[0] * MASK + 0.5;
- dest[1] = tmp[1] * MASK + 0.5;
- dest[2] = tmp[2] * MASK + 0.5;
-}
-
-static void
-set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
-{
- int id[3];
- uint32_t min, max;
-
- if (src[0] > src[1])
- {
- if (src[0] > src[2])
- {
- id[0] = 0;
- if (src[1] > src[2])
- {
- id[1] = 1;
- id[2] = 2;
- }
- else
- {
- id[1] = 2;
- id[2] = 1;
- }
- }
- else
- {
- id[0] = 2;
- id[1] = 0;
- id[2] = 1;
- }
- }
- else
- {
- if (src[0] > src[2])
- {
- id[0] = 1;
- id[1] = 0;
- id[2] = 2;
- }
- else
- {
- id[2] = 0;
- if (src[1] > src[2])
- {
- id[0] = 1;
- id[1] = 2;
- }
- else
- {
- id[0] = 2;
- id[1] = 1;
- }
- }
- }
-
- max = dest[id[0]];
- min = dest[id[2]];
- if (max > min)
- {
- dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
- dest[id[0]] = sat;
- dest[id[2]] = 0;
- }
- else
- {
- dest[0] = dest[1] = dest[2] = 0;
- }
-}
-
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
- */
-static inline void
-blend_hsl_hue (uint32_t c[3],
- uint32_t dc[3],
- uint32_t da,
- uint32_t sc[3],
- uint32_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_sat (c, c, SAT (dc) * sa);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
-
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
- */
-static inline void
-blend_hsl_saturation (uint32_t c[3],
- uint32_t dc[3],
- uint32_t da,
- uint32_t sc[3],
- uint32_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_sat (c, c, SAT (sc) * da);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
-
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
- */
-static inline void
-blend_hsl_color (uint32_t c[3],
- uint32_t dc[3],
- uint32_t da,
- uint32_t sc[3],
- uint32_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
-
-/*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
- */
-static inline void
-blend_hsl_luminosity (uint32_t c[3],
- uint32_t dc[3],
- uint32_t da,
- uint32_t sc[3],
- uint32_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_lum (c, c, sa * da, LUM (sc) * da);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
-
-#undef SAT
-#undef LUM
-#undef CH_MAX
-#undef CH_MIN
-#undef PDF_NON_SEPARABLE_BLEND_MODE
-
-/* All of the disjoint/conjoint composing functions
- *
- * The four entries in the first column indicate what source contributions
- * come from each of the four areas of the picture -- areas covered by neither
- * A nor B, areas covered only by A, areas covered only by B and finally
- * areas covered by both A and B.
- *
- *               Disjoint                           Conjoint
- *               Fa                 Fb                 Fa              Fb
- * (0,0,0,0)     0                  0                  0               0
- * (0,A,0,A)     1                  0                  1               0
- * (0,0,B,B)     0                  1                  0               1
- * (0,A,B,A)     1                  min((1-a)/b,1)     1               max(1-a/b,0)
- * (0,A,B,B)     min((1-b)/a,1)     1                  max(1-b/a,0)    1
- * (0,0,0,A)     max(1-(1-b)/a,0)   0                  min(1,b/a)      0
- * (0,0,0,B)     0                  max(1-(1-a)/b,0)   0               min(a/b,1)
- * (0,A,0,0)     min(1,(1-b)/a)     0                  max(1-b/a,0)    0
- * (0,0,B,0)     0                  min(1,(1-a)/b)     0               max(1-a/b,0)
- * (0,0,B,A)     max(1-(1-b)/a,0)   min(1,(1-a)/b)     min(1,b/a)      max(1-a/b,0)
- * (0,A,0,B)     min(1,(1-b)/a)     max(1-(1-a)/b,0)   max(1-b/a,0)    min(1,a/b)
- * (0,A,B,0)     min(1,(1-b)/a)     min(1,(1-a)/b)     max(1-b/a,0)    max(1-a/b,0)
- *
- * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more
- * information about these operators.
- */
-
-#define COMBINE_A_OUT 1
-#define COMBINE_A_IN 2
-#define COMBINE_B_OUT 4
-#define COMBINE_B_IN 8
-
-#define COMBINE_CLEAR 0
-#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN)
-#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN)
-#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT)
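Taking disjoint OVER as a concrete example of how these flags select rows of the table above: COMBINE_A_OVER sets both A bits, so the Fa switch in combine_disjoint_general_u below sees COMBINE_A and picks Fa = 1, while only COMBINE_B_OUT is set on the B side, giving Fb = min((1-a)/b,1) — the (0,A,B,A) row. A tiny standalone check of that bit selection (defines copied from above):

#include <assert.h>
#include <stdint.h>

#define COMBINE_A_OUT  1
#define COMBINE_A_IN   2
#define COMBINE_B_OUT  4
#define COMBINE_B_IN   8
#define COMBINE_A      (COMBINE_A_OUT | COMBINE_A_IN)
#define COMBINE_B      (COMBINE_B_OUT | COMBINE_B_IN)
#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)

int
main (void)
{
    uint8_t combine = COMBINE_A_OVER;

    assert ((combine & COMBINE_A) == COMBINE_A);        /* Fa = 1 (MASK) */
    assert ((combine & COMBINE_B) == COMBINE_B_OUT);    /* Fb = disjoint out part */
    return 0;
}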
-
-/* portion covered by a but not b */
-static uint8_t
-combine_disjoint_out_part (uint8_t a, uint8_t b)
-{
- /* min (1, (1-b) / a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return MASK; /* 1 */
- return DIV_UN8 (b, a); /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static uint8_t
-combine_disjoint_in_part (uint8_t a, uint8_t b)
-{
- /* max (1-(1-b)/a,0) */
- /* = - min ((1-b)/a - 1, 0) */
- /* = 1 - min (1, (1-b)/a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return 0; /* 1 - 1 */
- return ~DIV_UN8(b, a); /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b */
-static uint8_t
-combine_conjoint_out_part (uint8_t a, uint8_t b)
-{
- /* max (1-b/a,0) */
- /* = 1-min(b/a,1) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return 0x00; /* 0 */
- return ~DIV_UN8(b, a); /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static uint8_t
-combine_conjoint_in_part (uint8_t a, uint8_t b)
-{
- /* min (1,b/a) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return MASK; /* 1 */
- return DIV_UN8 (b, a); /* b/a */
-}
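
To make the fixed-point helpers concrete, here is a minimal, self-contained sketch (not part of the patch; div_un8 and MASK8 are hypothetical stand-ins for the library's DIV_UN8 and MASK) checking combine_disjoint_out_part against the table's min (1, (1-b)/a):

#include <assert.h>
#include <stdint.h>

#define MASK8 0xff

/* Hypothetical stand-in for DIV_UN8's rounding division. */
static uint8_t
div_un8 (uint8_t a, uint8_t b)
{
    return (uint8_t) (((unsigned) a * MASK8 + b / 2) / b);
}

static uint8_t
disjoint_out_part (uint8_t a, uint8_t b)
{
    b = ~b;                     /* 1 - b in 8-bit fixed point */
    if (b >= a)
        return MASK8;           /* (1-b)/a >= 1, clamp to 1   */
    return div_un8 (b, a);
}

int
main (void)
{
    /* a = 0.25, b = 0.5: (1-b)/a = 2, so the factor clamps to 1. */
    assert (disjoint_out_part (0x40, 0x80) == MASK8);

    /* a = 0.75, b = 0.5: (1-b)/a = 2/3, i.e. 0xa9 in 8-bit. */
    assert (disjoint_out_part (0xc0, 0x80) == 0xa9);
    return 0;
}
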
-
-#define GET_COMP(v, i) ((uint16_t) (uint8_t) ((v) >> i))
-
-#define ADD(x, y, i, t) \
- ((t) = GET_COMP (x, i) + GET_COMP (y, i), \
- (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-#define GENERIC(x, y, i, ax, ay, t, u, v) \
- ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) + \
- MUL_UN8 (GET_COMP (x, i), ax, (v))), \
- (uint32_t) ((uint8_t) ((t) | \
- (0 - ((t) >> G_SHIFT)))) << (i))
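
The clamping idiom shared by ADD and GENERIC is worth spelling out: when the 8-bit sum or blend overflows, (t) >> G_SHIFT becomes nonzero and 0 minus that value is an all-ones word, so the OR saturates the byte at 0xff. A tiny standalone sketch (assuming G_SHIFT == 8 and unsigned arithmetic):

#include <assert.h>
#include <stdint.h>

static uint8_t
saturating_add_un8 (uint8_t x, uint8_t y)
{
    uint32_t t = (uint32_t) x + y;

    /* On overflow t >> 8 is 1, so 0u - (t >> 8) is all ones and the
     * OR forces the low byte to 0xff; otherwise it is a no-op. */
    return (uint8_t) (t | (0u - (t >> 8)));
}

int
main (void)
{
    assert (saturating_add_un8 (0x70, 0x20) == 0x90); /* no overflow */
    assert (saturating_add_un8 (0xf0, 0x20) == 0xff); /* clamped     */
    return 0;
}
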
-
-static void
-combine_disjoint_general_u (uint32_t * dest,
- const uint32_t *src,
- const uint32_t *mask,
- int width,
- uint8_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s = combine_mask (src, mask, i);
- uint32_t d = *(dest + i);
- uint32_t m, n, o, p;
- uint16_t Fa, Fb, t, u, v;
- uint8_t sa = s >> A_SHIFT;
- uint8_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_disjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_disjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_disjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_disjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
- s = m | n | o | p;
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s = combine_mask (src, mask, i);
- uint16_t a = s >> A_SHIFT;
-
- if (s != 0x00)
- {
- uint32_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_disjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_u (uint32_t * dest,
- const uint32_t *src,
- const uint32_t *mask,
- int width,
- uint8_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s = combine_mask (src, mask, i);
- uint32_t d = *(dest + i);
- uint32_t m, n, o, p;
- uint16_t Fa, Fb, t, u, v;
- uint8_t sa = s >> A_SHIFT;
- uint8_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_conjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_conjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_conjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_conjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
-
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-
/* Component alpha combiners */
static void
@@ -1955,428 +1136,6 @@ combine_add_ca (pixman_implementation_t *imp,
}
}
-static void
-combine_saturate_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s, d;
- uint16_t sa, sr, sg, sb, da;
- uint16_t t, u, v;
- uint32_t m, n, o, p;
-
- d = *(dest + i);
- s = *(src + i);
- m = *(mask + i);
-
- combine_mask_ca (&s, &m);
-
- sa = (m >> A_SHIFT);
- sr = (m >> R_SHIFT) & MASK;
- sg = (m >> G_SHIFT) & MASK;
- sb = m & MASK;
- da = ~d >> A_SHIFT;
-
- if (sb <= da)
- m = ADD (s, d, 0, t);
- else
- m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
- if (sg <= da)
- n = ADD (s, d, G_SHIFT, t);
- else
- n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
- if (sr <= da)
- o = ADD (s, d, R_SHIFT, t);
- else
- o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
- if (sa <= da)
- p = ADD (s, d, A_SHIFT, t);
- else
- p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
- *(dest + i) = m | n | o | p;
- }
-}
-
-static void
-combine_disjoint_general_ca (uint32_t * dest,
- const uint32_t *src,
- const uint32_t *mask,
- int width,
- uint8_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s, d;
- uint32_t m, n, o, p;
- uint32_t Fa, Fb;
- uint16_t t, u, v;
- uint32_t sa;
- uint8_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
- n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
- n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
- n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
- n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_disjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_ca (uint32_t * dest,
- const uint32_t *src,
- const uint32_t *mask,
- int width,
- uint8_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint32_t s, d;
- uint32_t m, n, o, p;
- uint32_t Fa, Fb;
- uint16_t t, u, v;
- uint32_t sa;
- uint8_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
- n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
- n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
- n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
- n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dest,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
void
_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
{
@@ -2394,51 +1153,15 @@ _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
- imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
-
- /* Disjoint, unified */
- imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
- imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
- imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
-
- /* Conjoint, unified */
- imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
- imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
- imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
- imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
- imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
- imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
- imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
- imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
- imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
- imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
/* Component alpha combiners */
imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
@@ -2454,51 +1177,13 @@ _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
- imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
-
- /* Disjoint CA */
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
-
- /* Conjoint CA */
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
- imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
- imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
- imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-
- /* It is not clear that these make sense, so make them noops for now */
- imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
- imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
- imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
- imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
diff --git a/gfx/cairo/libpixman/src/pixman-combine32.h b/gfx/cairo/libpixman/src/pixman-combine32.h
index cdd56a61a1..59bb2477aa 100644
--- a/gfx/cairo/libpixman/src/pixman-combine32.h
+++ b/gfx/cairo/libpixman/src/pixman-combine32.h
@@ -12,7 +12,7 @@
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
-#define RB_MASK_PLUS_ONE 0x10000100
+#define RB_MASK_PLUS_ONE 0x1000100
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
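
The one-character header fix above is easy to misread, so a note: the constant is meant to carry one whole lane step (0x100) above each 0xff lane of RB_MASK, i.e. 0xff00ff + 0x10001 == 0x1000100, whereas the old 0x10000100 put the high lane's bit four positions too far left. A hypothetical standalone illustration (a simplified model of the UN8_rb saturating add, not the header's actual macro):

#include <assert.h>
#include <stdint.h>

#define RB_MASK          0x00ff00ffu
#define RB_MASK_PLUS_ONE 0x01000100u

/* Illustrative two-lane saturating add over red/blue packed as
 * 0x00rr00bb. */
static uint32_t
add_rb_saturate (uint32_t x, uint32_t y)
{
    uint32_t t = x + y;

    /* A lane that overflows sets its carry bit (bit 8 or bit 24);
     * subtracting the carries from 0x100-per-lane leaves 0xff in
     * exactly the overflowed lanes, which the OR then forces on. */
    t |= RB_MASK_PLUS_ONE - ((t >> 8) & 0x00010001u);
    return t & RB_MASK;
}

int
main (void)
{
    /* "Mask plus one" means plus one per lane. */
    assert (RB_MASK + 0x00010001u == RB_MASK_PLUS_ONE);

    assert (add_rb_saturate (0x00f00010u, 0x00200020u) == 0x00ff0030u);
    assert (add_rb_saturate (0x00100010u, 0x00200020u) == 0x00300030u);
    return 0;
}
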
diff --git a/gfx/cairo/libpixman/src/pixman-combine64.c b/gfx/cairo/libpixman/src/pixman-combine64.c
deleted file mode 100644
index 1c85af8eec..0000000000
--- a/gfx/cairo/libpixman/src/pixman-combine64.c
+++ /dev/null
@@ -1,2465 +0,0 @@
-/* WARNING: This file is generated by make-combine.pl from pixman-combine.c.template.
-   Please edit one of those files rather than this one. */
-
-#line 1 "pixman-combine.c.template"
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#include "pixman-private.h"
-
-#include "pixman-combine64.h"
-
-/*** per channel helper functions ***/
-
-static void
-combine_mask_ca (uint64_t *src, uint64_t *mask)
-{
- uint64_t a = *mask;
-
- uint64_t x;
- uint32_t xa;
-
- if (!a)
- {
- *(src) = 0;
- return;
- }
-
- x = *(src);
- if (a == ~0)
- {
- x = x >> A_SHIFT;
- x |= x << G_SHIFT;
- x |= x << R_SHIFT;
- *(mask) = x;
- return;
- }
-
- xa = x >> A_SHIFT;
- UN16x4_MUL_UN16x4 (x, a);
- *(src) = x;
-
- UN16x4_MUL_UN16 (a, xa);
- *(mask) = a;
-}
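
combine_mask_ca implements the component-alpha contract: the mask is multiplied into the source channel by channel, and the mask slot is rewritten as a per-channel alpha m.sa. A small sketch in reals, one channel shown (values purely illustrative):

#include <assert.h>
#include <math.h>

int
main (void)
{
    double sa = 0.5;    /* source alpha                     */
    double sr = 0.4;    /* one premultiplied source channel */
    double mr = 0.8;    /* mask value for that channel      */

    double src_r   = sr * mr;   /* the UN16x4_MUL_UN16x4 step */
    double alpha_r = mr * sa;   /* the UN16x4_MUL_UN16 step   */

    assert (fabs (src_r   - 0.32) < 1e-12);
    assert (fabs (alpha_r - 0.40) < 1e-12);
    return 0;
}
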
-
-static void
-combine_mask_value_ca (uint64_t *src, const uint64_t *mask)
-{
- uint64_t a = *mask;
- uint64_t x;
-
- if (!a)
- {
- *(src) = 0;
- return;
- }
-
- if (a == ~0)
- return;
-
- x = *(src);
- UN16x4_MUL_UN16x4 (x, a);
- *(src) = x;
-}
-
-static void
-combine_mask_alpha_ca (const uint64_t *src, uint64_t *mask)
-{
- uint64_t a = *(mask);
- uint64_t x;
-
- if (!a)
- return;
-
- x = *(src) >> A_SHIFT;
- if (x == MASK)
- return;
-
- if (a == ~0)
- {
- x |= x << G_SHIFT;
- x |= x << R_SHIFT;
- *(mask) = x;
- return;
- }
-
- UN16x4_MUL_UN16 (a, x);
- *(mask) = a;
-}
-
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions. The unified alpha version has a 'U' at the end of the name,
- * the component version has a 'C'. Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-/*
- * All of the composing functions
- */
-
-static force_inline uint64_t
-combine_mask (const uint64_t *src, const uint64_t *mask, int i)
-{
- uint64_t s, m;
-
- if (mask)
- {
- m = *(mask + i) >> A_SHIFT;
-
- if (!m)
- return 0;
- }
-
- s = *(src + i);
-
- if (mask)
- UN16x4_MUL_UN16 (s, m);
-
- return s;
-}
-
-static void
-combine_clear (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- memset (dest, 0, width * sizeof(uint64_t));
-}
-
-static void
-combine_dst (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- return;
-}
-
-static void
-combine_src_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- if (!mask)
- memcpy (dest, src, width * sizeof (uint64_t));
- else
- {
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
-
- *(dest + i) = s;
- }
- }
-}
-
-/* if the Src is opaque, call combine_src_u */
-static void
-combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t ia = ALPHA_16 (~s);
-
- UN16x4_MUL_UN16_ADD_UN16x4 (d, ia, s);
- *(dest + i) = d;
- }
-}
-
-/* if the Dst is opaque, this is a noop */
-static void
-combine_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t ia = ALPHA_16 (~*(dest + i));
- UN16x4_MUL_UN16_ADD_UN16x4 (s, ia, d);
- *(dest + i) = s;
- }
-}
-
-/* if the Dst is opaque, call combine_src_u */
-static void
-combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t a = ALPHA_16 (*(dest + i));
- UN16x4_MUL_UN16 (s, a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, this is a noop */
-static void
-combine_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t a = ALPHA_16 (s);
- UN16x4_MUL_UN16 (d, a);
- *(dest + i) = d;
- }
-}
-
-/* if the Dst is opaque, call combine_clear */
-static void
-combine_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t a = ALPHA_16 (~*(dest + i));
- UN16x4_MUL_UN16 (s, a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_clear */
-static void
-combine_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t a = ALPHA_16 (~s);
- UN16x4_MUL_UN16 (d, a);
- *(dest + i) = d;
- }
-}
-
-/* if the Src is opaque, call combine_in_u */
-/* if the Dst is opaque, call combine_over_u */
-/* if both the Src and Dst are opaque, call combine_src_u */
-static void
-combine_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t dest_a = ALPHA_16 (d);
- uint64_t src_ia = ALPHA_16 (~s);
-
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_a, d, src_ia);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_over_reverse_u */
-/* if the Dst is opaque, call combine_in_reverse_u */
-/* if both the Src and Dst are opaque, call combine_dst_u */
-static void
-combine_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t src_a = ALPHA_16 (s);
- uint64_t dest_ia = ALPHA_16 (~d);
-
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_a);
- *(dest + i) = s;
- }
-}
-
-/* if the Src is opaque, call combine_over_u */
-/* if the Dst is opaque, call combine_over_reverse_u */
-/* if both the Src and Dst are opaque, call combine_clear */
-static void
-combine_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t src_ia = ALPHA_16 (~s);
- uint64_t dest_ia = ALPHA_16 (~d);
-
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_ia);
- *(dest + i) = s;
- }
-}
-
-static void
-combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- UN16x4_ADD_UN16x4 (d, s);
- *(dest + i) = d;
- }
-}
-
-/* if the Src is opaque, call combine_add_u */
-/* if the Dst is opaque, call combine_add_u */
-/* if both the Src and Dst are opaque, call combine_add_u */
-static void
-combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint32_t sa, da;
-
- sa = s >> A_SHIFT;
- da = ~d >> A_SHIFT;
- if (sa > da)
- {
- sa = DIV_UN16 (da, sa);
- UN16x4_MUL_UN16 (s, sa);
- }
- UN16x4_ADD_UN16x4 (d, s);
- *(dest + i) = d;
- }
-}
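
A note on combine_saturate_u: da here is really the destination's remaining headroom (~d >> A_SHIFT is 1 minus the destination alpha in fixed point), so when sa > da the source is scaled by da/sa and the add fills the pixel exactly. A quick worked example in reals (a sketch, not library code):

#include <assert.h>
#include <math.h>

int
main (void)
{
    double sa = 0.80;           /* source alpha               */
    double da = 0.40;           /* headroom: 1 - dest alpha   */
    double scale = da / sa;     /* the DIV_UN16 (da, sa) step */

    /* After the add, alpha is (1 - da) + sa * scale == 1 exactly. */
    assert (fabs ((1.0 - da) + sa * scale - 1.0) < 1e-12);
    return 0;
}
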
-
-/*
- * PDF blend modes:
- * The following blend modes have been taken from the PDF ISO 32000
- * specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
- * The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
- */
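
The remark that OVER is the special case B(Cb, Cs) = Cs can be checked numerically: with that choice the (1 - ab).as.Cs and ab.as.Cs terms merge into as.Cs, which is premultiplied OVER. A quick sketch:

#include <assert.h>
#include <math.h>

int
main (void)
{
    double ab = 0.75, cb = 0.40;    /* backdrop alpha and color */
    double as = 0.50, cs = 0.80;    /* source alpha and color   */

    /* ar.Cr = (1 - as).ab.Cb + (1 - ab).as.Cs + ab.as.B, with B = Cs */
    double pdf  = (1 - as) * ab * cb + (1 - ab) * as * cs + ab * as * cs;

    /* Premultiplied OVER: Sca + (1 - as).Dca, Sca = as.Cs, Dca = ab.Cb */
    double over = as * cs + (1 - as) * ab * cb;

    assert (fabs (pdf - over) < 1e-12);
    return 0;
}
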
-
-/*
- * Multiply
- * B(Dca, ad, Sca, as) = Dca.Sca
- */
-
-static void
-combine_multiply_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t ss = s;
- uint64_t src_ia = ALPHA_16 (~s);
- uint64_t dest_ia = ALPHA_16 (~d);
-
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (ss, dest_ia, d, src_ia);
- UN16x4_MUL_UN16x4 (d, s);
- UN16x4_ADD_UN16x4 (d, ss);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_multiply_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t m = *(mask + i);
- uint64_t s = *(src + i);
- uint64_t d = *(dest + i);
- uint64_t r = d;
- uint64_t dest_ia = ALPHA_16 (~d);
-
- combine_mask_value_ca (&s, &m);
-
- UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (r, ~m, s, dest_ia);
- UN16x4_MUL_UN16x4 (d, s);
- UN16x4_ADD_UN16x4 (r, d);
-
- *(dest + i) = r;
- }
-}
-
-#define PDF_SEPARABLE_BLEND_MODE(name) \
- static void \
- combine_ ## name ## _u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint64_t * dest, \
- const uint64_t * src, \
- const uint64_t * mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) { \
- uint64_t s = combine_mask (src, mask, i); \
- uint64_t d = *(dest + i); \
- uint16_t sa = ALPHA_16 (s); \
- uint16_t isa = ~sa; \
- uint16_t da = ALPHA_16 (d); \
- uint16_t ida = ~da; \
- uint64_t result; \
- \
- result = d; \
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \
- \
- *(dest + i) = result + \
- (DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_16 (d), da, RED_16 (s), sa) << R_SHIFT) + \
- (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), sa) << G_SHIFT) + \
- (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), sa)); \
- } \
- } \
- \
- static void \
- combine_ ## name ## _ca (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint64_t * dest, \
- const uint64_t * src, \
- const uint64_t * mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) { \
- uint64_t m = *(mask + i); \
- uint64_t s = *(src + i); \
- uint64_t d = *(dest + i); \
- uint16_t da = ALPHA_16 (d); \
- uint16_t ida = ~da; \
- uint64_t result; \
- \
- combine_mask_value_ca (&s, &m); \
- \
- result = d; \
- UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (result, ~m, s, ida); \
- \
- result += \
- (DIV_ONE_UN16 (ALPHA_16 (m) * (uint64_t)da) << A_SHIFT) + \
- (blend_ ## name (RED_16 (d), da, RED_16 (s), RED_16 (m)) << R_SHIFT) + \
- (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), GREEN_16 (m)) << G_SHIFT) + \
- (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), BLUE_16 (m))); \
- \
- *(dest + i) = result; \
- } \
- }
-
-/*
- * Screen
- * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
- */
-static inline uint64_t
-blend_screen (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- return DIV_ONE_UN16 (sca * da + dca * sa - sca * dca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (screen)
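
A spot-check of the separable-mode skeleton with B = screen (a sketch in reals): the macro computes Rca = Dca.(1 - as) + Sca.(1 - ad) + B(Dca, ad, Sca, as), and for an opaque source over an opaque destination only the blend term survives:

#include <assert.h>
#include <math.h>

/* blend_screen on reals: Sca.Da + Dca.Sa - Sca.Dca. */
static double
blend_screen_ref (double dca, double da, double sca, double sa)
{
    return sca * da + dca * sa - sca * dca;
}

int
main (void)
{
    double dca = 0.25, sca = 0.5;

    /* ad == as == 1: the skeleton reduces to B, and screen reduces
     * to the familiar s + d - s.d. */
    double r = blend_screen_ref (dca, 1.0, sca, 1.0);

    assert (fabs (r - (sca + dca - sca * dca)) < 1e-12);
    return 0;
}
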
-
-/*
- * Overlay
- * B(Dca, Da, Sca, Sa) =
- * if 2.Dca < Da
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline uint64_t
-blend_overlay (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- uint64_t rca;
-
- if (2 * dca < da)
- rca = 2 * sca * dca;
- else
- rca = sa * da - 2 * (da - dca) * (sa - sca);
- return DIV_ONE_UN16 (rca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (overlay)
-
-/*
- * Darken
- * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
- */
-static inline uint64_t
-blend_darken (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- uint64_t s, d;
-
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UN16 (s > d ? d : s);
-}
-
-PDF_SEPARABLE_BLEND_MODE (darken)
-
-/*
- * Lighten
- * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
- */
-static inline uint64_t
-blend_lighten (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- uint64_t s, d;
-
- s = sca * da;
- d = dca * sa;
- return DIV_ONE_UN16 (s > d ? s : d);
-}
-
-PDF_SEPARABLE_BLEND_MODE (lighten)
-
-/*
- * Color dodge
- * B(Dca, Da, Sca, Sa) =
- * if Dca == 0
- * 0
- * if Sca == Sa
- * Sa.Da
- * otherwise
- * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
- */
-static inline uint64_t
-blend_color_dodge (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- if (sca >= sa)
- {
- return dca == 0 ? 0 : DIV_ONE_UN16 (sa * da);
- }
- else
- {
- uint64_t rca = dca * sa / (sa - sca);
- return DIV_ONE_UN16 (sa * MIN (rca, da));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_dodge)
-
-/*
- * Color burn
- * B(Dca, Da, Sca, Sa) =
- * if Dca == Da
- * Sa.Da
- * if Sca == 0
- * 0
- * otherwise
- * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
- */
-static inline uint64_t
-blend_color_burn (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- if (sca == 0)
- {
- return dca < da ? 0 : DIV_ONE_UN16 (sa * da);
- }
- else
- {
- uint64_t rca = (da - dca) * sa / sca;
- return DIV_ONE_UN16 (sa * (MAX (rca, da) - rca));
- }
-}
-
-PDF_SEPARABLE_BLEND_MODE (color_burn)
-
-/*
- * Hard light
- * B(Dca, Da, Sca, Sa) =
- * if 2.Sca < Sa
- * 2.Sca.Dca
- * otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
- */
-static inline uint64_t
-blend_hard_light (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- if (2 * sca < sa)
- return DIV_ONE_UN16 (2 * sca * dca);
- else
- return DIV_ONE_UN16 (sa * da - 2 * (da - dca) * (sa - sca));
-}
-
-PDF_SEPARABLE_BLEND_MODE (hard_light)
-
-/*
- * Soft light
- * B(Dca, Da, Sca, Sa) =
- * if (2.Sca <= Sa)
- * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
- *     otherwise if 4.Dca <= Da
- *         Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3))
- * otherwise
- * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
- */
-static inline uint64_t
-blend_soft_light (uint64_t dca_org,
- uint64_t da_org,
- uint64_t sca_org,
- uint64_t sa_org)
-{
- double dca = dca_org * (1.0 / MASK);
- double da = da_org * (1.0 / MASK);
- double sca = sca_org * (1.0 / MASK);
- double sa = sa_org * (1.0 / MASK);
- double rca;
-
- if (2 * sca < sa)
- {
- if (da == 0)
- rca = dca * sa;
- else
- rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
- }
- else if (da == 0)
- {
- rca = 0;
- }
- else if (4 * dca <= da)
- {
- rca = dca * sa +
- (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
- }
- else
- {
- rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
- }
- return rca * MASK + 0.5;
-}
-
-PDF_SEPARABLE_BLEND_MODE (soft_light)
-
-/*
- * Difference
- * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
- */
-static inline uint64_t
-blend_difference (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- uint64_t dcasa = dca * sa;
- uint64_t scada = sca * da;
-
- if (scada < dcasa)
- return DIV_ONE_UN16 (dcasa - scada);
- else
- return DIV_ONE_UN16 (scada - dcasa);
-}
-
-PDF_SEPARABLE_BLEND_MODE (difference)
-
-/*
- * Exclusion
- * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
- */
-
-/* This can be made faster by writing it directly and not using
- * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
-
-static inline uint64_t
-blend_exclusion (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
-{
- return DIV_ONE_UN16 (sca * da + dca * sa - 2 * dca * sca);
-}
-
-PDF_SEPARABLE_BLEND_MODE (exclusion)
-
-#undef PDF_SEPARABLE_BLEND_MODE
-
-/*
- * PDF nonseparable blend modes are implemented using the following functions
- * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
- * and min value of the red, green and blue components.
- *
- * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
- *
- * clip_color (C):
- *     l = LUM (C)
- *     min = Cmin
- *     max = Cmax
- *     if min < 0.0
- *         C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
- *     if max > 1.0
- *         C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
- * return C
- *
- * set_lum (C, l):
- * d = l – LUM (C)
- * C += d
- * return clip_color (C)
- *
- * SAT (C) = CH_MAX (C) - CH_MIN (C)
- *
- * set_sat (C, s):
- * if Cmax > Cmin
- * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
- * Cmax = s
- * else
- * Cmid = Cmax = 0.0
- * Cmin = 0.0
- * return C
- */
-
-/* For premultiplied colors, we need to know what happens when C is
- * multiplied by a real number. LUM and SAT are linear:
- *
- *    LUM (r × C) = r × LUM (C)        SAT (r × C) = r × SAT (C)
- *
- * If we extend clip_color with an extra argument a and change
- *
- *     if max > 1.0
- *
- * into
- *
- *     if max > a
- *
- * then clip_color is also linear:
- *
- *    r × clip_color (C, a) = clip_color (r × C, r × a);
- *
- * for positive r.
- *
- * Similarly, we can extend set_lum with an extra argument that is just passed
- * on to clip_color:
- *
- *    r × set_lum (C, l, a)
- *
- *    = r × clip_color (C + l - LUM (C), a)
- *
- *    = clip_color (r × C + r × l - r × LUM (C), r × a)
- *
- *    = set_lum (r × C, r × l, r × a)
- *
- * Finally, set_sat:
- *
- *    r × set_sat (C, s) = set_sat (x × C, r × s)
- *
- * The above holds for all non-zero x, because the x'es in the fraction for
- * C_mid cancel out. Specifically, it holds for x = r:
- *
- *    r × set_sat (C, s) = set_sat (r × C, r × s)
- *
- */
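
The linearity claims above are easy to spot-check numerically with a double-precision model of the set_sat defined further down (a sketch; the channel values are arbitrary but distinct):

#include <assert.h>
#include <math.h>

/* Reference set_sat on doubles: rescale so max - min == s, min == 0. */
static void
set_sat_ref (double c[3], double s)
{
    int hi = 0, lo = 0, mid, i;

    for (i = 1; i < 3; ++i)
    {
        if (c[i] > c[hi]) hi = i;
        if (c[i] < c[lo]) lo = i;
    }
    mid = 3 - hi - lo;    /* valid because the channels are distinct */

    if (c[hi] > c[lo])
    {
        c[mid] = (c[mid] - c[lo]) * s / (c[hi] - c[lo]);
        c[hi] = s;
        c[lo] = 0.0;
    }
    else
    {
        c[0] = c[1] = c[2] = 0.0;
    }
}

int
main (void)
{
    double r = 0.6, s = 0.5;
    double c1[3] = { 0.9, 0.3, 0.5 };
    double c2[3] = { 0.9 * r, 0.3 * r, 0.5 * r };    /* r × C */
    int k;

    set_sat_ref (c1, s);        /* set_sat (C, s)         */
    set_sat_ref (c2, r * s);    /* set_sat (r × C, r × s) */

    for (k = 0; k < 3; ++k)
        assert (fabs (r * c1[k] - c2[k]) < 1e-12);

    return 0;
}
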
-
-/* So, for the non-separable PDF blend modes, we have (using s, d for
- * non-premultiplied colors, and S, D for premultiplied):
- *
- * Color:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
- * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
- *
- *
- * Luminosity:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
- * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
- *
- *
- * Saturation:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
- * a_s * LUM (D), a_s * a_d)
- * = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
- *
- * Hue:
- *
- * a_s * a_d * B(s, d)
- * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
- *
- */
-
-#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
-#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
-#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
-#define SAT(c) (CH_MAX (c) - CH_MIN (c))
-
-#define PDF_NON_SEPARABLE_BLEND_MODE(name) \
- static void \
- combine_ ## name ## _u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint64_t *dest, \
- const uint64_t *src, \
- const uint64_t *mask, \
- int width) \
- { \
- int i; \
- for (i = 0; i < width; ++i) \
- { \
- uint64_t s = combine_mask (src, mask, i); \
- uint64_t d = *(dest + i); \
- uint16_t sa = ALPHA_16 (s); \
- uint16_t isa = ~sa; \
- uint16_t da = ALPHA_16 (d); \
- uint16_t ida = ~da; \
- uint64_t result; \
- uint64_t sc[3], dc[3], c[3]; \
- \
- result = d; \
- UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \
- dc[0] = RED_16 (d); \
- sc[0] = RED_16 (s); \
- dc[1] = GREEN_16 (d); \
- sc[1] = GREEN_16 (s); \
- dc[2] = BLUE_16 (d); \
- sc[2] = BLUE_16 (s); \
- blend_ ## name (c, dc, da, sc, sa); \
- \
- *(dest + i) = result + \
- (DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) + \
- (DIV_ONE_UN16 (c[0]) << R_SHIFT) + \
- (DIV_ONE_UN16 (c[1]) << G_SHIFT) + \
- (DIV_ONE_UN16 (c[2])); \
- } \
- }
-
-static void
-set_lum (uint64_t dest[3], uint64_t src[3], uint64_t sa, uint64_t lum)
-{
- double a, l, min, max;
- double tmp[3];
-
- a = sa * (1.0 / MASK);
-
- l = lum * (1.0 / MASK);
- tmp[0] = src[0] * (1.0 / MASK);
- tmp[1] = src[1] * (1.0 / MASK);
- tmp[2] = src[2] * (1.0 / MASK);
-
- l = l - LUM (tmp);
- tmp[0] += l;
- tmp[1] += l;
- tmp[2] += l;
-
- /* clip_color */
- l = LUM (tmp);
- min = CH_MIN (tmp);
- max = CH_MAX (tmp);
-
- if (min < 0)
- {
- if (l - min == 0.0)
- {
- tmp[0] = 0;
- tmp[1] = 0;
- tmp[2] = 0;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * l / (l - min);
- tmp[1] = l + (tmp[1] - l) * l / (l - min);
- tmp[2] = l + (tmp[2] - l) * l / (l - min);
- }
- }
- if (max > a)
- {
- if (max - l == 0.0)
- {
- tmp[0] = a;
- tmp[1] = a;
- tmp[2] = a;
- }
- else
- {
- tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
- tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
- tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
- }
- }
-
- dest[0] = tmp[0] * MASK + 0.5;
- dest[1] = tmp[1] * MASK + 0.5;
- dest[2] = tmp[2] * MASK + 0.5;
-}
-
-static void
-set_sat (uint64_t dest[3], uint64_t src[3], uint64_t sat)
-{
- int id[3];
- uint64_t min, max;
-
- if (src[0] > src[1])
- {
- if (src[0] > src[2])
- {
- id[0] = 0;
- if (src[1] > src[2])
- {
- id[1] = 1;
- id[2] = 2;
- }
- else
- {
- id[1] = 2;
- id[2] = 1;
- }
- }
- else
- {
- id[0] = 2;
- id[1] = 0;
- id[2] = 1;
- }
- }
- else
- {
- if (src[0] > src[2])
- {
- id[0] = 1;
- id[1] = 0;
- id[2] = 2;
- }
- else
- {
- id[2] = 0;
- if (src[1] > src[2])
- {
- id[0] = 1;
- id[1] = 2;
- }
- else
- {
- id[0] = 2;
- id[1] = 1;
- }
- }
- }
-
- max = dest[id[0]];
- min = dest[id[2]];
- if (max > min)
- {
- dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
- dest[id[0]] = sat;
- dest[id[2]] = 0;
- }
- else
- {
- dest[0] = dest[1] = dest[2] = 0;
- }
-}
-
-/*
- * Hue:
- * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
- */
-static inline void
-blend_hsl_hue (uint64_t c[3],
- uint64_t dc[3],
- uint64_t da,
- uint64_t sc[3],
- uint64_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_sat (c, c, SAT (dc) * sa);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
-
-/*
- * Saturation:
- * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
- */
-static inline void
-blend_hsl_saturation (uint64_t c[3],
- uint64_t dc[3],
- uint64_t da,
- uint64_t sc[3],
- uint64_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_sat (c, c, SAT (sc) * da);
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
-
-/*
- * Color:
- * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
- */
-static inline void
-blend_hsl_color (uint64_t c[3],
- uint64_t dc[3],
- uint64_t da,
- uint64_t sc[3],
- uint64_t sa)
-{
- c[0] = sc[0] * da;
- c[1] = sc[1] * da;
- c[2] = sc[2] * da;
- set_lum (c, c, sa * da, LUM (dc) * sa);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
-
-/*
- * Luminosity:
- * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
- */
-static inline void
-blend_hsl_luminosity (uint64_t c[3],
- uint64_t dc[3],
- uint64_t da,
- uint64_t sc[3],
- uint64_t sa)
-{
- c[0] = dc[0] * sa;
- c[1] = dc[1] * sa;
- c[2] = dc[2] * sa;
- set_lum (c, c, sa * da, LUM (sc) * da);
-}
-
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
-
-#undef SAT
-#undef LUM
-#undef CH_MAX
-#undef CH_MIN
-#undef PDF_NON_SEPARABLE_BLEND_MODE
-
-/* All of the disjoint/conjoint composing functions
- *
- * The four entries in the first column indicate what source contributions
- * come from each of the four areas of the picture -- areas covered by neither
- * A nor B, areas covered only by A, areas covered only by B and finally
- * areas covered by both A and B.
- *
- *                      Disjoint                      Conjoint
- *              Fa                Fb                  Fa              Fb
- * (0,0,0,0)    0                 0                   0               0
- * (0,A,0,A)    1                 0                   1               0
- * (0,0,B,B)    0                 1                   0               1
- * (0,A,B,A)    1                 min((1-a)/b,1)      1               max(1-a/b,0)
- * (0,A,B,B)    min((1-b)/a,1)    1                   max(1-b/a,0)    1
- * (0,0,0,A)    max(1-(1-b)/a,0)  0                   min(1,b/a)      0
- * (0,0,0,B)    0                 max(1-(1-a)/b,0)    0               min(a/b,1)
- * (0,A,0,0)    min(1,(1-b)/a)    0                   max(1-b/a,0)    0
- * (0,0,B,0)    0                 min(1,(1-a)/b)      0               max(1-a/b,0)
- * (0,0,B,A)    max(1-(1-b)/a,0)  min(1,(1-a)/b)      min(1,b/a)      max(1-a/b,0)
- * (0,A,0,B)    min(1,(1-b)/a)    max(1-(1-a)/b,0)    max(1-b/a,0)    min(1,a/b)
- * (0,A,B,0)    min(1,(1-b)/a)    min(1,(1-a)/b)      max(1-b/a,0)    max(1-a/b,0)
- *
- * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more
- * information about these operators.
- */
-
-#define COMBINE_A_OUT 1
-#define COMBINE_A_IN 2
-#define COMBINE_B_OUT 4
-#define COMBINE_B_IN 8
-
-#define COMBINE_CLEAR 0
-#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN)
-#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
-#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN)
-#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN)
-#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT)
-
-/* portion covered by a but not b */
-static uint16_t
-combine_disjoint_out_part (uint16_t a, uint16_t b)
-{
- /* min (1, (1-b) / a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return MASK; /* 1 */
- return DIV_UN16 (b, a); /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static uint16_t
-combine_disjoint_in_part (uint16_t a, uint16_t b)
-{
- /* max (1-(1-b)/a,0) */
- /* = - min ((1-b)/a - 1, 0) */
- /* = 1 - min (1, (1-b)/a) */
-
- b = ~b; /* 1 - b */
- if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
- return 0; /* 1 - 1 */
- return ~DIV_UN16(b, a); /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b */
-static uint16_t
-combine_conjoint_out_part (uint16_t a, uint16_t b)
-{
- /* max (1-b/a,0) */
- /* = 1-min(b/a,1) */
-
- /* min (1, (1-b) / a) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return 0x00; /* 0 */
- return ~DIV_UN16(b, a); /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static uint16_t
-combine_conjoint_in_part (uint16_t a, uint16_t b)
-{
- /* min (1,b/a) */
-
- if (b >= a) /* b >= a -> b/a >= 1 */
- return MASK; /* 1 */
- return DIV_UN16 (b, a); /* b/a */
-}
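
A companion check for the 16-bit build's conjoint IN factor min (1, b/a) (div_un16 and MASK16 are hypothetical stand-ins for DIV_UN16 and MASK):

#include <assert.h>
#include <stdint.h>

#define MASK16 0xffff

/* Hypothetical stand-in for DIV_UN16's rounding division. */
static uint16_t
div_un16 (uint16_t a, uint16_t b)
{
    return (uint16_t) (((uint32_t) a * MASK16 + b / 2) / b);
}

static uint16_t
conjoint_in_part (uint16_t a, uint16_t b)
{
    if (b >= a)                  /* b/a >= 1, clamp to 1 */
        return MASK16;
    return div_un16 (b, a);      /* b/a */
}

int
main (void)
{
    assert (conjoint_in_part (0x4000, 0x8000) == MASK16);  /* clamped */
    assert (conjoint_in_part (0xc000, 0x8000) == 0xaaaa);  /* ~ 2/3   */
    return 0;
}
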
-
-#define GET_COMP(v, i) ((uint32_t) (uint16_t) ((v) >> i))
-
-#define ADD(x, y, i, t) \
- ((t) = GET_COMP (x, i) + GET_COMP (y, i), \
- (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-#define GENERIC(x, y, i, ax, ay, t, u, v) \
- ((t) = (MUL_UN16 (GET_COMP (y, i), ay, (u)) + \
- MUL_UN16 (GET_COMP (x, i), ax, (v))), \
- (uint64_t) ((uint16_t) ((t) | \
- (0 - ((t) >> G_SHIFT)))) << (i))
-
-static void
-combine_disjoint_general_u (uint64_t * dest,
- const uint64_t *src,
- const uint64_t *mask,
- int width,
- uint16_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t m, n, o, p;
- uint32_t Fa, Fb, t, u, v;
- uint16_t sa = s >> A_SHIFT;
- uint16_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_disjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_disjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_disjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_disjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
- s = m | n | o | p;
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint32_t a = s >> A_SHIFT;
-
- if (s != 0x00)
- {
- uint64_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s);
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_disjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_u (uint64_t * dest,
- const uint64_t *src,
- const uint64_t *mask,
- int width,
- uint16_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = combine_mask (src, mask, i);
- uint64_t d = *(dest + i);
- uint64_t m, n, o, p;
- uint32_t Fa, Fb, t, u, v;
- uint16_t sa = s >> A_SHIFT;
- uint16_t da = d >> A_SHIFT;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- Fa = combine_conjoint_out_part (sa, da);
- break;
-
- case COMBINE_A_IN:
- Fa = combine_conjoint_in_part (sa, da);
- break;
-
- case COMBINE_A:
- Fa = MASK;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- Fb = combine_conjoint_out_part (da, sa);
- break;
-
- case COMBINE_B_IN:
- Fb = combine_conjoint_in_part (da, sa);
- break;
-
- case COMBINE_B:
- Fb = MASK;
- break;
- }
-
- m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
- n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
- o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
- p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
-}
-
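All eight disjoint/conjoint IN/OUT/ATOP/XOR variants above differ only in which "part" helpers produce Fa and Fb. A hedged sketch of those scalar helpers at 16-bit depth, following the standard definitions (a is one alpha, b the other, both in [0, 0xffff] representing [0, 1]); note that every branch guards a == 0 before dividing:

    /*   disjoint IN  = max (1 - (1-b)/a, 0)    disjoint OUT = min ((1-b)/a, 1)
     *   conjoint IN  = min (b/a, 1)            conjoint OUT = max (1 - b/a, 0)
     */
    #include <stdint.h>
    #include <stdio.h>

    #define M16 0xffffu
    #define DIV16(a, b)  ((uint16_t) (((uint32_t) (a) * M16 + ((b) / 2)) / (b)))

    static uint16_t disjoint_out (uint16_t a, uint16_t b)
    { b = ~b & M16; return b >= a ? M16 : DIV16 (b, a); }

    static uint16_t disjoint_in (uint16_t a, uint16_t b)
    { b = ~b & M16; return b >= a ? 0 : ~DIV16 (b, a) & M16; }

    static uint16_t conjoint_out (uint16_t a, uint16_t b)
    { return b >= a ? 0 : ~DIV16 (b, a) & M16; }

    static uint16_t conjoint_in (uint16_t a, uint16_t b)
    { return b >= a ? M16 : DIV16 (b, a); }

    int
    main (void)
    {
        /* Fully opaque dest (b = M16): IN factors go to 1, OUT factors to 0. */
        printf ("%04x %04x\n", disjoint_in (0x8000, M16), conjoint_out (0x8000, M16));
        return 0;
    }

ATOP pairs an IN factor for one operand with an OUT factor for the other, and XOR uses OUT factors for both.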
-/************************************************************************/
-/*********************** Per Channel functions **************************/
-/************************************************************************/
-
-static void
-combine_clear_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- memset (dest, 0, width * sizeof(uint64_t));
-}
-
-static void
-combine_src_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
-
- combine_mask_value_ca (&s, &m);
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t a;
-
- combine_mask_ca (&s, &m);
-
- a = ~m;
- if (a)
- {
- uint64_t d = *(dest + i);
- UN16x4_MUL_UN16x4_ADD_UN16x4 (d, a, s);
- s = d;
- }
-
- *(dest + i) = s;
- }
-}
-
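In the component-alpha loop above, combine_mask_ca leaves s holding src*mask per channel and m holding mask*src_alpha, so the update is d_c = s_c + d_c * (1 - m_c) for each channel. One channel of that, as a hedged scalar sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* s_c and m_c are the already mask-folded values. */
    static uint16_t
    ca_over_channel (uint16_t s_c, uint16_t m_c, uint16_t d_c)
    {
        uint32_t t = (uint32_t) d_c * (0xffffu ^ m_c) + 0x8000;
        t = (t + (t >> 16)) >> 16;            /* d_c * (1 - m_c), rounded */
        t += s_c;
        return t > 0xffff ? 0xffff : (uint16_t) t;
    }

    int
    main (void)
    {
        /* Opaque mask channel: the source replaces the dest. */
        printf ("%04x\n", ca_over_channel (0x8000, 0xffff, 0x4000));
        return 0;
    }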
-static void
-combine_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint64_t a = ~d >> A_SHIFT;
-
- if (a)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
-
- UN16x4_MUL_UN16x4 (s, m);
- UN16x4_MUL_UN16_ADD_UN16x4 (s, a, d);
-
- *(dest + i) = s;
- }
- }
-}
-
-static void
-combine_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint32_t a = d >> A_SHIFT;
- uint64_t s = 0;
-
- if (a)
- {
- uint64_t m = *(mask + i);
-
- s = *(src + i);
- combine_mask_value_ca (&s, &m);
-
- if (a != MASK)
- UN16x4_MUL_UN16 (s, a);
- }
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t a;
-
- combine_mask_alpha_ca (&s, &m);
-
- a = m;
- if (a != ~0)
- {
- uint64_t d = 0;
-
- if (a)
- {
- d = *(dest + i);
- UN16x4_MUL_UN16x4 (d, a);
- }
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint32_t a = ~d >> A_SHIFT;
- uint64_t s = 0;
-
- if (a)
- {
- uint64_t m = *(mask + i);
-
- s = *(src + i);
- combine_mask_value_ca (&s, &m);
-
- if (a != MASK)
- UN16x4_MUL_UN16 (s, a);
- }
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t a;
-
- combine_mask_alpha_ca (&s, &m);
-
- a = ~m;
- if (a != ~0)
- {
- uint64_t d = 0;
-
- if (a)
- {
- d = *(dest + i);
- UN16x4_MUL_UN16x4 (d, a);
- }
-
- *(dest + i) = d;
- }
- }
-}
-
-static void
-combine_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t ad;
- uint32_t as = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = ~m;
-
- UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t ad;
- uint32_t as = ~d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = m;
-
- UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t d = *(dest + i);
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t ad;
- uint32_t as = ~d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- ad = ~m;
-
- UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_add_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s = *(src + i);
- uint64_t m = *(mask + i);
- uint64_t d = *(dest + i);
-
- combine_mask_value_ca (&s, &m);
-
- UN16x4_ADD_UN16x4 (d, s);
-
- *(dest + i) = d;
- }
-}
-
-static void
-combine_saturate_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s, d;
- uint32_t sa, sr, sg, sb, da;
- uint32_t t, u, v;
- uint64_t m, n, o, p;
-
- d = *(dest + i);
- s = *(src + i);
- m = *(mask + i);
-
- combine_mask_ca (&s, &m);
-
- sa = (m >> A_SHIFT);
- sr = (m >> R_SHIFT) & MASK;
- sg = (m >> G_SHIFT) & MASK;
- sb = m & MASK;
- da = ~d >> A_SHIFT;
-
- if (sb <= da)
- m = ADD (s, d, 0, t);
- else
- m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
- if (sg <= da)
- n = ADD (s, d, G_SHIFT, t);
- else
- n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
- if (sr <= da)
- o = ADD (s, d, R_SHIFT, t);
- else
- o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
- if (sa <= da)
- p = ADD (s, d, A_SHIFT, t);
- else
- p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
- *(dest + i) = m | n | o | p;
- }
-}
-
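Per channel, SATURATE adds the (masked) source outright when its alpha fits into the destination's remaining headroom da = 1 - dest_alpha, and otherwise scales the source by da/alpha first so the sum cannot exceed 1.0. A hedged scalar sketch, glossing over the exact rounding of the GENERIC path:

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t
    saturate_channel (uint16_t s, uint16_t d, uint16_t sa, uint16_t da)
    {
        uint32_t t;

        if (sa <= da)
            t = (uint32_t) s + d;                    /* fits: plain add */
        else
        {
            uint32_t f = ((uint32_t) da << 16) / sa; /* da/sa as a 16-bit fraction */
            t = ((uint32_t) s * f >> 16) + d;        /* s * (da/sa) + d */
        }
        return t > 0xffff ? 0xffff : (uint16_t) t;
    }

    int
    main (void)
    {
        printf ("%04x\n", saturate_channel (0x8000, 0x8000, 0xffff, 0x7fff));
        return 0;
    }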
-static void
-combine_disjoint_general_ca (uint64_t * dest,
- const uint64_t *src,
- const uint64_t *mask,
- int width,
- uint16_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s, d;
- uint64_t m, n, o, p;
- uint64_t Fa, Fb;
- uint32_t t, u, v;
- uint64_t sa;
- uint16_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> 0), da);
- n = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> 0), da);
- n = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> 0));
- n = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> 0));
- n = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_disjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_disjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_disjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-static void
-combine_conjoint_general_ca (uint64_t * dest,
- const uint64_t *src,
- const uint64_t *mask,
- int width,
- uint16_t combine)
-{
- int i;
-
- for (i = 0; i < width; ++i)
- {
- uint64_t s, d;
- uint64_t m, n, o, p;
- uint64_t Fa, Fb;
- uint32_t t, u, v;
- uint64_t sa;
- uint16_t da;
-
- s = *(src + i);
- m = *(mask + i);
- d = *(dest + i);
- da = d >> A_SHIFT;
-
- combine_mask_ca (&s, &m);
-
- sa = m;
-
- switch (combine & COMBINE_A)
- {
- default:
- Fa = 0;
- break;
-
- case COMBINE_A_OUT:
- m = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> 0), da);
- n = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A_IN:
- m = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> 0), da);
- n = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
- o = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
- p = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
- Fa = m | n | o | p;
- break;
-
- case COMBINE_A:
- Fa = ~0;
- break;
- }
-
- switch (combine & COMBINE_B)
- {
- default:
- Fb = 0;
- break;
-
- case COMBINE_B_OUT:
- m = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> 0));
- n = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B_IN:
- m = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> 0));
- n = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
- o = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
- p = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
- Fb = m | n | o | p;
- break;
-
- case COMBINE_B:
- Fb = ~0;
- break;
- }
- m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
- n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
- o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
- p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
-
- s = m | n | o | p;
-
- *(dest + i) = s;
- }
-}
-
-static void
-combine_conjoint_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
-}
-
-static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
-}
-
-static void
-combine_conjoint_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
-}
-
-static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
-}
-
-static void
-combine_conjoint_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
-}
-
-static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
-}
-
-static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
-}
-
-static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
-}
-
-static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint64_t * dest,
- const uint64_t * src,
- const uint64_t * mask,
- int width)
-{
- combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
-}
-
-void
-_pixman_setup_combiner_functions_64 (pixman_implementation_t *imp)
-{
- /* Unified alpha */
- imp->combine_64[PIXMAN_OP_CLEAR] = combine_clear;
- imp->combine_64[PIXMAN_OP_SRC] = combine_src_u;
- imp->combine_64[PIXMAN_OP_DST] = combine_dst;
- imp->combine_64[PIXMAN_OP_OVER] = combine_over_u;
- imp->combine_64[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
- imp->combine_64[PIXMAN_OP_IN] = combine_in_u;
- imp->combine_64[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
- imp->combine_64[PIXMAN_OP_OUT] = combine_out_u;
- imp->combine_64[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
- imp->combine_64[PIXMAN_OP_ATOP] = combine_atop_u;
- imp->combine_64[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
- imp->combine_64[PIXMAN_OP_XOR] = combine_xor_u;
- imp->combine_64[PIXMAN_OP_ADD] = combine_add_u;
- imp->combine_64[PIXMAN_OP_SATURATE] = combine_saturate_u;
-
- /* Disjoint, unified */
- imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
- imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
- imp->combine_64[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
-
- /* Conjoint, unified */
- imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
- imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
- imp->combine_64[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
-
- imp->combine_64[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
- imp->combine_64[PIXMAN_OP_SCREEN] = combine_screen_u;
- imp->combine_64[PIXMAN_OP_OVERLAY] = combine_overlay_u;
- imp->combine_64[PIXMAN_OP_DARKEN] = combine_darken_u;
- imp->combine_64[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
- imp->combine_64[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
- imp->combine_64[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
- imp->combine_64[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
- imp->combine_64[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
- imp->combine_64[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
- imp->combine_64[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
- imp->combine_64[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
- imp->combine_64[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
- imp->combine_64[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
- imp->combine_64[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
-
- /* Component alpha combiners */
- imp->combine_64_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
- imp->combine_64_ca[PIXMAN_OP_SRC] = combine_src_ca;
- /* dest */
- imp->combine_64_ca[PIXMAN_OP_OVER] = combine_over_ca;
- imp->combine_64_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_IN] = combine_in_ca;
- imp->combine_64_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_OUT] = combine_out_ca;
- imp->combine_64_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
- imp->combine_64_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_XOR] = combine_xor_ca;
- imp->combine_64_ca[PIXMAN_OP_ADD] = combine_add_ca;
- imp->combine_64_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
-
- /* Disjoint CA */
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
-
- /* Conjoint CA */
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
- imp->combine_64_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
-
- imp->combine_64_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
- imp->combine_64_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
- imp->combine_64_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
- imp->combine_64_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
- imp->combine_64_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
- imp->combine_64_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
- imp->combine_64_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
- imp->combine_64_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
- imp->combine_64_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
- imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
- imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
-
-    /* It is not clear that these make sense, so make them no-ops for now */
- imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
- imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
- imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
- imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
-}
-
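The setup function above only populates function-pointer tables indexed by pixman_op_t; faster implementations later overwrite individual slots. A stripped-down, self-contained model of that pattern (the names here are illustrative, not pixman's):

    #include <stdint.h>
    #include <string.h>

    typedef void (*combine_64_t) (uint64_t *dest, const uint64_t *src,
                                  const uint64_t *mask, int width);

    enum { OP_CLEAR, OP_SRC, OP_OVER, N_OPS };

    static void
    op_clear (uint64_t *d, const uint64_t *s, const uint64_t *m, int w)
    {
        (void) s; (void) m;
        memset (d, 0, w * sizeof *d);
    }

    static void
    op_src (uint64_t *d, const uint64_t *s, const uint64_t *m, int w)
    {
        (void) m;
        memcpy (d, s, w * sizeof *d);
    }

    static combine_64_t combine_table[N_OPS];

    static void
    setup_generic (void)
    {
        /* Generic defaults first... */
        combine_table[OP_CLEAR] = op_clear;
        combine_table[OP_SRC]   = op_src;
        combine_table[OP_OVER]  = op_src;   /* stand-in; a real OVER blends */

        /* ...then a specialised backend would overwrite only the slots it
         * accelerates, e.g. combine_table[OP_OVER] = op_over_simd;       */
    }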
diff --git a/gfx/cairo/libpixman/src/pixman-combine64.h b/gfx/cairo/libpixman/src/pixman-combine64.h
deleted file mode 100644
index 00413a85f6..0000000000
--- a/gfx/cairo/libpixman/src/pixman-combine64.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/* WARNING: This file is generated by combine.pl from combine.inc.
- Please edit one of those files rather than this one. */
-
-#line 1 "pixman-combine.c.template"
-
-#define COMPONENT_SIZE 16
-#define MASK 0xffffULL
-#define ONE_HALF 0x8000ULL
-
-#define A_SHIFT 16 * 3
-#define R_SHIFT 16 * 2
-#define G_SHIFT 16
-#define A_MASK 0xffff000000000000ULL
-#define R_MASK 0xffff00000000ULL
-#define G_MASK 0xffff0000ULL
-
-#define RB_MASK 0xffff0000ffffULL
-#define AG_MASK 0xffff0000ffff0000ULL
-#define RB_ONE_HALF 0x800000008000ULL
-#define RB_MASK_PLUS_ONE 0x10000000010000ULL
-
-#define ALPHA_16(x) ((x) >> A_SHIFT)
-#define RED_16(x) (((x) >> R_SHIFT) & MASK)
-#define GREEN_16(x) (((x) >> G_SHIFT) & MASK)
-#define BLUE_16(x) ((x) & MASK)
-
-/*
- * Helper macros.
- */
-
-#define MUL_UN16(a, b, t) \
- ((t) = (a) * (uint32_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
-
-#define DIV_UN16(a, b) \
- (((uint32_t) (a) * MASK + ((b) / 2)) / (b))
-
-#define ADD_UN16(x, y, t) \
- ((t) = (x) + (y), \
- (uint64_t) (uint16_t) ((t) | (0 - ((t) >> G_SHIFT))))
-
-#define DIV_ONE_UN16(x) \
- (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
-
-/*
- * The macros below use some tricks to be able to process two color
- * components at the same time.
- */
-
-/*
- * x_rb = (x_rb * a) / 65535
- */
-#define UN16_rb_MUL_UN16(x, a, t) \
- do \
- { \
- t = ((x) & RB_MASK) * (a); \
- t += RB_ONE_HALF; \
- x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
- x &= RB_MASK; \
- } while (0)
-
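UN16_rb_MUL_UN16 is a SWAR trick: the two 16-bit channels sit at bits 0 and 32 of one 64-bit word (RB_MASK), and the 16 empty bits above each channel absorb the intermediate product, so a single multiply and one rounding step process both channels at once. A self-contained check against the scalar MUL_UN16 definition:

    #include <assert.h>
    #include <stdint.h>

    #define RB_MASK      0xffff0000ffffULL
    #define RB_ONE_HALF  0x800000008000ULL

    static uint16_t
    mul_un16 (uint16_t x, uint16_t a)   /* scalar reference (MUL_UN16) */
    {
        uint32_t t = (uint32_t) x * a + 0x8000;
        return (uint16_t) ((t + (t >> 16)) >> 16);
    }

    int
    main (void)
    {
        uint16_t r = 0x1234, b = 0xabcd, a = 0x7f00;

        /* Pack r and b 32 bits apart, multiply both by a in one go. */
        uint64_t x = ((uint64_t) r << 32) | b;
        uint64_t t = (x & RB_MASK) * a + RB_ONE_HALF;
        x = ((t + ((t >> 16) & RB_MASK)) >> 16) & RB_MASK;

        assert ((uint16_t) (x >> 32) == mul_un16 (r, a));
        assert ((uint16_t)  x        == mul_un16 (b, a));
        return 0;
    }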
-/*
- * x_rb = min (x_rb + y_rb, 65535)
- */
-#define UN16_rb_ADD_UN16_rb(x, y, t) \
- do \
- { \
- t = ((x) + (y)); \
- t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
- x = (t & RB_MASK); \
- } while (0)
-
-/*
- * x_rb = (x_rb * a_rb) / 65535
- */
-#define UN16_rb_MUL_UN16_rb(x, a, t) \
- do \
- { \
- t = (x & MASK) * (a & MASK); \
- t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
- t += RB_ONE_HALF; \
- t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
- x = t & RB_MASK; \
- } while (0)
-
-/*
- * x_c = (x_c * a) / 65535
- */
-#define UN16x4_MUL_UN16(x, a) \
- do \
- { \
- uint64_t r1__, r2__, t__; \
- \
- r1__ = (x); \
- UN16_rb_MUL_UN16 (r1__, (a), t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- UN16_rb_MUL_UN16 (r2__, (a), t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a) / 65535 + y_c
- */
-#define UN16x4_MUL_UN16_ADD_UN16x4(x, a, y) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (y) & RB_MASK; \
- UN16_rb_MUL_UN16 (r1__, (a), t__); \
- UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UN16_rb_MUL_UN16 (r2__, (a), t__); \
- UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a + y_c * b) / 65535
- */
-#define UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16(x, a, y, b) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (y); \
- UN16_rb_MUL_UN16 (r1__, (a), t__); \
- UN16_rb_MUL_UN16 (r2__, (b), t__); \
- UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT); \
- r3__ = ((y) >> G_SHIFT); \
- UN16_rb_MUL_UN16 (r2__, (a), t__); \
- UN16_rb_MUL_UN16 (r3__, (b), t__); \
- UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c) / 65535
- */
-#define UN16x4_MUL_UN16x4(x, a) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = (a) >> G_SHIFT; \
- UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c) / 65535 + y_c
- */
-#define UN16x4_MUL_UN16x4_ADD_UN16x4(x, a, y) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
- r2__ = (y) & RB_MASK; \
- UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT); \
- r3__ = ((a) >> G_SHIFT); \
- UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
- \
- (x) = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = (x_c * a_c + y_c * b) / 65535
- */
-#define UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16(x, a, y, b) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x); \
- r2__ = (a); \
- UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \
- r2__ = (y); \
- UN16_rb_MUL_UN16 (r2__, (b), t__); \
- UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = (x) >> G_SHIFT; \
- r3__ = (a) >> G_SHIFT; \
- UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \
- r3__ = (y) >> G_SHIFT; \
- UN16_rb_MUL_UN16 (r3__, (b), t__); \
- UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
- \
- x = r1__ | (r2__ << G_SHIFT); \
- } while (0)
-
-/*
- * x_c = min (x_c + y_c, 65535)
- */
-#define UN16x4_ADD_UN16x4(x, y) \
- do \
- { \
- uint64_t r1__, r2__, r3__, t__; \
- \
- r1__ = (x) & RB_MASK; \
- r2__ = (y) & RB_MASK; \
- UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \
- \
- r2__ = ((x) >> G_SHIFT) & RB_MASK; \
- r3__ = ((y) >> G_SHIFT) & RB_MASK; \
- UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \
- \
- x = r1__ | (r2__ << G_SHIFT); \
- } while (0)
diff --git a/gfx/cairo/libpixman/src/pixman-compiler.h b/gfx/cairo/libpixman/src/pixman-compiler.h
index fb674c5f08..d852f93798 100644
--- a/gfx/cairo/libpixman/src/pixman-compiler.h
+++ b/gfx/cairo/libpixman/src/pixman-compiler.h
@@ -19,6 +19,12 @@
#endif
#if defined (__GNUC__)
+# define unlikely(expr) __builtin_expect ((expr), 0)
+#else
+# define unlikely(expr) (expr)
+#endif
+
+#if defined (__GNUC__)
# define MAYBE_UNUSED __attribute__((unused))
#else
# define MAYBE_UNUSED
@@ -106,6 +112,8 @@
/* Sun Studio 8 visibility */
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
# define PIXMAN_EXPORT __global
+#elif defined (_MSC_VER) || defined(__MINGW32__)
+# define PIXMAN_EXPORT PIXMAN_API
#else
# define PIXMAN_EXPORT
#endif
@@ -131,12 +139,10 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(__MINGW32__) || defined(PIXMAN_USE_XP_DLL_TLS_WORKAROUND)
+#elif defined(__MINGW32__)
# define _NO_W32_PSEUDO_MODIFIERS
# include <windows.h>
-#undef IN
-#undef OUT
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static volatile int tls_ ## name ## _initialized = 0; \
@@ -193,7 +199,7 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(HAVE_PTHREAD_SETSPECIFIC)
+#elif defined(HAVE_PTHREADS)
#include <pthread.h>
diff --git a/gfx/cairo/libpixman/src/pixman-conical-gradient.c b/gfx/cairo/libpixman/src/pixman-conical-gradient.c
index 8bb46aecdc..a39e20c4eb 100644
--- a/gfx/cairo/libpixman/src/pixman-conical-gradient.c
+++ b/gfx/cairo/libpixman/src/pixman-conical-gradient.c
@@ -51,7 +51,10 @@ coordinates_to_parameter (double x, double y, double angle)
}
static uint32_t *
-conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+conical_get_scanline (pixman_iter_t *iter,
+ const uint32_t *mask,
+ int Bpp,
+ pixman_gradient_walker_write_t write_pixel)
{
pixman_image_t *image = iter->image;
int x = iter->x;
@@ -61,7 +64,7 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
gradient_t *gradient = (gradient_t *)image;
conical_gradient_t *conical = (conical_gradient_t *)image;
- uint32_t *end = buffer + width;
+ uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
pixman_bool_t affine = TRUE;
double cx = 1.;
@@ -109,11 +112,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
double t = coordinates_to_parameter (rx, ry, conical->angle);
- *buffer = _pixman_gradient_walker_pixel (
- &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+ write_pixel (&walker,
+ (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
+ buffer);
}
- ++buffer;
+ buffer += (Bpp / 4);
rx += cx;
ry += cy;
@@ -144,11 +148,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
t = coordinates_to_parameter (x, y, conical->angle);
- *buffer = _pixman_gradient_walker_pixel (
- &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+ write_pixel (&walker,
+ (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
+ buffer);
}
- ++buffer;
+ buffer += (Bpp / 4);
rx += cx;
ry += cy;
@@ -161,14 +166,17 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
}
static uint32_t *
-conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
- uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
-
- pixman_expand_to_float (
- (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+ return conical_get_scanline (iter, mask, 4,
+ _pixman_gradient_walker_write_narrow);
+}
- return buffer;
+static uint32_t *
+conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{
+ return conical_get_scanline (iter, NULL, 16,
+ _pixman_gradient_walker_write_wide);
}
void
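This hunk folds the old narrow/wide pair into one scanline loop parameterised by bytes-per-pixel and a writer callback: the narrow writer emits one packed a8r8g8b8 pixel (4 bytes) and the wide writer one float argb_t (16 bytes), which is why the shared loop advances the buffer by Bpp / 4 uint32_t slots per pixel. The callback's shape, as suggested by the call sites (the exact typedef lives elsewhere in the tree):

    typedef void (*pixman_gradient_walker_write_t) (
        pixman_gradient_walker_t *walker,   /* current gradient state       */
        pixman_fixed_48_16_t      x,        /* parameter along the gradient */
        uint32_t                 *buffer);  /* 4- or 16-byte pixel slot     */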
diff --git a/gfx/cairo/libpixman/src/pixman-cpu.c b/gfx/cairo/libpixman/src/pixman-cpu.c
deleted file mode 100644
index 30f5fba8a4..0000000000
--- a/gfx/cairo/libpixman/src/pixman-cpu.c
+++ /dev/null
@@ -1,765 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2021 Moonchild Productions
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the names of the authors not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. The authors make no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHORS
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-#include <stdlib.h>
-
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
-/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
-#include <windows.h>
-#endif
-
-#if defined(__APPLE__)
-#include "TargetConditionals.h"
-#endif
-
-#include "pixman-private.h"
-
-#ifdef USE_VMX
-
-/* The CPU detection code needs to be in a file not compiled with
- * "-maltivec -mabi=altivec", as gcc would try to save vector registers
- * across function calls, causing SIGILL on CPUs without AltiVec/VMX.
- */
-static pixman_bool_t initialized = FALSE;
-static volatile pixman_bool_t have_vmx = TRUE;
-
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- size_t length = sizeof(have_vmx);
- int error =
- sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
-
- if (error)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
- size_t length = sizeof(have_vmx);
- int error =
- sysctl (mib, 2, &have_vmx, &length, NULL, 0);
-
- if (error != 0)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__linux__)
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <linux/auxvec.h>
-#include <asm/cputable.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- char fname[64];
- unsigned long buf[64];
- ssize_t count = 0;
- pid_t pid;
- int fd, i;
-
- pid = getpid ();
- snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
-
- fd = open (fname, O_RDONLY);
- if (fd >= 0)
- {
- for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
- {
- /* Read more if buf is empty... */
- if (i == (count / sizeof(unsigned long)))
- {
- count = read (fd, buf, sizeof(buf));
- if (count <= 0)
- break;
- i = 0;
- }
-
- if (buf[i] == AT_HWCAP)
- {
- have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
- initialized = TRUE;
- break;
- }
- else if (buf[i] == AT_NULL)
- {
- break;
- }
- }
- close (fd);
- }
- }
- if (!initialized)
- {
- /* Something went wrong. Assume 'no' rather than playing
- fragile tricks with catching SIGILL. */
- have_vmx = FALSE;
- initialized = TRUE;
- }
-
- return have_vmx;
-}
-
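On glibc 2.16 and later the same auxiliary-vector lookup is a single library call; a hedged modern equivalent of the manual /proc/self/auxv scan above:

    #include <sys/auxv.h>
    #include <asm/cputable.h>

    static int
    have_altivec (void)
    {
        return (getauxval (AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC) != 0;
    }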
-#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
-#include <signal.h>
-#include <setjmp.h>
-
-static jmp_buf jump_env;
-
-static void
-vmx_test (int sig,
- siginfo_t *si,
- void * unused)
-{
- longjmp (jump_env, 1);
-}
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- struct sigaction sa, osa;
- int jmp_result;
-
- if (!initialized)
- {
- sa.sa_flags = SA_SIGINFO;
- sigemptyset (&sa.sa_mask);
- sa.sa_sigaction = vmx_test;
- sigaction (SIGILL, &sa, &osa);
- jmp_result = setjmp (jump_env);
- if (jmp_result == 0)
- {
- asm volatile ( "vor 0, 0, 0" );
- }
- sigaction (SIGILL, &osa, NULL);
- have_vmx = (jmp_result == 0);
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#endif /* __APPLE__ */
-#endif /* USE_VMX */
-
-#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
-
-#if defined(_MSC_VER)
-
-#if defined(USE_ARM_SIMD)
-extern int pixman_msvc_try_arm_simd_op ();
-
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_simd = FALSE;
-
- if (!initialized)
- {
- __try {
- pixman_msvc_try_arm_simd_op ();
- have_arm_simd = TRUE;
- } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
- have_arm_simd = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_simd;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-extern int pixman_msvc_try_arm_neon_op ();
-
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_neon = FALSE;
-
- if (!initialized)
- {
- __try
- {
- pixman_msvc_try_arm_neon_op ();
- have_arm_neon = TRUE;
- }
- __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- have_arm_neon = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#elif (defined (__APPLE__) && defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */
-
-/* Detection of ARM NEON on iOS is fairly simple because iOS binaries
- * contain separate executable images for each processor architecture.
- * So all we have to do is detect the armv7 architecture build. The
- * operating system automatically runs the armv7 binary for armv7 devices
- * and the armv6 binary for armv6 devices.
- */
-
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
-#if defined(USE_ARM_SIMD)
- return TRUE;
-#else
- return FALSE;
-#endif
-}
-
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
-#if defined(USE_ARM_NEON) && defined(__ARM_NEON__)
- /* This is an armv7 cpu build */
- return TRUE;
-#else
- /* This is an armv6 cpu build */
- return FALSE;
-#endif
-}
-
-pixman_bool_t
-pixman_have_arm_iwmmxt (void)
-{
-#if defined(USE_ARM_IWMMXT)
- return FALSE;
-#else
- return FALSE;
-#endif
-}
-
-#elif defined (__linux__) /* linux ELF */
-
-static pixman_bool_t arm_has_v7 = FALSE;
-static pixman_bool_t arm_has_v6 = FALSE;
-static pixman_bool_t arm_has_vfp = FALSE;
-static pixman_bool_t arm_has_neon = FALSE;
-static pixman_bool_t arm_has_iwmmxt = FALSE;
-static pixman_bool_t arm_tests_initialized = FALSE;
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <string.h>
-#include <elf.h>
-
-static void
-pixman_arm_read_auxv_or_cpu_features ()
-{
- int fd;
- Elf32_auxv_t aux;
-
- fd = open ("/proc/self/auxv", O_RDONLY);
- if (fd >= 0)
- {
- while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
- {
- if (aux.a_type == AT_HWCAP)
- {
- uint32_t hwcap = aux.a_un.a_val;
- /* hardcode these values to avoid depending on specific
- * versions of the hwcap header, e.g. HWCAP_NEON
- */
- arm_has_vfp = (hwcap & 64) != 0;
- arm_has_iwmmxt = (hwcap & 512) != 0;
-            /* this flag is only present on kernels 2.6.29 and later */
- arm_has_neon = (hwcap & 4096) != 0;
- }
- else if (aux.a_type == AT_PLATFORM)
- {
- const char *plat = (const char*) aux.a_un.a_val;
- if (strncmp (plat, "v7l", 3) == 0)
- {
- arm_has_v7 = TRUE;
- arm_has_v6 = TRUE;
- }
- else if (strncmp (plat, "v6l", 3) == 0)
- {
- arm_has_v6 = TRUE;
- }
- }
- }
- close (fd);
- }
-
- arm_tests_initialized = TRUE;
-}
-
-#if defined(USE_ARM_SIMD)
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv_or_cpu_features ();
-
- return arm_has_v6;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv_or_cpu_features ();
-
- return arm_has_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#if defined(USE_ARM_IWMMXT)
-pixman_bool_t
-pixman_have_arm_iwmmxt (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv_or_cpu_features ();
-
- return arm_has_iwmmxt;
-}
-
-#endif /* USE_ARM_IWMMXT */
-
-#else /* !_MSC_VER && !Linux elf */
-
-#define pixman_have_arm_simd() FALSE
-#define pixman_have_arm_neon() FALSE
-#define pixman_have_arm_iwmmxt() FALSE
-
-#endif
-
-#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */
-
-#if defined(USE_MIPS_DSPR2)
-
-#if defined (__linux__) /* linux ELF */
-
-pixman_bool_t
-pixman_have_mips_dspr2 (void)
-{
- const char *search_string = "MIPS 74K";
- const char *file_name = "/proc/cpuinfo";
-    /* Simple detection of MIPS DSP ASE (revision 2) at runtime for Linux.
-     * It is based on /proc/cpuinfo, which reveals hardware configuration
-     * to user-space applications. According to MIPS (early 2010), no similar
-     * facility is universally available on the MIPS architectures, so it is
-     * up to individual OSes to provide one.
-     *
-     * The only currently available MIPS core that supports DSPr2 is the 74K.
-     */
-
- char cpuinfo_line[256];
-
- FILE *f = NULL;
-
- if ((f = fopen (file_name, "r")) == NULL)
- return FALSE;
-
- while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
- {
- if (strstr (cpuinfo_line, search_string) != NULL)
- {
- fclose (f);
- return TRUE;
- }
- }
-
- fclose (f);
-
- /* Did not find string in the proc file. */
- return FALSE;
-}
-
-#else /* linux ELF */
-
-#define pixman_have_mips_dspr2() FALSE
-
-#endif /* linux ELF */
-
-#endif /* USE_MIPS_DSPR2 */
-
-#if defined(USE_X86_MMX) || defined(USE_SSE2)
-/* The CPU detection code needs to be in a file not compiled with
- * "-mmmx -msse", as gcc would otherwise generate CMOV instructions
- * that raise SIGILL on old CPUs that don't have them.
- */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
-
-typedef enum
-{
- NO_FEATURES = 0,
- MMX = 0x1,
- MMX_EXTENSIONS = 0x2,
- SSE = 0x6,
- SSE2 = 0x8,
- CMOV = 0x10
-} cpu_features_t;
-
-
-static unsigned int
-detect_cpu_features (void)
-{
- unsigned int features = 0;
- unsigned int result = 0;
-
-#ifdef HAVE_GETISAX
- if (getisax (&result, 1))
- {
- if (result & AV_386_CMOV)
- features |= CMOV;
- if (result & AV_386_MMX)
- features |= MMX;
- if (result & AV_386_AMD_MMX)
- features |= MMX_EXTENSIONS;
- if (result & AV_386_SSE)
- features |= SSE;
- if (result & AV_386_SSE2)
- features |= SSE2;
- }
-#else
- char vendor[13];
-#ifdef _MSC_VER
- int vendor0 = 0, vendor1, vendor2;
-#endif
- vendor[0] = 0;
- vendor[12] = 0;
-
-#ifdef __GNUC__
- /* see p. 118 of amd64 instruction set manual Vol3 */
- /* We need to be careful about the handling of %ebx and
- * %esp here. We can't declare either one as clobbered
- * since they are special registers (%ebx is the "PIC
- * register" holding an offset to global data, %esp the
- * stack pointer), so we need to make sure they have their
- * original values when we access the output operands.
- */
- __asm__ (
- "pushf\n"
- "pop %%eax\n"
- "mov %%eax, %%ecx\n"
- "xor $0x00200000, %%eax\n"
- "push %%eax\n"
- "popf\n"
- "pushf\n"
- "pop %%eax\n"
- "mov $0x0, %%edx\n"
- "xor %%ecx, %%eax\n"
- "jz 1f\n"
-
- "mov $0x00000000, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "mov %%ebx, %%eax\n"
- "pop %%ebx\n"
- "mov %%eax, %1\n"
- "mov %%edx, %2\n"
- "mov %%ecx, %3\n"
- "mov $0x00000001, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "pop %%ebx\n"
- "1:\n"
- "mov %%edx, %0\n"
- : "=r" (result),
- "=m" (vendor[0]),
- "=m" (vendor[4]),
- "=m" (vendor[8])
- :
- : "%eax", "%ecx", "%edx"
- );
-
-#elif defined (_MSC_VER)
-
- _asm {
- pushfd
- pop eax
- mov ecx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ecx
- jz nocpuid
-
- mov eax, 0
- push ebx
- cpuid
- mov eax, ebx
- pop ebx
- mov vendor0, eax
- mov vendor1, edx
- mov vendor2, ecx
- mov eax, 1
- push ebx
- cpuid
- pop ebx
- nocpuid:
- mov result, edx
- }
- memmove (vendor + 0, &vendor0, 4);
- memmove (vendor + 4, &vendor1, 4);
- memmove (vendor + 8, &vendor2, 4);
-
-#else
-# error unsupported compiler
-#endif
-
- features = 0;
- if (result)
- {
- /* result now contains the standard feature bits */
- if (result & (1 << 15))
- features |= CMOV;
- if (result & (1 << 23))
- features |= MMX;
- if (result & (1 << 25))
- features |= SSE;
- if (result & (1 << 26))
- features |= SSE2;
- if ((features & MMX) && !(features & SSE) &&
- (strcmp (vendor, "AuthenticAMD") == 0 ||
- strcmp (vendor, "Geode by NSC") == 0))
- {
- /* check for AMD MMX extensions */
-#ifdef __GNUC__
- __asm__ (
- " push %%ebx\n"
- " mov $0x80000000, %%eax\n"
- " cpuid\n"
- " xor %%edx, %%edx\n"
- " cmp $0x1, %%eax\n"
- " jge 2f\n"
- " mov $0x80000001, %%eax\n"
- " cpuid\n"
- "2:\n"
- " pop %%ebx\n"
- " mov %%edx, %0\n"
- : "=r" (result)
- :
- : "%eax", "%ecx", "%edx"
- );
-#elif defined _MSC_VER
- _asm {
- push ebx
- mov eax, 80000000h
- cpuid
- xor edx, edx
- cmp eax, 1
- jge notamd
- mov eax, 80000001h
- cpuid
- notamd:
- pop ebx
- mov result, edx
- }
-#endif
- if (result & (1 << 22))
- features |= MMX_EXTENSIONS;
- }
- }
-#endif /* HAVE_GETISAX */
-
- return features;
-}
-
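Compilers shipping <cpuid.h> make hand-written CPUID sequences like the ones above unnecessary; a hedged sketch using GCC/Clang's builtin wrapper, testing the same standard feature bits (bit_CMOV is 1 << 15, bit_MMX 1 << 23, bit_SSE 1 << 25, bit_SSE2 1 << 26) and reusing the cpu_features_t flags defined above:

    #include <cpuid.h>

    static unsigned int
    detect_cpu_features_builtin (void)
    {
        unsigned int eax, ebx, ecx, edx;
        unsigned int features = 0;

        if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
        {
            if (edx & bit_CMOV) features |= CMOV;
            if (edx & bit_MMX)  features |= MMX;
            if (edx & bit_SSE)  features |= SSE;
            if (edx & bit_SSE2) features |= SSE2;
        }
        return features;
    }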
-#ifdef USE_X86_MMX
-static pixman_bool_t
-pixman_have_mmx (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t mmx_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
- initialized = TRUE;
- }
-
- return mmx_present;
-}
-#endif
-
-#ifdef USE_SSE2
-static pixman_bool_t
-pixman_have_sse2 (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t sse2_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
- initialized = TRUE;
- }
-
- return sse2_present;
-}
-
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_X86_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
-#endif
-
-static pixman_bool_t
-disabled (const char *name)
-{
- const char *env;
-
- if ((env = getenv ("PIXMAN_DISABLE")))
- {
- do
- {
- const char *end;
- int len;
-
- if ((end = strchr (env, ' ')))
- len = end - env;
- else
- len = strlen (env);
-
- if (strlen (name) == len && strncmp (name, env, len) == 0)
- {
- printf ("pixman: Disabled %s implementation\n", name);
- return TRUE;
- }
-
- env += len;
- }
- while (*env++);
- }
-
- return FALSE;
-}
-
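disabled() means any implementation can be vetoed at runtime through the space-separated PIXMAN_DISABLE environment variable, which must be set before the first pixman call since the choice below is made only once. A hedged usage sketch for a test harness:

    #include <stdlib.h>

    static void
    force_generic_paths (void)
    {
        /* Names must match those tested in _pixman_choose_implementation (). */
        setenv ("PIXMAN_DISABLE", "mmx sse2 arm-neon", 1);   /* POSIX setenv */
    }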
-pixman_implementation_t *
-_pixman_choose_implementation (void)
-{
- pixman_implementation_t *imp;
-
- imp = _pixman_implementation_create_general();
-
- if (!disabled ("fast"))
- imp = _pixman_implementation_create_fast_path (imp);
-
-#ifdef USE_X86_MMX
- if (!disabled ("mmx") && pixman_have_mmx ())
- imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_SSE2
- if (!disabled ("sse2") && pixman_have_sse2 ())
- imp = _pixman_implementation_create_sse2 (imp);
-#endif
-
-#ifdef USE_ARM_SIMD
- if (!disabled ("arm-simd") && pixman_have_arm_simd ())
- imp = _pixman_implementation_create_arm_simd (imp);
-#endif
-
-#ifdef USE_ARM_IWMMXT
- if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ())
- imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_ARM_NEON
- if (!disabled ("arm-neon") && pixman_have_arm_neon ())
- imp = _pixman_implementation_create_arm_neon (imp);
-#endif
-
-#ifdef USE_MIPS_DSPR2
- if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ())
- imp = _pixman_implementation_create_mips_dspr2 (imp);
-#endif
-
-#ifdef USE_VMX
- if (!disabled ("vmx") && pixman_have_vmx ())
- imp = _pixman_implementation_create_vmx (imp);
-#endif
-
- imp = _pixman_implementation_create_noop (imp);
-
- return imp;
-}
-
diff --git a/gfx/cairo/libpixman/src/pixman-dither.h b/gfx/cairo/libpixman/src/pixman-dither.h
deleted file mode 100644
index ead9f38d8c..0000000000
--- a/gfx/cairo/libpixman/src/pixman-dither.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#define R16_BITS 5
-#define G16_BITS 6
-#define B16_BITS 5
-
-#define R16_SHIFT (B16_BITS + G16_BITS)
-#define G16_SHIFT (B16_BITS)
-#define B16_SHIFT 0
-
-#define MASK 0xff
-#define ONE_HALF 0x80
-
-#define A_SHIFT 8 * 3
-#define R_SHIFT 8 * 2
-#define G_SHIFT 8
-#define A_MASK 0xff000000
-#define R_MASK 0xff0000
-#define G_MASK 0xff00
-
-#define RB_MASK 0xff00ff
-#define AG_MASK 0xff00ff00
-#define RB_ONE_HALF 0x800080
-#define RB_MASK_PLUS_ONE 0x10000100
-
-#define ALPHA_8(x) ((x) >> A_SHIFT)
-#define RED_8(x) (((x) >> R_SHIFT) & MASK)
-#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
-#define BLUE_8(x) ((x) & MASK)
-
-// This uses the same dithering technique that Skia does:
-// it essentially perturbs the lower bit based on the
-// high bit.
-static inline uint16_t dither_32_to_16(uint32_t c)
-{
- uint8_t b = BLUE_8(c);
- uint8_t g = GREEN_8(c);
- uint8_t r = RED_8(c);
- r = ((r << 1) - ((r >> (8 - R16_BITS) << (8 - R16_BITS)) | (r >> R16_BITS))) >> (8 - R16_BITS);
- g = ((g << 1) - ((g >> (8 - G16_BITS) << (8 - G16_BITS)) | (g >> G16_BITS))) >> (8 - G16_BITS);
- b = ((b << 1) - ((b >> (8 - B16_BITS) << (8 - B16_BITS)) | (b >> B16_BITS))) >> (8 - B16_BITS);
- return ((r << R16_SHIFT) | (g << G16_SHIFT) | (b << B16_SHIFT));
-}
-
-static inline uint16_t dither_8888_to_0565(uint32_t color, pixman_bool_t toggle)
-{
-    // alternate between a perturbed truncation and a regular truncation
- if (toggle) {
- return dither_32_to_16(color);
- } else {
- return convert_8888_to_0565(color);
- }
-}
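dither_32_to_16 perturbs each channel before truncation: doubling the 8-bit value and subtracting the replicated truncation means inputs whose discarded low bits are large round up, so alternating this with plain truncation trades banding for noise. A self-contained demo of the 5-bit (blue/red) case, with an input chosen so the two conversions differ:

    #include <stdint.h>
    #include <stdio.h>

    /* Copy of the 5-bit channel case from dither_32_to_16 above. */
    static uint8_t
    dither_channel_5 (uint8_t c)
    {
        return ((c << 1) - ((c >> 3 << 3) | (c >> 5))) >> 3;
    }

    int
    main (void)
    {
        uint8_t c = 0x87;

        /* Plain truncation yields 0x10; the perturbed form rounds up to 0x11. */
        printf ("trunc=0x%02x dither=0x%02x\n", c >> 3, dither_channel_5 (c));
        return 0;
    }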
diff --git a/gfx/cairo/libpixman/src/pixman-edge-imp.h b/gfx/cairo/libpixman/src/pixman-edge-imp.h
index a47098a896..a4698eddb2 100644
--- a/gfx/cairo/libpixman/src/pixman-edge-imp.h
+++ b/gfx/cairo/libpixman/src/pixman-edge-imp.h
@@ -55,9 +55,8 @@ RASTERIZE_EDGES (pixman_image_t *image,
*
* (The AA case does a similar adjustment in RENDER_SAMPLES_X)
*/
- /* we cast to unsigned to get defined behaviour for overflow */
- lx = (unsigned)lx + X_FRAC_FIRST(1) - pixman_fixed_e;
- rx = (unsigned)rx + X_FRAC_FIRST(1) - pixman_fixed_e;
+ lx += X_FRAC_FIRST(1) - pixman_fixed_e;
+ rx += X_FRAC_FIRST(1) - pixman_fixed_e;
#endif
/* clip X */
if (lx < 0)
diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
index 9b22d7b464..4b7a6f897f 100644
--- a/gfx/cairo/libpixman/src/pixman-fast-path.c
+++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
@@ -908,7 +908,7 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp,
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
-#define CREATE_BITMASK(n) (1 << (n))
+#define CREATE_BITMASK(n) (1U << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif
@@ -1203,228 +1203,6 @@ FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-static force_inline void
-scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst,
- const uint32_t * mask,
- const uint32_t * src_top,
- const uint32_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- while ((w -= 1) >= 0)
- {
- uint32_t tl = src_top [pixman_fixed_to_int (vx)];
- uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
- uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
- uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
- uint32_t src, result;
- uint16_t d;
- d = *dst;
- src = bilinear_interpolation (tl, tr,
- bl, br,
- pixman_fixed_to_bilinear_weight(vx),
- wb);
- vx += unit_x;
- result = over (src, convert_0565_to_0888 (d));
- *dst++ = convert_8888_to_0565 (result);
- }
-}
-
-static force_inline void
-scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst,
- const uint32_t * mask,
- const uint32_t * src_top,
- const uint32_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- while ((w -= 1) >= 0)
- {
- uint32_t tl = src_top [pixman_fixed_to_int (vx)];
- uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
- uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
- uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
- uint32_t src;
- uint32_t d;
- uint32_t result;
- d = *dst;
- src = bilinear_interpolation (tl, tr,
- bl, br,
- pixman_fixed_to_bilinear_weight(vx),
- wb);
- vx += unit_x;
- *dst++ = over (src, d);
- }
-}
-
-#ifndef LOWER_QUALITY_INTERPOLATION
-
-static force_inline void
-scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
- const uint32_t * mask,
- const uint16_t * src_top,
- const uint16_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- while ((w -= 1) >= 0)
- {
- uint16_t tl = src_top [pixman_fixed_to_int (vx)];
- uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
- uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
- uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
- uint32_t d;
- d = bilinear_interpolation(convert_0565_to_8888 (tl),
- convert_0565_to_8888 (tr),
- convert_0565_to_8888 (bl),
- convert_0565_to_8888 (br),
- pixman_fixed_to_bilinear_weight (vx),
- wb);
- vx += unit_x;
- *dst++ = convert_8888_to_0565 (d);
- }
-}
-
-#else
-
-/* This is a clever low-resolution bilinear interpolation inspired by the
-   code in Skia. */
-
-/* This takes the green component from the 565 representation and moves it:
- 00000000 00000000 rrrrrggg gggbbbbb
-
- 00000ggg ggg00000 rrrrr000 000bbbbb
-
- This gives us 5 extra bits of space before each component to let us do
- SWAR style optimizations
-*/
-
-#define GREEN_MASK (((1 << 6) - 1) << 5)
-
-static inline uint32_t
-expand_rgb_565 (uint16_t c) {
- return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK);
-}
-
-static inline uint16_t
-compact_rgb_565 (uint32_t c) {
- return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK);
-}
-
-static inline uint16_t
-bilinear_interpolation_565(uint16_t tl, uint16_t tr,
- uint16_t bl, uint16_t br,
- int x, int y)
-{
- int xy;
- uint32_t a00 = expand_rgb_565 (tl);
- uint32_t a01 = expand_rgb_565 (tr);
- uint32_t a10 = expand_rgb_565 (bl);
- uint32_t a11 = expand_rgb_565 (br);
-
- xy = (x * y) >> 3;
- return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) +
- a01 * (2*x - xy) +
- a10 * (2*y - xy) +
- a11 * xy) >> 5);
-}
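For illustration, the trick can be exercised on its own: below is a minimal, self-contained sketch of the same SWAR interpolation reduced to one dimension. The function name lerp_565_swar is invented for the example and is not pixman API.

#include <assert.h>
#include <stdint.h>

#define GREEN_MASK (((1 << 6) - 1) << 5)

/* Spread the 565 channels apart so each has 5 spare bits of headroom. */
static uint32_t expand_rgb_565 (uint16_t c) {
    return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK);
}

static uint16_t compact_rgb_565 (uint32_t c) {
    return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK);
}

/* One-dimensional lerp with a 5-bit weight w in [0, 32]: all three
 * channels are interpolated by a single pair of 32-bit multiplies. */
static uint16_t lerp_565_swar (uint16_t left, uint16_t right, int w)
{
    uint32_t l = expand_rgb_565 (left);
    uint32_t r = expand_rgb_565 (right);
    return compact_rgb_565 ((l * (32 - w) + r * w) >> 5);
}

int main (void)
{
    assert (lerp_565_swar (0xffff, 0x0000, 0) == 0xffff);  /* all left */
    assert (lerp_565_swar (0xffff, 0x0000, 32) == 0x0000); /* all right */
    return 0;
}

The two-dimensional version above does the same thing with the four corner weights (32 - 2*y - 2*x + xy), (2*x - xy), (2*y - xy) and xy, which likewise sum to 32.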
-
-static force_inline void
-scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
- const uint32_t * mask,
- const uint16_t * src_top,
- const uint16_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- while ((w -= 1) >= 0)
- {
- uint16_t tl = src_top [pixman_fixed_to_int (vx)];
- uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
- uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
- uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
-
- uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br,
- pixman_fixed_to_bilinear_weight(vx),
- wb);
- vx += unit_x;
- *dst++ = d;
- }
-}
-
-#endif
-
-FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
- scaled_bilinear_scanline_565_565_SRC, NULL,
- uint16_t, uint32_t, uint16_t,
- COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
- scaled_bilinear_scanline_565_565_SRC, NULL,
- uint16_t, uint32_t, uint16_t,
- PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
- scaled_bilinear_scanline_565_565_SRC, NULL,
- uint16_t, uint32_t, uint16_t,
- NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
- scaled_bilinear_scanline_565_565_SRC, NULL,
- uint16_t, uint32_t, uint16_t,
- NORMAL, FLAG_NONE)
-
-FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
- scaled_bilinear_scanline_8888_565_OVER, NULL,
- uint32_t, uint32_t, uint16_t,
- COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
- scaled_bilinear_scanline_8888_565_OVER, NULL,
- uint32_t, uint32_t, uint16_t,
- PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
- scaled_bilinear_scanline_8888_565_OVER, NULL,
- uint32_t, uint32_t, uint16_t,
- NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
- scaled_bilinear_scanline_8888_565_OVER, NULL,
- uint32_t, uint32_t, uint16_t,
- NORMAL, FLAG_NONE)
-
-FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
- scaled_bilinear_scanline_8888_8888_OVER, NULL,
- uint32_t, uint32_t, uint32_t,
- COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
- scaled_bilinear_scanline_8888_8888_OVER, NULL,
- uint32_t, uint32_t, uint32_t,
- PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
- scaled_bilinear_scanline_8888_8888_OVER, NULL,
- uint32_t, uint32_t, uint32_t,
- NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
- scaled_bilinear_scanline_8888_8888_OVER, NULL,
- uint32_t, uint32_t, uint32_t,
- NORMAL, FLAG_NONE)
-
#define REPEAT_MIN_WIDTH 32
static void
@@ -2193,10 +1971,6 @@ static const pixman_fast_path_t c_fast_paths[] =
fast_composite_tiled_repeat
},
- SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
- SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
- SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
-
{ PIXMAN_OP_NONE },
};
@@ -2489,93 +2263,1024 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter)
typedef struct
{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
- pixman_iter_write_back_t write_back;
-} fetcher_info_t;
+ int y;
+ uint64_t * buffer;
+} line_t;
-static const fetcher_info_t fetchers[] =
+typedef struct
{
- { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
- { PIXMAN_null }
-};
+ line_t lines[2];
+ pixman_fixed_t y;
+ pixman_fixed_t x;
+ uint64_t data[1];
+} bilinear_info_t;
-static pixman_bool_t
-fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+fetch_horizontal (bits_image_t *image, line_t *line,
+ int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
- pixman_image_t *image = iter->image;
+ uint32_t *bits = image->bits + y * image->rowstride;
+ int i;
-#define FLAGS \
- (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
- FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+ for (i = 0; i < n; ++i)
+ {
+ int x0 = pixman_fixed_to_int (x);
+ int x1 = x0 + 1;
+ int32_t dist_x;
+
+ uint32_t left = *(bits + x0);
+ uint32_t right = *(bits + x1);
+
+ dist_x = pixman_fixed_to_bilinear_weight (x);
+ dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);
+
+#if SIZEOF_LONG <= 4
+ {
+ uint32_t lag, rag, ag;
+ uint32_t lrb, rrb, rb;
+
+ lag = (left & 0xff00ff00) >> 8;
+ rag = (right & 0xff00ff00) >> 8;
+ ag = (lag << 8) + dist_x * (rag - lag);
+
+ lrb = (left & 0x00ff00ff);
+ rrb = (right & 0x00ff00ff);
+ rb = (lrb << 8) + dist_x * (rrb - lrb);
+
+ *((uint32_t *)(line->buffer + i)) = ag;
+ *((uint32_t *)(line->buffer + i) + 1) = rb;
+ }
+#else
+ {
+ uint64_t lagrb, ragrb;
+ uint32_t lag, rag;
+ uint32_t lrb, rrb;
+
+ lag = (left & 0xff00ff00);
+ lrb = (left & 0x00ff00ff);
+ rag = (right & 0xff00ff00);
+ rrb = (right & 0x00ff00ff);
+ lagrb = (((uint64_t)lag) << 24) | lrb;
+ ragrb = (((uint64_t)rag) << 24) | rrb;
+
+ line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
+ }
+#endif
+
+ x += ux;
+ }
+
+ line->y = y;
+}
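The packed updates above lean on the standard fixed-point identity a*(256 - t) + b*t == (a << 8) + t*(b - a), applied to two 8-bit channels at a time with 8 bits of headroom between them. A single-channel sketch, for illustration only:

#include <assert.h>
#include <stdint.h>

/* Interpolate between a and b with an 8-bit weight t in [0, 256).
 * The result keeps 8 fractional bits; shift right by 8 for an integer
 * sample.  The unsigned wrap-around in (b - a) is harmless: modulo
 * 2^32 the expression still equals a*(256 - t) + b*t. */
static uint32_t lerp8 (uint32_t a, uint32_t b, uint32_t t)
{
    return (a << 8) + t * (b - a);
}

int main (void)
{
    assert ((lerp8 (10, 20, 0)   >> 8) == 10); /* t = 0 -> a */
    assert ((lerp8 (10, 20, 128) >> 8) == 15); /* halfway */
    assert ((lerp8 (20, 10, 128) >> 8) == 15); /* b < a also works */
    return 0;
}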
+
+static uint32_t *
+fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
+{
+ pixman_fixed_t fx, ux;
+ bilinear_info_t *info = iter->data;
+ line_t *line0, *line1;
+ int y0, y1;
+ int32_t dist_y;
+ int i;
+
+ COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8);
+
+ fx = info->x;
+ ux = iter->image->common.transform->matrix[0][0];
+
+ y0 = pixman_fixed_to_int (info->y);
+ y1 = y0 + 1;
+ dist_y = pixman_fixed_to_bilinear_weight (info->y);
+ dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);
+
+ line0 = &info->lines[y0 & 0x01];
+ line1 = &info->lines[y1 & 0x01];
+
+ if (line0->y != y0)
+ {
+ fetch_horizontal (
+ &iter->image->bits, line0, y0, fx, ux, iter->width);
+ }
+
+ if (line1->y != y1)
+ {
+ fetch_horizontal (
+ &iter->image->bits, line1, y1, fx, ux, iter->width);
+ }
+
+ for (i = 0; i < iter->width; ++i)
+ {
+#if SIZEOF_LONG <= 4
+ uint32_t ta, tr, tg, tb;
+ uint32_t ba, br, bg, bb;
+ uint32_t tag, trb;
+ uint32_t bag, brb;
+ uint32_t a, r, g, b;
+
+ tag = *((uint32_t *)(line0->buffer + i));
+ trb = *((uint32_t *)(line0->buffer + i) + 1);
+ bag = *((uint32_t *)(line1->buffer + i));
+ brb = *((uint32_t *)(line1->buffer + i) + 1);
+
+ ta = tag >> 16;
+ ba = bag >> 16;
+ a = (ta << 8) + dist_y * (ba - ta);
+
+ tr = trb >> 16;
+ br = brb >> 16;
+ r = (tr << 8) + dist_y * (br - tr);
+
+ tg = tag & 0xffff;
+ bg = bag & 0xffff;
+ g = (tg << 8) + dist_y * (bg - tg);
+
+ tb = trb & 0xffff;
+ bb = brb & 0xffff;
+ b = (tb << 8) + dist_y * (bb - tb);
+
+ a = (a << 8) & 0xff000000;
+ r = (r << 0) & 0x00ff0000;
+ g = (g >> 8) & 0x0000ff00;
+ b = (b >> 16) & 0x000000ff;
+#else
+ uint64_t top = line0->buffer[i];
+ uint64_t bot = line1->buffer[i];
+ uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;
+ uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
+ uint64_t tgb = (top & 0x0000ffff0000ffffULL);
+ uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
+ uint64_t ar, gb;
+ uint32_t a, r, g, b;
+
+ ar = (tar << 8) + dist_y * (bar - tar);
+ gb = (tgb << 8) + dist_y * (bgb - tgb);
+
+ a = ((ar >> 24) & 0xff000000);
+ r = ((ar >> 0) & 0x00ff0000);
+ g = ((gb >> 40) & 0x0000ff00);
+ b = ((gb >> 16) & 0x000000ff);
+#endif
+
+ iter->buffer[i] = a | r | g | b;
+ }
+
+ info->y += iter->image->common.transform->matrix[1][1];
+
+ return iter->buffer;
+}
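Note the effect of the lines[y & 1] cache: bilinear filtering only ever needs source rows y0 and y0 + 1, and consecutive output rows tend to reuse them, so fetch_horizontal runs only when a parity slot holds the wrong row. A schematic sketch of the reuse, with a hypothetical fetch counter (not pixman code), assuming the transform walks monotonically down the source as the cover path does:

#include <stdio.h>

typedef struct { int y; } line_t;

static int fetches;

static void fetch_row (line_t *line, int y)
{
    if (line->y != y) { line->y = y; ++fetches; }
}

int main (void)
{
    line_t lines[2] = { { -1 }, { -1 } };
    int y0;

    /* 2x upscale: output rows need source row pairs
     * (0,1), (0,1), (1,2), (1,2). */
    for (y0 = 0; y0 < 4; ++y0)
    {
        int top = y0 / 2, bot = top + 1;
        fetch_row (&lines[top & 1], top);
        fetch_row (&lines[bot & 1], bot);
    }

    printf ("%d row fetches for 4 output rows\n", fetches); /* 3, not 8 */
    return 0;
}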
+
+static void
+bilinear_cover_iter_fini (pixman_iter_t *iter)
+{
+ free (iter->data);
+}
+
+static void
+fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
+{
+ int width = iter->width;
+ bilinear_info_t *info;
+ pixman_vector_t v;
+
+ /* Reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (iter->image->common.transform, &v))
+ goto fail;
+
+ info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));
+ if (!info)
+ goto fail;
- if (iter->iter_flags & ITER_16)
- return FALSE;
+ info->x = v.vector[0] - pixman_fixed_1 / 2;
+ info->y = v.vector[1] - pixman_fixed_1 / 2;
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
+ /* It is safe to set the y coordinates to -1 initially
+ * because COVER_CLIP_BILINEAR ensures that we will only
+ * be asked to fetch lines in the [0, height) interval
+ */
+ info->lines[0].y = -1;
+ info->lines[0].buffer = &(info->data[0]);
+ info->lines[1].y = -1;
+ info->lines[1].buffer = &(info->data[width]);
+
+ iter->get_scanline = fast_fetch_bilinear_cover;
+ iter->fini = bilinear_cover_iter_fini;
+
+ iter->data = info;
+ return;
+
+fail:
+ /* Something went wrong, either a bad matrix or OOM; in such cases,
+ * we don't guarantee any particular rendering.
+ */
+ _pixman_log_error (
+ FUNC, "Allocation failure or bad matrix, skipping rendering\n");
+
+ iter->get_scanline = _pixman_iter_get_scanline_noop;
+ iter->fini = NULL;
+}
+
+static uint32_t *
+bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
+ const uint32_t *mask)
+{
+ pixman_image_t * ima = iter->image;
+ int offset = iter->x;
+ int line = iter->y++;
+ int width = iter->width;
+ uint32_t * buffer = iter->buffer;
+
+ bits_image_t *bits = &ima->bits;
+ pixman_fixed_t x_top, x_bottom, x;
+ pixman_fixed_t ux_top, ux_bottom, ux;
+ pixman_vector_t v;
+ uint32_t top_mask, bottom_mask;
+ uint32_t *top_row;
+ uint32_t *bottom_row;
+ uint32_t *end;
+ uint32_t zero[2] = { 0, 0 };
+ uint32_t one = 1;
+ int y, y1, y2;
+ int disty;
+ int mask_inc;
+ int w;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (bits->common.transform, &v))
+ return iter->buffer;
+
+ ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
+ x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
+
+ y = v.vector[1] - pixman_fixed_1/2;
+ disty = pixman_fixed_to_bilinear_weight (y);
+
+ /* Load the pointers to the first and second lines from the source
+ * image that the bilinear code must read.
+ *
+ * The main trick in this code is checking whether either line lies
+ * outside of the image.
+ *
+ * When a line (either one) is outside, we redirect its pointer to a
+ * dummy area filled with zeros. Once redirected, the pointer must not
+ * move again, so the per-iteration increment variables for that line
+ * are set to zero as well.
+ */
+ y1 = pixman_fixed_to_int (y);
+ y2 = y1 + 1;
+
+ if (y1 < 0 || y1 >= bits->height)
+ {
+ top_row = zero;
+ x_top = 0;
+ ux_top = 0;
+ }
+ else
{
- const fetcher_info_t *f;
+ top_row = bits->bits + y1 * bits->rowstride;
+ x_top = x;
+ ux_top = ux;
+ }
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ if (y2 < 0 || y2 >= bits->height)
+ {
+ bottom_row = zero;
+ x_bottom = 0;
+ ux_bottom = 0;
+ }
+ else
+ {
+ bottom_row = bits->bits + y2 * bits->rowstride;
+ x_bottom = x;
+ ux_bottom = ux;
+ }
+
+ /* Instead of checking whether the operation uses the mask in
+ * each loop iteration, verify this only once and prepare the
+ * variables so the code inside the loop stays small.
+ */
+ if (!mask)
+ {
+ mask_inc = 0;
+ mask = &one;
+ }
+ else
+ {
+ /* If we have a mask, prepare the variables to check it */
+ mask_inc = 1;
+ }
+
+ /* If both are zero, then the whole thing is zero */
+ if (top_row == zero && bottom_row == zero)
+ {
+ memset (buffer, 0, width * sizeof (uint32_t));
+ return iter->buffer;
+ }
+ else if (bits->format == PIXMAN_x8r8g8b8)
+ {
+ if (top_row == zero)
{
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
+ top_mask = 0;
+ bottom_mask = 0xff000000;
+ }
+ else if (bottom_row == zero)
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0;
+ }
+ else
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0xff000000;
+ }
+ }
+ else
+ {
+ top_mask = 0;
+ bottom_mask = 0;
+ }
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
+ end = buffer + width;
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
+ /* Zero fill to the left of the image */
+ while (buffer < end && x < pixman_fixed_minus_1)
+ {
+ *buffer++ = 0;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Left edge */
+ while (buffer < end && x < 0)
+ {
+ uint32_t tr, br;
+ int32_t distx;
+
+ tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
+ br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+ distx = pixman_fixed_to_bilinear_weight (x);
+
+ *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
+
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Main part */
+ w = pixman_int_to_fixed (bits->width - 1);
+
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ uint32_t tl, tr, bl, br;
+ int32_t distx;
+
+ tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+ tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
+ bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+ br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+ distx = pixman_fixed_to_bilinear_weight (x);
+
+ *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
}
- return FALSE;
+ /* Right edge */
+ w = pixman_int_to_fixed (bits->width);
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ uint32_t tl, bl;
+ int32_t distx;
+
+ tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+ bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+
+ distx = pixman_fixed_to_bilinear_weight (x);
+
+ *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Zero fill to the right of the image */
+ while (buffer < end)
+ *buffer++ = 0;
+
+ return iter->buffer;
}
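The mask_inc idiom used above (point mask at a constant non-zero word and advance it by zero) keeps the per-pixel loops free of "do we have a mask?" branches. The same pattern in miniature, with names invented for the sketch:

#include <stdint.h>
#include <stdio.h>

static void
process (const uint32_t *mask, int n)
{
    static const uint32_t one = 1;
    int mask_inc = 1;
    int i;

    if (!mask)
    {
        mask = &one;  /* every read reports "covered"...      */
        mask_inc = 0; /* ...and the pointer never advances    */
    }

    for (i = 0; i < n; ++i)
    {
        if (*mask)
            printf ("pixel %d composited\n", i);
        mask += mask_inc;
    }
}

int main (void)
{
    uint32_t m[3] = { 1, 0, 1 };

    process (m, 3);    /* pixels 0 and 2 */
    process (NULL, 3); /* all three pixels */
    return 0;
}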
-static pixman_bool_t
-fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
+
+static force_inline void
+bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
{
- pixman_image_t *image = iter->image;
+ bits_image_t *bits = &image->bits;
+ pixman_fixed_t *params = image->common.filter_params;
+ int cwidth = pixman_fixed_to_int (params[0]);
+ int cheight = pixman_fixed_to_int (params[1]);
+ int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
+ int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
+ int x_phase_bits = pixman_fixed_to_int (params[2]);
+ int y_phase_bits = pixman_fixed_to_int (params[3]);
+ int x_phase_shift = 16 - x_phase_bits;
+ int y_phase_shift = 16 - y_phase_bits;
+ pixman_fixed_t vx, vy;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ int k;
- if (iter->iter_flags & ITER_16)
- return FALSE;
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
+ vx = v.vector[0];
+ vy = v.vector[1];
+
+ for (k = 0; k < width; ++k)
{
- const fetcher_info_t *f;
+ pixman_fixed_t *y_params;
+ int satot, srtot, sgtot, sbtot;
+ pixman_fixed_t x, y;
+ int32_t x1, x2, y1, y2;
+ int32_t px, py;
+ int i, j;
+
+ if (mask && !mask[k])
+ goto next;
+
+ /* Round x and y to the middle of the closest phase before continuing. This
+ * ensures that the convolution matrix is aligned correctly, since it was
+ * positioned relative to a particular phase (and not relative to whatever
+ * exact fraction we happen to get here). For example, with x_phase_bits = 4,
+ * x_phase_shift is 12, so every coordinate within the same 1/16-of-a-pixel
+ * phase snaps to that phase's midpoint.
+ */
+ x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
+ y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
+
+ px = (x & 0xffff) >> x_phase_shift;
+ py = (y & 0xffff) >> y_phase_shift;
+
+ x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
+ y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
+ x2 = x1 + cwidth;
+ y2 = y1 + cheight;
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ satot = srtot = sgtot = sbtot = 0;
+
+ y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
+
+ for (i = y1; i < y2; ++i)
{
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
+ pixman_fixed_t fy = *y_params++;
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
+ if (fy)
+ {
+ pixman_fixed_t *x_params = params + 4 + px * cwidth;
- if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
- (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
- {
- iter->get_scanline = fast_dest_fetch_noop;
- }
- else
+ for (j = x1; j < x2; ++j)
{
- iter->get_scanline = f->get_scanline;
+ pixman_fixed_t fx = *x_params++;
+ int rx = j;
+ int ry = i;
+
+ if (fx)
+ {
+ pixman_fixed_t f;
+ uint32_t pixel, mask;
+ uint8_t *row;
+
+ mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ repeat (repeat_mode, &rx, bits->width);
+ repeat (repeat_mode, &ry, bits->height);
+
+ row = (uint8_t *)(bits->bits + bits->rowstride * ry);
+ pixel = convert_pixel (row, rx) | mask;
+ }
+ else
+ {
+ if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
+ {
+ pixel = 0;
+ }
+ else
+ {
+ row = (uint8_t *)(bits->bits + bits->rowstride * ry);
+ pixel = convert_pixel (row, rx) | mask;
+ }
+ }
+
+ f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
+ srtot += (int)RED_8 (pixel) * f;
+ sgtot += (int)GREEN_8 (pixel) * f;
+ sbtot += (int)BLUE_8 (pixel) * f;
+ satot += (int)ALPHA_8 (pixel) * f;
+ }
}
- iter->write_back = f->write_back;
- return TRUE;
}
}
+
+ satot = (satot + 0x8000) >> 16;
+ srtot = (srtot + 0x8000) >> 16;
+ sgtot = (sgtot + 0x8000) >> 16;
+ sbtot = (sbtot + 0x8000) >> 16;
+
+ satot = CLIP (satot, 0, 0xff);
+ srtot = CLIP (srtot, 0, 0xff);
+ sgtot = CLIP (sgtot, 0, 0xff);
+ sbtot = CLIP (sbtot, 0, 0xff);
+
+ buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
+
+ next:
+ vx += ux;
+ vy += uy;
+ }
+}
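As the indexing above implies, the filter_params block is a four-entry header followed by per-phase kernels. A sketch of the implied layout, with offset helpers invented for illustration:

#include <stdio.h>

/* Layout implied by the fetcher above:
 *   params[0], params[1]  kernel width and height (fixed-point)
 *   params[2], params[3]  x and y phase-bit counts (fixed-point)
 *   then (1 << x_phase_bits) x-kernels of cwidth taps each,
 *   then (1 << y_phase_bits) y-kernels of cheight taps each. */
static int x_kernel_offset (int cwidth, int px)
{
    return 4 + px * cwidth;
}

static int y_kernel_offset (int cwidth, int cheight, int x_phase_bits, int py)
{
    return 4 + (1 << x_phase_bits) * cwidth + py * cheight;
}

int main (void)
{
    /* A 4x4 kernel with 16 x-phases: x-phase 3 starts at tap index 16,
     * y-phase 2 starts at 4 + 16*4 + 2*4 = 76. */
    printf ("%d %d\n", x_kernel_offset (4, 3), y_kernel_offset (4, 4, 4, 2));
    return 0;
}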
+
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+static force_inline void
+bits_image_fetch_bilinear_affine (pixman_image_t * image,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
+{
+ pixman_fixed_t x, y;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ bits_image_t *bits = &image->bits;
+ int i;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
+
+ x = v.vector[0];
+ y = v.vector[1];
+
+ for (i = 0; i < width; ++i)
+ {
+ int x1, y1, x2, y2;
+ uint32_t tl, tr, bl, br;
+ int32_t distx, disty;
+ int width = image->bits.width;
+ int height = image->bits.height;
+ const uint8_t *row1;
+ const uint8_t *row2;
+
+ if (mask && !mask[i])
+ goto next;
+
+ x1 = x - pixman_fixed_1 / 2;
+ y1 = y - pixman_fixed_1 / 2;
+
+ distx = pixman_fixed_to_bilinear_weight (x1);
+ disty = pixman_fixed_to_bilinear_weight (y1);
+
+ y1 = pixman_fixed_to_int (y1);
+ y2 = y1 + 1;
+ x1 = pixman_fixed_to_int (x1);
+ x2 = x1 + 1;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ uint32_t mask;
+
+ mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+ repeat (repeat_mode, &x1, width);
+ repeat (repeat_mode, &y1, height);
+ repeat (repeat_mode, &x2, width);
+ repeat (repeat_mode, &y2, height);
+
+ row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
+ row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
+
+ tl = convert_pixel (row1, x1) | mask;
+ tr = convert_pixel (row1, x2) | mask;
+ bl = convert_pixel (row2, x1) | mask;
+ br = convert_pixel (row2, x2) | mask;
+ }
+ else
+ {
+ uint32_t mask1, mask2;
+ int bpp;
+
+ /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
+ * which means any expression it appears in becomes
+ * unsigned as well. Since the variables below can be
+ * negative in some cases, that would lead to crashes
+ * on 64-bit architectures.
+ *
+ * So this assignment makes sure bpp is signed.
+ */
+ bpp = PIXMAN_FORMAT_BPP (format);
+
+ if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
+ {
+ buffer[i] = 0;
+ goto next;
+ }
+
+ if (y2 == 0)
+ {
+ row1 = zero;
+ mask1 = 0;
+ }
+ else
+ {
+ row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
+ row1 += bpp / 8 * x1;
+
+ mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (y1 == height - 1)
+ {
+ row2 = zero;
+ mask2 = 0;
+ }
+ else
+ {
+ row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
+ row2 += bpp / 8 * x1;
+
+ mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (x2 == 0)
+ {
+ tl = 0;
+ bl = 0;
+ }
+ else
+ {
+ tl = convert_pixel (row1, 0) | mask1;
+ bl = convert_pixel (row2, 0) | mask2;
+ }
+
+ if (x1 == width - 1)
+ {
+ tr = 0;
+ br = 0;
+ }
+ else
+ {
+ tr = convert_pixel (row1, 1) | mask1;
+ br = convert_pixel (row2, 1) | mask2;
+ }
+ }
+
+ buffer[i] = bilinear_interpolation (
+ tl, tr, bl, br, distx, disty);
+
+ next:
+ x += ux;
+ y += uy;
}
- return FALSE;
}
+static force_inline void
+bits_image_fetch_nearest_affine (pixman_image_t * image,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
+{
+ pixman_fixed_t x, y;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ bits_image_t *bits = &image->bits;
+ int i;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
+
+ x = v.vector[0];
+ y = v.vector[1];
+
+ for (i = 0; i < width; ++i)
+ {
+ int width, height, x0, y0;
+ const uint8_t *row;
+
+ if (mask && !mask[i])
+ goto next;
+
+ width = image->bits.width;
+ height = image->bits.height;
+ x0 = pixman_fixed_to_int (x - pixman_fixed_e);
+ y0 = pixman_fixed_to_int (y - pixman_fixed_e);
+
+ if (repeat_mode == PIXMAN_REPEAT_NONE &&
+ (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
+ {
+ buffer[i] = 0;
+ }
+ else
+ {
+ uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ repeat (repeat_mode, &x0, width);
+ repeat (repeat_mode, &y0, height);
+ }
+
+ row = (uint8_t *)(bits->bits + bits->rowstride * y0);
+
+ buffer[i] = convert_pixel (row, x0) | mask;
+ }
+
+ next:
+ x += ux;
+ y += uy;
+ }
+}
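The x - pixman_fixed_e adjustment implements the usual pixman rounding rule: a sample that lands exactly on the boundary between two pixels goes to the lower-indexed one. A tiny check, with 16.16 stand-ins for the fixed-point types (pixman_fixed_e is the smallest 16.16 increment):

#include <assert.h>
#include <stdint.h>

typedef int32_t pixman_fixed_t;
#define pixman_fixed_e         ((pixman_fixed_t) 1)
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))

int main (void)
{
    /* Source pixel centers sit at n + 0.5; x == 4.0 is the boundary
     * between pixels 3 and 4 and must round down to 3. */
    pixman_fixed_t boundary = 4 << 16;

    assert (pixman_fixed_to_int (boundary - pixman_fixed_e) == 3);
    assert (pixman_fixed_to_int (boundary + pixman_fixed_e) == 4);
    return 0;
}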
+
+static force_inline uint32_t
+convert_a8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_x8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_a8 (const uint8_t *row, int x)
+{
+ return (uint32_t) *(row + x) << 24;
+}
+
+static force_inline uint32_t
+convert_r5g6b5 (const uint8_t *row, int x)
+{
+ return convert_0565_to_0888 (*((uint16_t *)row + x));
+}
+
+#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \
+ static uint32_t * \
+ bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \
+ const uint32_t * mask) \
+ { \
+ bits_image_fetch_separable_convolution_affine ( \
+ iter->image, \
+ iter->x, iter->y++, \
+ iter->width, \
+ iter->buffer, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ repeat_mode); \
+ \
+ return iter->buffer; \
+ }
+
+#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
+ static uint32_t * \
+ bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \
+ const uint32_t * mask) \
+ { \
+ bits_image_fetch_bilinear_affine (iter->image, \
+ iter->x, iter->y++, \
+ iter->width, \
+ iter->buffer, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ repeat_mode); \
+ return iter->buffer; \
+ }
+
+#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \
+ static uint32_t * \
+ bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \
+ const uint32_t * mask) \
+ { \
+ bits_image_fetch_nearest_affine (iter->image, \
+ iter->x, iter->y++, \
+ iter->width, \
+ iter->buffer, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ repeat_mode); \
+ return iter->buffer; \
+ }
+
+#define MAKE_FETCHERS(name, format, repeat_mode) \
+ MAKE_NEAREST_FETCHER (name, format, repeat_mode) \
+ MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \
+ MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
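Each MAKE_FETCHERS invocation below therefore stamps out three iterator entry points. For example, MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) expands to roughly the following (one of the three shown):

static uint32_t *
bits_image_fetch_nearest_affine_pad_a8 (pixman_iter_t *iter,
                                        const uint32_t *mask)
{
    bits_image_fetch_nearest_affine (iter->image,
                                     iter->x, iter->y++,
                                     iter->width,
                                     iter->buffer, mask,
                                     convert_a8,
                                     PIXMAN_a8,
                                     PIXMAN_REPEAT_PAD);
    return iter->buffer;
}
/* ...plus bits_image_fetch_bilinear_affine_pad_a8 and
 * bits_image_fetch_separable_convolution_affine_pad_a8, built the same
 * way from the other two macros. */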
+
+MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL)
+MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD)
+MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE)
+MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT)
+MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL)
+
+#define IMAGE_FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+static const pixman_iter_info_t fast_iters[] =
+{
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
+ _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
+
+ { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+ ITER_NARROW | ITER_DEST,
+ _pixman_iter_init_bits_stride,
+ fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+
+ { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+ ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
+ _pixman_iter_init_bits_stride,
+ fast_dest_fetch_noop, fast_write_back_r5g6b5 },
+
+ { PIXMAN_a8r8g8b8,
+ (FAST_PATH_STANDARD_FLAGS |
+ FAST_PATH_SCALE_TRANSFORM |
+ FAST_PATH_BILINEAR_FILTER |
+ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
+ ITER_NARROW | ITER_SRC,
+ fast_bilinear_cover_iter_init,
+ NULL, NULL
+ },
+
+#define FAST_BILINEAR_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_X_UNIT_POSITIVE | \
+ FAST_PATH_Y_UNIT_ZERO | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_BILINEAR_FILTER)
+
+ { PIXMAN_a8r8g8b8,
+ FAST_BILINEAR_FLAGS,
+ ITER_NARROW | ITER_SRC,
+ NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
+ },
+
+ { PIXMAN_x8r8g8b8,
+ FAST_BILINEAR_FLAGS,
+ ITER_NARROW | ITER_SRC,
+ NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
+ },
+
+#define GENERAL_BILINEAR_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_BILINEAR_FILTER)
+
+#define GENERAL_NEAREST_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_NEAREST_FILTER)
+
+#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
+
+#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ ITER_NARROW | ITER_SRC, \
+ NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \
+ },
+
+#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ ITER_NARROW | ITER_SRC, \
+ NULL, bits_image_fetch_bilinear_affine_ ## name, NULL, \
+ },
+
+#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ ITER_NARROW | ITER_SRC, \
+ NULL, bits_image_fetch_nearest_affine_ ## name, NULL \
+ },
+
+#define AFFINE_FAST_PATHS(name, format, repeat) \
+ NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
+ BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
+ SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)
+
+ AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
+ AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
+ AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
+ AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
+ AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
+ AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
+ AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
+ AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
+ AFFINE_FAST_PATHS (pad_a8, a8, PAD)
+ AFFINE_FAST_PATHS (none_a8, a8, NONE)
+ AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
+ AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
+ AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
+ AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
+ AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
+ AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
+
+ { PIXMAN_null },
+};
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
@@ -2583,8 +3288,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
imp->fill = fast_path_fill;
- imp->src_iter_init = fast_src_iter_init;
- imp->dest_iter_init = fast_dest_iter_init;
+ imp->iter_info = fast_iters;
return imp;
}
diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.h b/gfx/cairo/libpixman/src/pixman-fast-path.h
deleted file mode 100644
index 1885d47e77..0000000000
--- a/gfx/cairo/libpixman/src/pixman-fast-path.h
+++ /dev/null
@@ -1,1022 +0,0 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifndef PIXMAN_FAST_PATH_H__
-#define PIXMAN_FAST_PATH_H__
-
-#include "pixman-private.h"
-
-#define PIXMAN_REPEAT_COVER -1
-
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
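A few concrete mappings of the helper above, for size == 4; this standalone self-check re-implements only the REFLECT branch, with mod() playing the role of pixman's MOD macro:

#include <assert.h>

/* Euclidean modulo, like pixman's MOD: the result is always in [0, b). */
static int mod (int a, int b) { return ((a % b) + b) % b; }

static int reflect (int c, int size)
{
    c = mod (c, size * 2);
    return (c >= size) ? size * 2 - c - 1 : c;
}

int main (void)
{
    assert (reflect (-1, 4) == 0); /* mirrored at the left edge */
    assert (reflect ( 4, 4) == 3); /* mirrored at the right edge */
    assert (reflect ( 5, 4) == 2); /* walking back into the image */
    assert (reflect ( 3, 4) == 3); /* in range: unchanged */
    return 0;
}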
-
-/*
- * For each scanline fetched from source image with PAD repeat:
- * - calculate how many pixels need to be padded on the left side
- * - calculate how many pixels need to be padded on the right side
- * - update width to only count pixels which are fetched from the image
- * All this information is returned via 'width', 'left_pad', 'right_pad'
- * arguments. The code is assuming that 'unit_x' is positive.
- *
- * Note: 64-bit math is used in order to avoid potential overflows, which
- * is probably excessive in many cases. This particular function
- * may need its own correctness test and performance tuning.
- */
-static force_inline void
-pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * width,
- int32_t * left_pad,
- int32_t * right_pad)
-{
- int64_t max_vx = (int64_t) source_image_width << 16;
- int64_t tmp;
- if (vx < 0)
- {
- tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
- if (tmp > *width)
- {
- *left_pad = *width;
- *width = 0;
- }
- else
- {
- *left_pad = (int32_t) tmp;
- *width -= (int32_t) tmp;
- }
- }
- else
- {
- *left_pad = 0;
- }
- tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
- if (tmp < 0)
- {
- *right_pad = *width;
- *width = 0;
- }
- else if (tmp >= *width)
- {
- *right_pad = 0;
- }
- else
- {
- *right_pad = *width - (int32_t) tmp;
- *width = (int32_t) tmp;
- }
-}
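A worked example, checked by hand against the code above (the snippet assumes the surrounding header is in scope):

/* 4-pixel source, unit_x = 1.0, first sample at vx = -1.5, 8 output
 * pixels.  Samples land at -1.5 -0.5 | 0.5 1.5 2.5 3.5 | 4.5 5.5,
 * so two fall left of the image, four inside, and two to the right. */
int32_t width = 8, left_pad, right_pad;

pad_repeat_get_scanline_bounds (4,          /* source_image_width */
                                -(3 << 15), /* vx = -1.5 in 16.16 */
                                1 << 16,    /* unit_x = 1.0 */
                                &width, &left_pad, &right_pad);
/* now left_pad == 2, width == 4, right_pad == 2 */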
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instruction latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static force_inline void \
-scanline_func_name (dst_type_t *dst, \
- const src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t fully_transparent_src) \
-{ \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int x1, x2; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
- return; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
-}
-
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
-static void \
-fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- mask_type_t *mask_line; \
- src_type_t *src_first_line; \
- int y; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_fixed_t max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, right_pad; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- mask_type_t solid_mask; \
- const mask_type_t *mask = &solid_mask; \
- int src_stride, mask_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- if (have_mask) \
- { \
- if (mask_is_solid) \
- solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
- else \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
- mask_stride, mask_line, 1); \
- } \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
- &width, &left_pad, &right_pad); \
- vx += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- if (have_mask && !mask_is_solid) \
- { \
- mask = mask_line; \
- mask_line += mask_stride; \
- } \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
- } \
- if (width > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
- dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0, FALSE); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- static const src_type_t zero[1] = { 0 }; \
- if (y < 0 || y >= src_image->bits.height) \
- { \
- scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
- continue; \
- } \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
- } \
- if (width > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
- dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
- } \
- } \
- else \
- { \
- src = src_first_line + src_stride * y; \
- scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
- } \
- } \
-}
-
-/* A workaround for old Sun Studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid)
-
-#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- static force_inline void \
- scanline_func##scale_func_name##_wrapper ( \
- const uint8_t *mask, \
- dst_type_t *dst, \
- const src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t fully_transparent_src) \
- { \
- scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
- } \
- FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
- src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
-
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
- dst_type_t, repeat_mode)
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
- FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
- OP, repeat_mode) \
- FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
- scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- src_type_t, dst_type_t, repeat_mode)
-
-
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the 'cover' variant, because it is faster */
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/*****************************************************************************/
-
-/*
- * Identify 5 zones in each scanline for bilinear scaling, depending on
- * whether the 2 pixels to be interpolated are fetched from the image
- * itself, from the padding area around it, or from both.
- */
-static force_inline void
-bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * left_pad,
- int32_t * left_tz,
- int32_t * width,
- int32_t * right_tz,
- int32_t * right_pad)
-{
- int width1 = *width, left_pad1, right_pad1;
- int width2 = *width, left_pad2, right_pad2;
-
- pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
- &width1, &left_pad1, &right_pad1);
- pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
- unit_x, &width2, &left_pad2, &right_pad2);
-
- *left_pad = left_pad2;
- *left_tz = left_pad1 - left_pad2;
- *right_tz = right_pad2 - right_pad1;
- *right_pad = right_pad1;
- *width -= *left_pad + *left_tz + *right_tz + *right_pad;
-}
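Continuing the worked example from pad_repeat_get_scanline_bounds: with the same 4-pixel source, unit_x = 1.0 and a first sample at vx = -1.5, the five zones come out as follows (the snippet assumes the surrounding header is in scope):

/* Bilinear needs the pixel pair floor(vx) and floor(vx) + 1:
 *   vx = -1.5           -> both pixels in padding    (left_pad)
 *   vx = -0.5           -> pixel -1 pad, pixel 0 in  (left_tz)
 *   vx = 0.5, 1.5, 2.5  -> both pixels in the image  (width)
 *   vx = 3.5            -> pixel 3 in, pixel 4 pad   (right_tz)
 *   vx = 4.5, 5.5       -> both pixels in padding    (right_pad) */
int32_t width = 8, left_pad, left_tz, right_tz, right_pad;

bilinear_pad_repeat_get_scanline_bounds (4, -(3 << 15), 1 << 16,
                                         &left_pad, &left_tz, &width,
                                         &right_tz, &right_pad);
/* now left_pad == 1, left_tz == 1, width == 3, right_tz == 1, right_pad == 2 */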
-
-/*
- * Main loop template for single pass bilinear scaling. It needs to be
- * provided with 'scanline_func' which should do the compositing operation.
- * The needed function has the following prototype:
- *
- * scanline_func (dst_type_t * dst,
- * const mask_type_ * mask,
- * const src_type_t * src_top,
- * const src_type_t * src_bottom,
- * int32_t width,
- * int weight_top,
- * int weight_bottom,
- * pixman_fixed_t vx,
- * pixman_fixed_t unit_x,
- * pixman_fixed_t max_vx,
- * pixman_bool_t zero_src)
- *
- * Where:
- * dst - destination scanline buffer for storing results
- * mask - mask buffer (or single value for solid mask)
- * src_top, src_bottom - two source scanlines
- * width - number of pixels to process
- * weight_top - weight of the top row for interpolation
- * weight_bottom - weight of the bottom row for interpolation
- * vx - initial position for fetching the first pair of
- * pixels from the source buffer
- * unit_x - position increment needed to move to the next pair
- * of pixels
- * max_vx - image size as a fixed point value, can be used for
- * implementing NORMAL repeat (when it is supported)
- * zero_src - boolean hint variable, which is set to TRUE when
- * all source pixels are fetched from zero padding
- * zone for NONE repeat
- *
- * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
- * but sometimes it may be less than that for NONE repeat when handling
- * fuzzy antialiased top or bottom image edges. Also both top and
- * bottom weight variables are guaranteed to have value in 0-255
- * range and can fit into unsigned byte or be used with 8-bit SIMD
- * multiplication instructions.
- */
-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
-static void \
-fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- mask_type_t *mask_line; \
- src_type_t *src_first_line; \
- int y1, y2; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, left_tz, right_tz, right_pad; \
- \
- dst_type_t *dst; \
- mask_type_t solid_mask; \
- const mask_type_t *mask = &solid_mask; \
- int src_stride, mask_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- if (have_mask) \
- { \
- if (mask_is_solid) \
- { \
- solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
- mask_stride = 0; \
- } \
- else \
- { \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
- mask_stride, mask_line, 1); \
- } \
- } \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- v.vector[0] -= pixman_fixed_1 / 2; \
- v.vector[1] -= pixman_fixed_1 / 2; \
- \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
- &left_pad, &left_tz, &width, &right_tz, &right_pad); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- /* PAD repeat does not need special handling for 'transition zones' and */ \
- /* they can be combined with 'padding zones' safely */ \
- left_pad += left_tz; \
- right_pad += right_tz; \
- left_tz = right_tz = 0; \
- } \
- v.vector[0] += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- int weight1, weight2; \
- dst = dst_line; \
- dst_line += dst_stride; \
- vx = v.vector[0]; \
- if (have_mask && !mask_is_solid) \
- { \
- mask = mask_line; \
- mask_line += mask_stride; \
- } \
- \
- y1 = pixman_fixed_to_int (vy); \
- weight2 = (vy >> 8) & 0xff; \
- if (weight2) \
- { \
- /* normal case: both row weights are in the 0-255 range and fit in an unsigned byte */ \
- y2 = y1 + 1; \
- weight1 = 256 - weight2; \
- } \
- else \
- { \
- /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
- y2 = y1; \
- weight1 = weight2 = 128; \
- } \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- src_type_t *src1, *src2; \
- src_type_t buf1[2]; \
- src_type_t buf2[2]; \
- repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
- repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
- src1 = src_first_line + src_stride * y1; \
- src2 = src_first_line + src_stride * y2; \
- \
- if (left_pad > 0) \
- { \
- buf1[0] = buf1[1] = src1[0]; \
- buf2[0] = buf2[1] = src2[0]; \
- scanline_func (dst, mask, \
- buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
- dst += left_pad; \
- if (have_mask && !mask_is_solid) \
- mask += left_pad; \
- } \
- if (width > 0) \
- { \
- scanline_func (dst, mask, \
- src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
- dst += width; \
- if (have_mask && !mask_is_solid) \
- mask += width; \
- } \
- if (right_pad > 0) \
- { \
- buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
- buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
- scanline_func (dst, mask, \
- buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- src_type_t *src1, *src2; \
- src_type_t buf1[2]; \
- src_type_t buf2[2]; \
- /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
- if (y1 < 0) \
- { \
- weight1 = 0; \
- y1 = 0; \
- } \
- if (y1 >= src_image->bits.height) \
- { \
- weight1 = 0; \
- y1 = src_image->bits.height - 1; \
- } \
- if (y2 < 0) \
- { \
- weight2 = 0; \
- y2 = 0; \
- } \
- if (y2 >= src_image->bits.height) \
- { \
- weight2 = 0; \
- y2 = src_image->bits.height - 1; \
- } \
- src1 = src_first_line + src_stride * y1; \
- src2 = src_first_line + src_stride * y2; \
- \
- if (left_pad > 0) \
- { \
- buf1[0] = buf1[1] = 0; \
- buf2[0] = buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
- dst += left_pad; \
- if (have_mask && !mask_is_solid) \
- mask += left_pad; \
- } \
- if (left_tz > 0) \
- { \
- buf1[0] = 0; \
- buf1[1] = src1[0]; \
- buf2[0] = 0; \
- buf2[1] = src2[0]; \
- scanline_func (dst, mask, \
- buf1, buf2, left_tz, weight1, weight2, \
- pixman_fixed_frac (vx), unit_x, 0, FALSE); \
- dst += left_tz; \
- if (have_mask && !mask_is_solid) \
- mask += left_tz; \
- vx += left_tz * unit_x; \
- } \
- if (width > 0) \
- { \
- scanline_func (dst, mask, \
- src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
- dst += width; \
- if (have_mask && !mask_is_solid) \
- mask += width; \
- vx += width * unit_x; \
- } \
- if (right_tz > 0) \
- { \
- buf1[0] = src1[src_image->bits.width - 1]; \
- buf1[1] = 0; \
- buf2[0] = src2[src_image->bits.width - 1]; \
- buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, right_tz, weight1, weight2, \
- pixman_fixed_frac (vx), unit_x, 0, FALSE); \
- dst += right_tz; \
- if (have_mask && !mask_is_solid) \
- mask += right_tz; \
- } \
- if (right_pad > 0) \
- { \
- buf1[0] = buf1[1] = 0; \
- buf2[0] = buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
- } \
- } \
- else \
- { \
- scanline_func (dst, mask, src_first_line + src_stride * y1, \
- src_first_line + src_stride * y2, width, \
- weight1, weight2, vx, unit_x, max_vx, FALSE); \
- } \
- } \
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
- FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
- dst_type_t, repeat_mode, have_mask, mask_is_solid)
-
-#define SCALED_BILINEAR_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_BILINEAR_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#endif
diff --git a/gfx/cairo/libpixman/src/pixman-filter.c b/gfx/cairo/libpixman/src/pixman-filter.c
index 5ff7b6eaad..5f3b752f9b 100644
--- a/gfx/cairo/libpixman/src/pixman-filter.c
+++ b/gfx/cairo/libpixman/src/pixman-filter.c
@@ -109,14 +109,16 @@ general_cubic (double x, double B, double C)
if (ax < 1)
{
- return ((12 - 9 * B - 6 * C) * ax * ax * ax +
- (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
+ return (((12 - 9 * B - 6 * C) * ax +
+ (-18 + 12 * B + 6 * C)) * ax * ax +
+ (6 - 2 * B)) / 6;
}
- else if (ax >= 1 && ax < 2)
+ else if (ax < 2)
{
- return ((-B - 6 * C) * ax * ax * ax +
- (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
- ax + (8 * B + 24 * C)) / 6;
+ return ((((-B - 6 * C) * ax +
+ (6 * B + 30 * C)) * ax +
+ (-12 * B - 48 * C)) * ax +
+ (8 * B + 24 * C)) / 6;
}
else
{
@@ -141,7 +143,7 @@ static const filter_info_t filters[] =
{ PIXMAN_KERNEL_BOX, box_kernel, 1.0 },
{ PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 },
{ PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 },
- { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA },
+ { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 5.0 },
{ PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 },
{ PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 },
{ PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 },
@@ -160,18 +162,21 @@ integral (pixman_kernel_t kernel1, double x1,
pixman_kernel_t kernel2, double scale, double x2,
double width)
{
- /* If the integration interval crosses zero, break it into
- * two separate integrals. This ensures that filters such
- * as LINEAR that are not differentiable at 0 will still
- * integrate properly.
+ if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX)
+ {
+ return width;
+ }
+ /* The LINEAR filter is not differentiable at 0, so if the
+ * integration interval crosses zero, break it into two
+ * separate integrals.
*/
- if (x1 < 0 && x1 + width > 0)
+ else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
}
- else if (x2 < 0 && x2 + width > 0)
+ else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x2) +
@@ -189,13 +194,19 @@ integral (pixman_kernel_t kernel1, double x1,
}
else
{
- /* Integration via Simpson's rule */
-#define N_SEGMENTS 128
+ /* Integration via Simpson's rule
+ * See http://www.intmath.com/integration/6-simpsons-rule.php
+ * 12 segments (6 cubic approximations) seems to produce the best
+ * result for lanczos3.linear, which was the combination that
+ * showed the most errors. This makes sense as the lanczos3
+ * filter is 6 wide.
+ */
+#define N_SEGMENTS 12
#define SAMPLE(a1, a2) \
(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
double s = 0.0;
- double h = width / (double)N_SEGMENTS;
+ double h = width / N_SEGMENTS;
int i;
s = SAMPLE (x1, x2);
@@ -204,11 +215,14 @@ integral (pixman_kernel_t kernel1, double x1,
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
+ s += 4 * SAMPLE (a1, a2);
+ }
+ for (i = 2; i < N_SEGMENTS; i += 2)
+ {
+ double a1 = x1 + h * i;
+ double a2 = x2 + h * i;
s += 2 * SAMPLE (a1, a2);
-
- if (i >= 2 && i < N_SEGMENTS - 1)
- s += 4 * SAMPLE (a1, a2);
}
s += SAMPLE (x1 + width, x2 + width);
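[Note: for reference, composite Simpson's rule splits [a, a + w] into an even number n of segments of width h = w / n and weights the samples 1, 4, 2, 4, ..., 2, 4, 1 before scaling by h / 3. A minimal standalone sketch, assuming a generic integrand f rather than the two-kernel SAMPLE macro used above:

    /* Composite Simpson's rule over [a, b] with n segments (n even).
     * Sample weights are 1, 4, 2, 4, ..., 2, 4, 1, scaled by h / 3.
     */
    static double
    simpson (double (*f) (double), double a, double b, int n)
    {
        double h = (b - a) / n;
        double s = f (a) + f (b);
        int i;

        for (i = 1; i < n; i += 2)      /* odd interior points: weight 4 */
            s += 4.0 * f (a + h * i);
        for (i = 2; i < n; i += 2)      /* even interior points: weight 2 */
            s += 2.0 * f (a + h * i);

        return s * h / 3.0;
    }

With n = 12 this matches the two N_SEGMENTS loops in the hunk above.]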
@@ -217,25 +231,17 @@ integral (pixman_kernel_t kernel1, double x1,
}
}
-static pixman_fixed_t *
-create_1d_filter (int *width,
+static void
+create_1d_filter (int width,
pixman_kernel_t reconstruct,
pixman_kernel_t sample,
double scale,
- int n_phases)
+ int n_phases,
+ pixman_fixed_t *p)
{
- pixman_fixed_t *params, *p;
double step;
- double size;
int i;
- size = scale * filters[sample].width + filters[reconstruct].width;
- *width = ceil (size);
-
- p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
- if (!params)
- return NULL;
-
step = 1.0 / n_phases;
for (i = 0; i < n_phases; ++i)
@@ -243,15 +249,15 @@ create_1d_filter (int *width,
double frac = step / 2.0 + i * step;
pixman_fixed_t new_total;
int x, x1, x2;
- double total;
+ double total, e;
/* Sample convolution of reconstruction and sampling
* filter. See rounding.txt regarding the rounding
* and sample positions.
*/
- x1 = ceil (frac - *width / 2.0 - 0.5);
- x2 = x1 + *width;
+ x1 = ceil (frac - width / 2.0 - 0.5);
+ x2 = x1 + width;
total = 0;
for (x = x1; x < x2; ++x)
@@ -274,29 +280,154 @@ create_1d_filter (int *width,
ihigh - ilow);
}
- total += c;
- *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5);
+ *p = (pixman_fixed_t)floor (c * 65536.0 + 0.5);
+ total += *p;
+ p++;
}
- /* Normalize */
- p -= *width;
- total = 1 / total;
+ /* Normalize, with error diffusion */
+ p -= width;
+ total = 65536.0 / total;
new_total = 0;
+ e = 0.0;
for (x = x1; x < x2; ++x)
{
- pixman_fixed_t t = (*p) * total + 0.5;
+ double v = (*p) * total + e;
+ pixman_fixed_t t = floor (v + 0.5);
+ e = v - t;
new_total += t;
*p++ = t;
}
- if (new_total != pixman_fixed_1)
- *(p - *width / 2) += (pixman_fixed_1 - new_total);
+ /* pixman_fixed_e's worth of error may remain; put it
+ * at the first sample, since that is the only one that
+ * hasn't had any error diffused into it.
+ */
+ *(p - width) += pixman_fixed_1 - new_total;
}
+}
- return params;
+
+static int
+filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size)
+{
+ return ceil (filters[reconstruct].width + size * filters[sample].width);
+}
+
+#ifdef PIXMAN_GNUPLOT
+
+/* If enable-gnuplot is configured, then you can pipe the output of a
+ * pixman-using program to gnuplot and get a continuously-updated plot
+ * of the horizontal filter. This works well with demos/scale to test
+ * the filter generation.
+ *
+ * The plot shows all the different subposition filters shuffled
+ * together. This is misleading in a few cases:
+ *
+ * IMPULSE.BOX - goes up and down as the subfilters have different
+ * numbers of non-zero samples
+ * IMPULSE.TRIANGLE - somewhat crooked for the same reason
+ * 1-wide filters - looks triangular, but a 1-wide box would be more
+ * accurate
+ */
+static void
+gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p)
+{
+ double step;
+ int i, j;
+ int first;
+
+ step = 1.0 / n_phases;
+
+ printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n");
+ printf ("plot [x=%g:%g] '-' with linespoints ls 1\n", -width*0.5, width*0.5);
+ /* Print a point at the origin so that the y==0 line is included: */
+ printf ("0 0\n\n");
+
+ /* The position of the first sample of the phase corresponding to
+ * frac is given by:
+ *
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ *
+ * We have to find the frac that minimizes this expression.
+ *
+ * For odd widths, we have
+ *
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ * = ceil (frac) + K - frac
+ * = 1 + K - frac
+ *
+ * for some K, so this is minimized when frac is maximized and
+ * strictly growing with frac. So for odd widths, we can simply
+ * start at the last phase and go backwards.
+ *
+ * For even widths, we have
+ *
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ * = ceil (frac - 0.5) + K - frac
+ *
+ * The graph for this function (ignoring K) looks like this:
+ *
+ * 0.5
+ * | |\
+ * | | \
+ * | | \
+ * 0 | | \
+ * |\ |
+ * | \ |
+ * | \ |
+ * -0.5 | \|
+ * ---------------------------------
+ * 0 0.5 1
+ *
+ * So in this case we need to start with the phase whose frac is
+ * less than, but as close as possible to 0.5, then go backwards
+ * until we hit the first phase, then wrap around to the last
+ * phase and continue backwards.
+ *
+ * Which phase is as close as possible to 0.5? The location of the
+ * sampling point corresponding to the kth phase is given by
+ * 1/(2 * n_phases) + k / n_phases:
+ *
+ * 1/(2 * n_phases) + k / n_phases = 0.5
+ *
+ * from which it follows that
+ *
+ * k = (n_phases - 1) / 2
+ *
+ * rounded down is the phase in question.
+ */
+ if (width & 1)
+ first = n_phases - 1;
+ else
+ first = (n_phases - 1) / 2;
+
+ for (j = 0; j < width; ++j)
+ {
+ for (i = 0; i < n_phases; ++i)
+ {
+ int phase = first - i;
+ double frac, pos;
+
+ if (phase < 0)
+ phase = n_phases + phase;
+
+ frac = step / 2.0 + phase * step;
+ pos = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + j;
+
+ printf ("%g %g\n",
+ pos,
+ pixman_fixed_to_double (*(p + phase * width + j)));
+ }
+ }
+
+ printf ("e\n");
+ fflush (stdout);
}
+#endif
+
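[Note: the normalization added to create_1d_filter above is plain error diffusion: each tap's rounding error is carried into the next tap so that the fixed-point weights sum exactly to pixman_fixed_1, with any residual epsilon landing on the first sample. A self-contained sketch of the same idea, assuming double inputs and 16.16 fixed point:

    #include <math.h>
    #include <stdint.h>

    typedef int32_t fixed_16_16;
    #define FIXED_1 (1 << 16)

    /* Scale n floating-point taps so their fixed-point sum is exactly
     * FIXED_1, diffusing each tap's rounding error into the next tap.
     */
    static void
    normalize_taps (const double *w, fixed_16_16 *out, int n)
    {
        double total = 0.0, e = 0.0, scale;
        fixed_16_16 new_total = 0;
        int i;

        for (i = 0; i < n; ++i)
            total += w[i];

        scale = (double) FIXED_1 / total;

        for (i = 0; i < n; ++i)
        {
            double v = w[i] * scale + e;     /* carry previous error in */
            fixed_16_16 t = (fixed_16_16) floor (v + 0.5);

            e = v - t;                       /* error to diffuse forward */
            new_total += t;
            out[i] = t;
        }

        /* At most one fixed-point epsilon of error remains; put it on
         * the first tap, the only one with no error diffused into it.
         */
        out[0] += FIXED_1 - new_total;
    }
]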
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters
*/
@@ -313,38 +444,35 @@ pixman_filter_create_separable_convolution (int *n_values,
{
double sx = fabs (pixman_fixed_to_double (scale_x));
double sy = fabs (pixman_fixed_to_double (scale_y));
- pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
+ pixman_fixed_t *params;
int subsample_x, subsample_y;
int width, height;
+ width = filter_width (reconstruct_x, sample_x, sx);
subsample_x = (1 << subsample_bits_x);
- subsample_y = (1 << subsample_bits_y);
- horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
- vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
+ height = filter_width (reconstruct_y, sample_y, sy);
+ subsample_y = (1 << subsample_bits_y);
- if (!horz || !vert)
- goto out;
-
*n_values = 4 + width * subsample_x + height * subsample_y;
params = malloc (*n_values * sizeof (pixman_fixed_t));
if (!params)
- goto out;
+ return NULL;
params[0] = pixman_int_to_fixed (width);
params[1] = pixman_int_to_fixed (height);
params[2] = pixman_int_to_fixed (subsample_bits_x);
params[3] = pixman_int_to_fixed (subsample_bits_y);
- memcpy (params + 4, horz,
- width * subsample_x * sizeof (pixman_fixed_t));
- memcpy (params + 4 + width * subsample_x, vert,
- height * subsample_y * sizeof (pixman_fixed_t));
+ create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x,
+ params + 4);
+ create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y,
+ params + 4 + width * subsample_x);
-out:
- free (horz);
- free (vert);
+#ifdef PIXMAN_GNUPLOT
+ gnuplot_filter(width, subsample_x, params + 4);
+#endif
return params;
}
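[Note: the block returned above is a four-entry header followed by the two 1-D filters, width * subsample_x horizontal taps and then height * subsample_y vertical taps. A hypothetical accessor for one horizontal phase, following the layout built by the code above (illustration only, not a pixman API):

    /* params[0] = width, params[1] = height,
     * params[2] = subsample_bits_x, params[3] = subsample_bits_y,
     * params[4 ...] = horizontal taps, then vertical taps.
     */
    static const pixman_fixed_t *
    horizontal_phase (const pixman_fixed_t *params, int phase)
    {
        int width = pixman_fixed_to_int (params[0]);

        return params + 4 + phase * width;
    }
]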
diff --git a/gfx/cairo/libpixman/src/pixman-general.c b/gfx/cairo/libpixman/src/pixman-general.c
index 2a551e3a5e..7e5a0d09cc 100644
--- a/gfx/cairo/libpixman/src/pixman-general.c
+++ b/gfx/cairo/libpixman/src/pixman-general.c
@@ -37,43 +37,47 @@
#include <string.h>
#include "pixman-private.h"
-static pixman_bool_t
-general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
- if (image->type == LINEAR)
- _pixman_linear_gradient_iter_init (image, iter);
- else if (image->type == RADIAL)
+ switch (image->type)
+ {
+ case BITS:
+ if ((iter->iter_flags & ITER_SRC) == ITER_SRC)
+ _pixman_bits_image_src_iter_init (image, iter);
+ else
+ _pixman_bits_image_dest_iter_init (image, iter);
+ break;
+
+ case LINEAR:
+ _pixman_linear_gradient_iter_init (image, iter);
+ break;
+
+ case RADIAL:
_pixman_radial_gradient_iter_init (image, iter);
- else if (image->type == CONICAL)
+ break;
+
+ case CONICAL:
_pixman_conical_gradient_iter_init (image, iter);
- else if (image->type == BITS)
- _pixman_bits_image_src_iter_init (image, iter);
- else if (image->type == SOLID)
+ break;
+
+ case SOLID:
_pixman_log_error (FUNC, "Solid image not handled by noop");
- else
- _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+ break;
- return TRUE;
+ default:
+ _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+ break;
+ }
}
-static pixman_bool_t
-general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static const pixman_iter_info_t general_iters[] =
{
- if (iter->image->type == BITS)
- {
- _pixman_bits_image_dest_iter_init (iter->image, iter);
-
- return TRUE;
- }
- else
- {
- _pixman_log_error (FUNC, "Trying to write to a non-writable image");
-
- return FALSE;
- }
-}
+ { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL },
+ { PIXMAN_null },
+};
typedef struct op_info_t op_info_t;
struct op_info_t
@@ -105,62 +109,75 @@ static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
#define SCANLINE_BUFFER_LENGTH 8192
+static pixman_bool_t
+operator_needs_division (pixman_op_t op)
+{
+ static const uint8_t needs_division[] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */
+ 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */
+ };
+
+ return needs_division[op];
+}
+
static void
general_composite_rect (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
- uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
+ uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH];
uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
pixman_iter_t src_iter, mask_iter, dest_iter;
pixman_combine_32_func_t compose;
pixman_bool_t component_alpha;
- iter_flags_t narrow, src_iter_flags;
- iter_flags_t rgb16;
+ iter_flags_t width_flag, src_iter_flags;
int Bpp;
int i;
- if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
- (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
- (dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
+ if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (dest_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ !(operator_needs_division (op)) &&
+ (dest_image->bits.dither == PIXMAN_DITHER_NONE))
{
- narrow = ITER_NARROW;
+ width_flag = ITER_NARROW;
Bpp = 4;
}
else
{
- narrow = 0;
+ width_flag = ITER_WIDE;
Bpp = 16;
}
- // XXX: This special casing is bad. Ideally, we'd keep the general code general perhaps
- // by having it deal more specifically with different intermediate formats
- if (
- (dest_image->common.flags & FAST_PATH_16_FORMAT && (src_image->type == LINEAR || src_image->type == RADIAL)) &&
- ( op == PIXMAN_OP_SRC ||
- (op == PIXMAN_OP_OVER && (src_image->common.flags & FAST_PATH_IS_OPAQUE))
- )
- ) {
- rgb16 = ITER_16;
- } else {
- rgb16 = 0;
- }
+#define ALIGN(addr) \
+ ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15)))
+ if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
+ return;
- if (width * Bpp > SCANLINE_BUFFER_LENGTH)
+ if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3)
{
- scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
+ scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3);
if (!scanline_buffer)
return;
+
+ memset (scanline_buffer, 0, width * Bpp * 3 + 15 * 3);
+ }
+ else
+ {
+ memset (stack_scanline_buffer, 0, sizeof (stack_scanline_buffer));
}
- src_buffer = scanline_buffer;
- mask_buffer = src_buffer + width * Bpp;
- dest_buffer = mask_buffer + width * Bpp;
+ src_buffer = ALIGN (scanline_buffer);
+ mask_buffer = ALIGN (src_buffer + width * Bpp);
+ dest_buffer = ALIGN (mask_buffer + width * Bpp);
- if (!narrow)
+ if (width_flag == ITER_WIDE)
{
/* To make sure there aren't any NANs in the buffers */
memset (src_buffer, 0, width * Bpp);
@@ -169,11 +186,12 @@ general_composite_rect (pixman_implementation_t *imp,
}
/* src iter */
- src_iter_flags = narrow | op_flags[op].src | rgb16;
+ src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;
- _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image,
- src_x, src_y, width, height,
- src_buffer, src_iter_flags, info->src_flags);
+ _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image,
+ src_x, src_y, width, height,
+ src_buffer, src_iter_flags,
+ info->src_flags);
/* mask iter */
if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
@@ -185,23 +203,21 @@ general_composite_rect (pixman_implementation_t *imp,
mask_image = NULL;
}
- component_alpha =
- mask_image &&
- mask_image->common.type == BITS &&
- mask_image->common.component_alpha &&
- PIXMAN_FORMAT_RGB (mask_image->bits.format);
+ component_alpha = mask_image && mask_image->common.component_alpha;
- _pixman_implementation_src_iter_init (
- imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height,
- mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags);
+ _pixman_implementation_iter_init (
+ imp->toplevel, &mask_iter,
+ mask_image, mask_x, mask_y, width, height, mask_buffer,
+ ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
+ info->mask_flags);
/* dest iter */
- _pixman_implementation_dest_iter_init (
+ _pixman_implementation_iter_init (
imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
- dest_buffer, narrow | op_flags[op].dst | rgb16, info->dest_flags);
+ dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags);
compose = _pixman_implementation_lookup_combiner (
- imp->toplevel, op, component_alpha, narrow, !!rgb16);
+ imp->toplevel, op, component_alpha, width_flag != ITER_WIDE);
for (i = 0; i < height; ++i)
{
@@ -216,6 +232,13 @@ general_composite_rect (pixman_implementation_t *imp,
dest_iter.write_back (&dest_iter);
}
+ if (src_iter.fini)
+ src_iter.fini (&src_iter);
+ if (mask_iter.fini)
+ mask_iter.fini (&mask_iter);
+ if (dest_iter.fini)
+ dest_iter.fini (&dest_iter);
+
if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
free (scanline_buffer);
}
@@ -231,12 +254,10 @@ _pixman_implementation_create_general (void)
{
pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
- _pixman_setup_combiner_functions_16 (imp);
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_float (imp);
- imp->src_iter_init = general_src_iter_init;
- imp->dest_iter_init = general_dest_iter_init;
+ imp->iter_info = general_iters;
return imp;
}
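[Note: the ALIGN macro introduced above rounds a pointer up to the next 16-byte boundary, which is why each of the three scanline buffers reserves 15 extra bytes (the "+ 15 * 3" in the allocation). A standalone illustration of the carving scheme, assuming a plain byte buffer:

    #include <stdint.h>
    #include <assert.h>

    #define ALIGN16(p) ((uint8_t *) ((((uintptr_t) (p)) + 15) & ~(uintptr_t) 15))

    static void
    carve_buffers (uint8_t *raw, int width, int bpp,
                   uint8_t **src, uint8_t **mask, uint8_t **dest)
    {
        /* Each region is width * bpp bytes; aligning each start can
         * consume up to 15 bytes, hence the 15 * 3 over-allocation.
         */
        *src  = ALIGN16 (raw);
        *mask = ALIGN16 (*src + width * bpp);
        *dest = ALIGN16 (*mask + width * bpp);

        assert (((uintptr_t) *dest & 15) == 0);
    }
]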
diff --git a/gfx/cairo/libpixman/src/pixman-glyph.c b/gfx/cairo/libpixman/src/pixman-glyph.c
index 5a271b64b8..96a349ab47 100644
--- a/gfx/cairo/libpixman/src/pixman-glyph.c
+++ b/gfx/cairo/libpixman/src/pixman-glyph.c
@@ -391,6 +391,9 @@ box32_intersect (pixman_box32_t *dest,
return dest->x2 > dest->x1 && dest->y2 > dest->y1;
}
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
PIXMAN_EXPORT void
pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
@@ -630,6 +633,9 @@ out:
* - Trim the mask to the destination clip/image?
* - Trim composite region based on sources, when the op ignores 0s.
*/
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
PIXMAN_EXPORT void
pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
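[Note: __force_align_arg_pointer__ is a GCC attribute that realigns the stack on function entry. It is applied to these two exported entry points because a 32-bit caller may only guarantee 4-byte stack alignment while SSE code inside pixman needs 16; the guard in the patch excludes x86-64, where the ABI already guarantees 16 bytes. The pattern in isolation, with the guard simplified to __i386__ and a hypothetical function name:

    #if defined(__GNUC__) && defined(__i386__)
    __attribute__((__force_align_arg_pointer__))
    #endif
    void
    exported_entry_point (void)
    {
        /* Any 16-byte SSE spill slots allocated here stay aligned even
         * if the caller's stack was only 4-byte aligned.
         */
    }
]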
diff --git a/gfx/cairo/libpixman/src/pixman-gradient-walker.c b/gfx/cairo/libpixman/src/pixman-gradient-walker.c
index e7e724fa6c..fb7f401dac 100644
--- a/gfx/cairo/libpixman/src/pixman-gradient-walker.c
+++ b/gfx/cairo/libpixman/src/pixman-gradient-walker.c
@@ -37,11 +37,14 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
walker->stops = gradient->stops;
walker->left_x = 0;
walker->right_x = 0x10000;
- walker->stepper = 0;
- walker->left_ag = 0;
- walker->left_rb = 0;
- walker->right_ag = 0;
- walker->right_rb = 0;
+ walker->a_s = 0.0f;
+ walker->a_b = 0.0f;
+ walker->r_s = 0.0f;
+ walker->r_b = 0.0f;
+ walker->g_s = 0.0f;
+ walker->g_b = 0.0f;
+ walker->b_s = 0.0f;
+ walker->b_b = 0.0f;
walker->repeat = repeat;
walker->need_reset = TRUE;
@@ -51,10 +54,13 @@ static void
gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos)
{
- int32_t x, left_x, right_x;
+ int64_t x, left_x, right_x;
pixman_color_t *left_c, *right_c;
int n, count = walker->num_stops;
pixman_gradient_stop_t *stops = walker->stops;
+ float la, lr, lg, lb;
+ float ra, rr, rg, rb;
+ float lx, rx;
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
@@ -116,57 +122,143 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
left_c = right_c;
}
- walker->left_x = left_x;
- walker->right_x = right_x;
- walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8);
- walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8);
- walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8);
- walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
+ /* The alpha/red/green/blue channels are scaled to be in [0, 1].
+ * This ensures that after premultiplication all channels will
+ * be in the [0, 1] interval.
+ */
+ la = (left_c->alpha * (1.0f/257.0f));
+ lr = (left_c->red * (1.0f/257.0f));
+ lg = (left_c->green * (1.0f/257.0f));
+ lb = (left_c->blue * (1.0f/257.0f));
- if (walker->left_x == walker->right_x ||
- (walker->left_ag == walker->right_ag &&
- walker->left_rb == walker->right_rb))
+ ra = (right_c->alpha * (1.0f/257.0f));
+ rr = (right_c->red * (1.0f/257.0f));
+ rg = (right_c->green * (1.0f/257.0f));
+ rb = (right_c->blue * (1.0f/257.0f));
+
+ lx = left_x * (1.0f/65536.0f);
+ rx = right_x * (1.0f/65536.0f);
+
+ if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
- walker->stepper = 0;
+ walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
+ walker->a_b = (la + ra) / 510.0f;
+ walker->r_b = (lr + rr) / 510.0f;
+ walker->g_b = (lg + rg) / 510.0f;
+ walker->b_b = (lb + rb) / 510.0f;
}
else
{
- int32_t width = right_x - left_x;
- walker->stepper = ((1 << 24) + width / 2) / width;
+ float w_rec = 1.0f / (rx - lx);
+
+ walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f);
+ walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
+ walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
+ walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
+
+ walker->a_s = (ra - la) * w_rec * (1.0f/255.0f);
+ walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
+ walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
+ walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
}
+
+ walker->left_x = left_x;
+ walker->right_x = right_x;
walker->need_reset = FALSE;
}
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
- pixman_fixed_48_16_t x)
+static argb_t
+pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x)
+{
+ argb_t f;
+ float y;
+
+ if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
+ gradient_walker_reset (walker, x);
+
+ y = x * (1.0f / 65536.0f);
+
+ f.a = walker->a_s * y + walker->a_b;
+ f.r = f.a * (walker->r_s * y + walker->r_b);
+ f.g = f.a * (walker->g_s * y + walker->g_b);
+ f.b = f.a * (walker->b_s * y + walker->b_b);
+
+ return f;
+}
+
+static uint32_t
+pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x)
{
- int dist, idist;
- uint32_t t1, t2, a, color;
+ argb_t f;
+ float y;
if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
gradient_walker_reset (walker, x);
- dist = ((int)(x - walker->left_x) * walker->stepper) >> 16;
- idist = 256 - dist;
+ y = x * (1.0f / 65536.0f);
- /* combined INTERPOLATE and premultiply */
- t1 = walker->left_rb * idist + walker->right_rb * dist;
- t1 = (t1 >> 8) & 0xff00ff;
+ /* Instead of [0...1] for ARGB we want [0...255], so alpha is
+ * multiplied by 255 and the color channels are then multiplied
+ * by the already-scaled alpha.
+ *
+ * We don't use pixman_contract_from_float because doing so
+ * causes a 2x slowdown, and the values are already normalized,
+ * so we don't have to worry about values < 0.f or > 1.f.
+ */
+ f.a = 255.f * (walker->a_s * y + walker->a_b);
+ f.r = f.a * (walker->r_s * y + walker->r_b);
+ f.g = f.a * (walker->g_s * y + walker->g_b);
+ f.b = f.a * (walker->b_s * y + walker->b_b);
- t2 = walker->left_ag * idist + walker->right_ag * dist;
- t2 &= 0xff00ff00;
+ return (((uint32_t)(f.a + .5f) << 24) & 0xff000000) |
+ (((uint32_t)(f.r + .5f) << 16) & 0x00ff0000) |
+ (((uint32_t)(f.g + .5f) << 8) & 0x0000ff00) |
+ (((uint32_t)(f.b + .5f) >> 0) & 0x000000ff);
+}
- color = t2 & 0xff000000;
- a = t2 >> 24;
+void
+_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer)
+{
+ *buffer = pixman_gradient_walker_pixel_32 (walker, x);
+}
- t1 = t1 * a + 0x800080;
- t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+void
+_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer)
+{
+ *(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x);
+}
- t2 = (t2 >> 8) * a + 0x800080;
- t2 = (t2 + ((t2 >> 8) & 0xff00ff));
+void
+_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer,
+ uint32_t *end)
+{
+ register uint32_t color;
- return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+ color = pixman_gradient_walker_pixel_32 (walker, x);
+ while (buffer < end)
+ *buffer++ = color;
}
+void
+_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer,
+ uint32_t *end)
+{
+ register argb_t color;
+ argb_t *buffer_wide = (argb_t *)buffer;
+ argb_t *end_wide = (argb_t *)end;
+
+ color = pixman_gradient_walker_pixel_float (walker, x);
+ while (buffer_wide < end_wide)
+ *buffer_wide++ = color;
+}
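[Note: gradient_walker_reset above rewrites each channel as a line c(x) = c_s * x + c_b through the two stop colors, so per-pixel evaluation is one multiply-add per channel plus the premultiplication by alpha. A minimal sketch of that slope/intercept form for a single channel (names mirror the walker fields; the FLOAT_IS_ZERO and repeat handling are omitted):

    /* Channel value at position x in [lx, rx], interpolating between
     * the left stop value lv and the right stop value rv:
     *
     *   slope     s = (rv - lv) / (rx - lx)
     *   intercept b = (lv * rx - rv * lx) / (rx - lx)
     *   value       = s * x + b
     */
    static float
    channel_at (float lv, float rv, float lx, float rx, float x)
    {
        float w_rec = 1.0f / (rx - lx);
        float s = (rv - lv) * w_rec;
        float b = (lv * rx - rv * lx) * w_rec;

        return s * x + b;   /* equals lv at x == lx and rv at x == rx */
    }
]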
diff --git a/gfx/cairo/libpixman/src/pixman-image.c b/gfx/cairo/libpixman/src/pixman-image.c
index f43e07f41d..db29ff5b4f 100644
--- a/gfx/cairo/libpixman/src/pixman-image.c
+++ b/gfx/cairo/libpixman/src/pixman-image.c
@@ -33,25 +33,6 @@
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
-/**
- ** bug 1293598 - clean up every pointer after free to avoid
- ** "dereferencing freed memory" problem
- **/
-#define PIXMAN_POSION
-
-static void
-free_memory (void** p)
-{
-#ifdef PIXMAN_POISON
- if (*p) {
-#endif
- free (*p);
-#ifdef PIXMAN_POISON
- *p = NULL;
- }
-#endif
-}
-
static void
gradient_property_changed (pixman_image_t *image)
{
@@ -164,8 +145,8 @@ _pixman_image_fini (pixman_image_t *image)
pixman_region32_fini (&common->clip_region);
- free_memory (&common->transform);
- free_memory (&common->filter_params);
+ free (common->transform);
+ free (common->filter_params);
if (common->alpha_map)
pixman_image_unref ((pixman_image_t *)common->alpha_map);
@@ -177,8 +158,7 @@ _pixman_image_fini (pixman_image_t *image)
if (image->gradient.stops)
{
/* See _pixman_init_gradient() for an explanation of the - 1 */
- void *addr = image->gradient.stops - 1;
- free_memory (&addr);
+ free (image->gradient.stops - 1);
}
/* This will trigger if someone adds a property_changed
@@ -189,11 +169,8 @@ _pixman_image_fini (pixman_image_t *image)
image->common.property_changed == gradient_property_changed);
}
- if (image->type == BITS && image->bits.free_me) {
- free_memory (&image->bits.free_me);
- image->bits.bits = NULL;
- }
-
+ if (image->type == BITS && image->bits.free_me)
+ free (image->bits.free_me);
return TRUE;
}
@@ -233,7 +210,7 @@ pixman_image_unref (pixman_image_t *image)
{
if (_pixman_image_fini (image))
{
- free_memory (&image);
+ free (image);
return TRUE;
}
@@ -358,37 +335,47 @@ compute_image_info (pixman_image_t *image)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
- else if (
- /* affine and integer translation components in matrix ... */
- ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
- !pixman_fixed_frac (image->common.transform->matrix[0][2] |
- image->common.transform->matrix[1][2])) &&
- (
- /* ... combined with a simple rotation */
- (flags & (FAST_PATH_ROTATE_90_TRANSFORM |
- FAST_PATH_ROTATE_180_TRANSFORM |
- FAST_PATH_ROTATE_270_TRANSFORM)) ||
- /* ... or combined with a simple non-rotated translation */
- (image->common.transform->matrix[0][0] == pixman_fixed_1 &&
- image->common.transform->matrix[1][1] == pixman_fixed_1 &&
- image->common.transform->matrix[0][1] == 0 &&
- image->common.transform->matrix[1][0] == 0)
- )
- )
+ else if (flags & FAST_PATH_AFFINE_TRANSFORM)
{
- /* FIXME: there are some affine-test failures, showing that
- * handling of BILINEAR and NEAREST filter is not quite
- * equivalent when getting close to 32K for the translation
- * components of the matrix. That's likely some bug, but for
- * now just skip BILINEAR->NEAREST optimization in this case.
+ /* Suppose the transform is
+ *
+ * [ t00, t01, t02 ]
+ * [ t10, t11, t12 ]
+ * [ 0, 0, 1 ]
+ *
+ * and the destination coordinates are (n + 0.5, m + 0.5). Then
+ * the transformed x coordinate is:
+ *
+ * tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02
+ * = t00 * n + t01 * m + t02 + (t00 + t01) * 0.5
+ *
+ * which implies that if t00, t01 and t02 are all integers
+ * and (t00 + t01) is odd, then tx will be an integer plus 0.5,
+ * which means a BILINEAR filter will reduce to NEAREST. The same
+ * applies in the y direction.
*/
- pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
- if (image->common.transform->matrix[0][2] <= magic_limit &&
- image->common.transform->matrix[1][2] <= magic_limit &&
- image->common.transform->matrix[0][2] >= -magic_limit &&
- image->common.transform->matrix[1][2] >= -magic_limit)
+ pixman_fixed_t (*t)[3] = image->common.transform->matrix;
+
+ if ((pixman_fixed_frac (
+ t[0][0] | t[0][1] | t[0][2] |
+ t[1][0] | t[1][1] | t[1][2]) == 0) &&
+ (pixman_fixed_to_int (
+ (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1)
{
- flags |= FAST_PATH_NEAREST_FILTER;
+ /* FIXME: there are some affine-test failures, showing that
+ * handling of BILINEAR and NEAREST filter is not quite
+ * equivalent when getting close to 32K for the translation
+ * components of the matrix. That's likely some bug, but for
+ * now just skip BILINEAR->NEAREST optimization in this case.
+ */
+ pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
+ if (image->common.transform->matrix[0][2] <= magic_limit &&
+ image->common.transform->matrix[1][2] <= magic_limit &&
+ image->common.transform->matrix[0][2] >= -magic_limit &&
+ image->common.transform->matrix[1][2] >= -magic_limit)
+ {
+ flags |= FAST_PATH_NEAREST_FILTER;
+ }
}
}
break;
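[Note: the condition above reads: all six affine entries are integers and the per-row sums t00 + t01 and t10 + t11 are both odd; then every pixel center (n + 0.5, m + 0.5) maps to an exact half-integer position and BILINEAR degenerates to NEAREST. The same test on a plain 2x3 integer matrix (hypothetical helper, not pixman API):

    #include <stdbool.h>

    /* tx = t00*n + t01*m + t02 + (t00 + t01) / 2, so with integer
     * entries tx is an integer plus 0.5 exactly when (t00 + t01) is
     * odd; likewise for the second row in the y direction.
     */
    static bool
    bilinear_reduces_to_nearest (const int t[2][3])
    {
        return ((t[0][0] + t[0][1]) & 1) == 1 &&
               ((t[1][0] + t[1][1]) & 1) == 1;
    }
]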
@@ -483,10 +470,6 @@ compute_image_info (pixman_image_t *image)
if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
flags &= ~FAST_PATH_NARROW_FORMAT;
-
- if (image->bits.format == PIXMAN_r5g6b5)
- flags |= FAST_PATH_16_FORMAT;
-
break;
case RADIAL:
@@ -529,8 +512,10 @@ compute_image_info (pixman_image_t *image)
break;
}
- /* Alpha map */
- if (!image->common.alpha_map)
+ /* Alpha maps are only supported for BITS images, so it's always
+ * safe to ignore their presence for non-BITS images
+ */
+ if (!image->common.alpha_map || image->type != BITS)
{
flags |= FAST_PATH_NO_ALPHA_MAP;
}
@@ -699,6 +684,41 @@ pixman_image_set_repeat (pixman_image_t *image,
image_property_changed (image);
}
+PIXMAN_EXPORT void
+pixman_image_set_dither (pixman_image_t *image,
+ pixman_dither_t dither)
+{
+ if (image->type == BITS)
+ {
+ if (image->bits.dither == dither)
+ return;
+
+ image->bits.dither = dither;
+
+ image_property_changed (image);
+ }
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_dither_offset (pixman_image_t *image,
+ int offset_x,
+ int offset_y)
+{
+ if (image->type == BITS)
+ {
+ if (image->bits.dither_offset_x == offset_x &&
+ image->bits.dither_offset_y == offset_y)
+ {
+ return;
+ }
+
+ image->bits.dither_offset_x = offset_x;
+ image->bits.dither_offset_y = offset_y;
+
+ image_property_changed (image);
+ }
+}
+
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_filter (pixman_image_t * image,
pixman_filter_t filter,
@@ -857,6 +877,10 @@ pixman_image_set_accessors (pixman_image_t * image,
if (image->type == BITS)
{
+ /* Accessors only work for <= 32 bpp. */
+ if (PIXMAN_FORMAT_BPP(image->bits.format) > 32)
+ return_if_fail (!read_func && !write_func);
+
image->bits.read_func = read_func;
image->bits.write_func = write_func;
@@ -936,7 +960,7 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
else if (image->bits.format == PIXMAN_x8r8g8b8)
result = image->bits.bits[0] | 0xff000000;
else if (image->bits.format == PIXMAN_a8)
- result = (*(uint8_t *)image->bits.bits) << 24;
+ result = (uint32_t)(*(uint8_t *)image->bits.bits) << 24;
else
goto otherwise;
}
@@ -945,12 +969,15 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
pixman_iter_t iter;
otherwise:
- _pixman_implementation_src_iter_init (
+ _pixman_implementation_iter_init (
imp, &iter, image, 0, 0, 1, 1,
(uint8_t *)&result,
- ITER_NARROW, image->common.flags);
+ ITER_NARROW | ITER_SRC, image->common.flags);
result = *iter.get_scanline (&iter, NULL);
+
+ if (iter.fini)
+ iter.fini (&iter);
}
/* If necessary, convert RGB <--> BGR. */
diff --git a/gfx/cairo/libpixman/src/pixman-implementation.c b/gfx/cairo/libpixman/src/pixman-implementation.c
index 44d4097852..2c7de4c687 100644
--- a/gfx/cairo/libpixman/src/pixman-implementation.c
+++ b/gfx/cairo/libpixman/src/pixman-implementation.c
@@ -90,15 +90,7 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
/* Check cache for fast paths */
cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
- /* Bug 1324130 - For compatibility with Windows XP, we have to use Tls
- * functions for the per-thread fast-path cache instead of the safer
- * __declspec(thread) mechanism. If the Tls functions fail to set up
- * the storage for some reason, cache will end up null here. As a
- * temporary workaround, just check that cache is not null before
- * using it. The implementation lookup will still function without the
- * fast-path cache, however, it will incur a slow linear search.
- */
- if (cache) for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
+ for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
{
const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
@@ -158,12 +150,19 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
}
/* We should never reach this point */
- _pixman_log_error (FUNC, "No known composite function\n");
+ _pixman_log_error (
+ FUNC,
+ "No composite function found\n"
+ "\n"
+ "The most likely cause of this is that this system has issues with\n"
+ "thread local storage\n");
+
*out_imp = NULL;
*out_func = dummy_composite_rect;
+ return;
update_cache:
- if (cache && i)
+ if (i)
{
while (i--)
cache->cache[i + 1] = cache->cache[i];
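[Note: the update_cache path above is a small move-to-front cache: on a hit at index i, the entries before it shift down one slot and the hit is reinserted at slot 0, so frequently used fast paths stay near the front of the linear scan. The shifting idiom in isolation (hypothetical entry type):

    typedef struct { int key; /* ... payload ... */ } entry_t;

    /* Move 'hit' (found at index i) to the front of the cache,
     * shifting the entries before it down one slot; a cheap LRU
     * approximation for a fixed-size, linearly scanned cache.
     */
    static void
    move_to_front (entry_t *cache, int i, entry_t hit)
    {
        while (i--)
            cache[i + 1] = cache[i];
        cache[0] = hit;
    }
]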
@@ -194,8 +193,7 @@ pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
- pixman_bool_t narrow,
- pixman_bool_t rgb16)
+ pixman_bool_t narrow)
{
while (imp)
{
@@ -219,8 +217,6 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
f = imp->combine_32_ca[op];
break;
}
- if (rgb16)
- f = (pixman_combine_32_func_t *)imp->combine_16[op];
if (f)
return f;
@@ -289,18 +285,26 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
return FALSE;
}
-pixman_bool_t
-_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t iter_flags,
- uint32_t image_flags)
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
{
+ return NULL;
+}
+
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint8_t *buffer,
+ iter_flags_t iter_flags,
+ uint32_t image_flags)
+{
+ pixman_format_code_t format;
+
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
@@ -309,48 +313,40 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp,
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
+ iter->fini = NULL;
- while (imp)
+ if (!iter->image)
{
- if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter))
- return TRUE;
-
- imp = imp->fallback;
+ iter->get_scanline = get_scanline_null;
+ return;
}
- return FALSE;
-}
-
-pixman_bool_t
-_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t iter_flags,
- uint32_t image_flags)
-{
- iter->image = image;
- iter->buffer = (uint32_t *)buffer;
- iter->x = x;
- iter->y = y;
- iter->width = width;
- iter->height = height;
- iter->iter_flags = iter_flags;
- iter->image_flags = image_flags;
+ format = iter->image->common.extended_format_code;
while (imp)
{
- if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter))
- return TRUE;
-
- imp = imp->fallback;
+ if (imp->iter_info)
+ {
+ const pixman_iter_info_t *info;
+
+ for (info = imp->iter_info; info->format != PIXMAN_null; ++info)
+ {
+ if ((info->format == PIXMAN_any || info->format == format) &&
+ (info->image_flags & image_flags) == info->image_flags &&
+ (info->iter_flags & iter_flags) == info->iter_flags)
+ {
+ iter->get_scanline = info->get_scanline;
+ iter->write_back = info->write_back;
+
+ if (info->initializer)
+ info->initializer (iter, info);
+ return;
+ }
+ }
+ }
+
+ imp = imp->fallback;
}
-
- return FALSE;
}
pixman_bool_t
@@ -384,6 +380,11 @@ _pixman_disabled (const char *name)
return FALSE;
}
+static const pixman_fast_path_t empty_fast_path[] =
+{
+ { PIXMAN_OP_NONE }
+};
+
pixman_implementation_t *
_pixman_choose_implementation (void)
{
@@ -401,5 +402,16 @@ _pixman_choose_implementation (void)
imp = _pixman_implementation_create_noop (imp);
+ if (_pixman_disabled ("wholeops"))
+ {
+ pixman_implementation_t *cur;
+
+ /* Disable all whole-operation paths except the general one,
+ * so that optimized iterators are used as much as possible.
+ */
+ for (cur = imp; cur->fallback; cur = cur->fallback)
+ cur->fast_paths = empty_fast_path;
+ }
+
return imp;
}
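[Note: the iterator lookup above walks the implementation chain and picks the first entry whose required flags are a subset of the caller's flags, using the usual (required & provided) == required idiom; because tables are ordered most-specific first, demanding matches win over the PIXMAN_any fallback. A distilled sketch with hypothetical types, not the pixman structs:

    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uint32_t required; const char *name; } iter_entry_t;

    /* Return the first entry whose required flag bits are all present
     * in 'provided', or NULL if none match.
     */
    static const iter_entry_t *
    lookup_iter (const iter_entry_t *table, size_t n, uint32_t provided)
    {
        size_t i;

        for (i = 0; i < n; ++i)
        {
            if ((table[i].required & provided) == table[i].required)
                return &table[i];
        }
        return NULL;
    }
]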
diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
index 6d78aa7cb0..f785910f80 100644
--- a/gfx/cairo/libpixman/src/pixman-inlines.h
+++ b/gfx/cairo/libpixman/src/pixman-inlines.h
@@ -173,34 +173,6 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
#else
-#ifdef LOW_QUALITY_INTERPOLATION
-/* Based on Filter_32_opaque_portable from Skia */
-static force_inline uint32_t
-bilinear_interpolation(uint32_t a00, uint32_t a01,
- uint32_t a10, uint32_t a11,
- int x, int y)
-{
- int xy = x * y;
- static const uint32_t mask = 0xff00ff;
-
- int scale = 256 - 16*y - 16*x + xy;
- uint32_t lo = (a00 & mask) * scale;
- uint32_t hi = ((a00 >> 8) & mask) * scale;
-
- scale = 16*x - xy;
- lo += (a01 & mask) * scale;
- hi += ((a01 >> 8) & mask) * scale;
-
- scale = 16*y - xy;
- lo += (a10 & mask) * scale;
- hi += ((a10 >> 8) & mask) * scale;
-
- lo += (a11 & mask) * xy;
- hi += ((a11 >> 8) & mask) * xy;
-
- return ((lo >> 8) & mask) | (hi & ~mask);
-}
-#else
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
@@ -246,10 +218,35 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
return r;
}
-#endif
+
#endif
#endif // BILINEAR_INTERPOLATION_BITS <= 4
+static force_inline argb_t
+bilinear_interpolation_float (argb_t tl, argb_t tr,
+ argb_t bl, argb_t br,
+ float distx, float disty)
+{
+ float distxy, distxiy, distixy, distixiy;
+ argb_t r;
+
+ distxy = distx * disty;
+ distxiy = distx * (1.f - disty);
+ distixy = (1.f - distx) * disty;
+ distixiy = (1.f - distx) * (1.f - disty);
+
+ r.a = tl.a * distixiy + tr.a * distxiy +
+ bl.a * distixy + br.a * distxy;
+ r.r = tl.r * distixiy + tr.r * distxiy +
+ bl.r * distixy + br.r * distxy;
+ r.g = tl.g * distixiy + tr.g * distxiy +
+ bl.g * distixy + br.g * distxy;
+ r.b = tl.b * distixiy + tr.b * distxiy +
+ bl.b * distixy + br.b * distxy;
+
+ return r;
+}
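[Note: the four weights above are the tensor product of (1 - distx, distx) with (1 - disty, disty); they are non-negative and sum to 1, so the result is a convex combination of the four corner pixels and cannot overshoot them. For example, with distx = 0.25 and disty = 0.5: distixiy = 0.375, distxiy = 0.125, distixy = 0.375 and distxy = 0.125, which sum to 1.]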
+
/*
* For each scanline fetched from source image with PAD repeat:
* - calculate how many pixels need to be padded on the left side
@@ -775,7 +772,8 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
/*****************************************************************************/
@@ -852,38 +850,8 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
* with 8-bit SIMD multiplication instructions for 8-bit interpolation
* precision.
*/
-
-/* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
- * two stage processing (bilinear fetch to a temp buffer, followed by unscaled
- * combine), "op_func" may be NULL, in this case we keep old behavior.
- * This is ugly and gcc issues some warnings, but works.
- *
- * An advice: clang has much better error reporting than gcc for deeply nested macros.
- */
-
-#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buf, mask, src_top, src_bottom, width, \
- weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \
- do { \
- if (op_func != NULL) \
- { \
- fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
- (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
- ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
- ((dst), (mask), (src_type_t *)scanline_buf, (width)); \
- } \
- else \
- { \
- fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \
- (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
- } \
- } while (0)
-
-
-#define SCANLINE_BUFFER_LENGTH 3072
-
-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \
- mask_type_t, dst_type_t, repeat_mode, flags) \
+#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
@@ -908,9 +876,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
pixman_fixed_t src_width_fixed; \
int max_x; \
pixman_bool_t need_src_extension; \
- \
- uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
- uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (flags & FLAG_HAVE_SOLID_MASK) \
@@ -983,14 +948,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
\
src_width_fixed = pixman_int_to_fixed (src_width); \
} \
- \
- if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
- { \
- scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
- \
- if (!scanline_buffer) \
- return; \
- } \
\
while (--height >= 0) \
{ \
@@ -1033,18 +990,16 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = src1[0]; \
buf2[0] = buf2[1] = src2[0]; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
- 0, 0, 0, FALSE); \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (width > 0) \
{ \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, src1, src2, width, weight1, weight2, \
- vx, unit_x, 0, FALSE); \
+ scanline_func (dst, mask, \
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
@@ -1053,9 +1008,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
- 0, 0, 0, FALSE); \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
@@ -1091,9 +1045,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
- 0, 0, 0, TRUE); \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
@@ -1104,8 +1057,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1[1] = src1[0]; \
buf2[0] = 0; \
buf2[1] = src2[0]; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += left_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
@@ -1114,9 +1067,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
} \
if (width > 0) \
{ \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, src1, src2, width, weight1, weight2, \
- vx, unit_x, 0, FALSE); \
+ scanline_func (dst, mask, \
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
@@ -1128,8 +1080,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1[1] = 0; \
buf2[0] = src2[src_image->bits.width - 1]; \
buf2[1] = 0; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += right_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
@@ -1139,9 +1091,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
- 0, 0, 0, TRUE); \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
@@ -1203,8 +1154,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
- dst, scanline_buffer, mask, buf1, buf2, num_pixels, \
+ scanline_func (dst, mask, buf1, buf2, num_pixels, \
weight1, weight2, pixman_fixed_frac(vx), \
unit_x, src_width_fixed, FALSE); \
\
@@ -1233,10 +1183,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
- dst, scanline_buffer, mask, src_line_top, src_line_bottom, \
- num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \
- FALSE); \
+ scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
+ weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
@@ -1249,21 +1197,17 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
} \
else \
{ \
- scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
- scanline_buffer, mask, \
- src_first_line + src_stride * y1, \
+ scanline_func (dst, mask, src_first_line + src_stride * y1, \
src_first_line + src_stride * y2, width, \
weight1, weight2, vx, unit_x, max_vx, FALSE); \
} \
} \
- if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \
- free (scanline_buffer); \
}
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
+#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, flags) \
- FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
+ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
dst_type_t, repeat_mode, flags)
#define SCALED_BILINEAR_FLAGS \
diff --git a/gfx/cairo/libpixman/src/pixman-linear-gradient.c b/gfx/cairo/libpixman/src/pixman-linear-gradient.c
index f5ba51b604..3f528508a1 100644
--- a/gfx/cairo/libpixman/src/pixman-linear-gradient.c
+++ b/gfx/cairo/libpixman/src/pixman-linear-gradient.c
@@ -31,8 +31,6 @@
#include <stdlib.h>
#include "pixman-private.h"
-#include "pixman-dither.h"
-
static pixman_bool_t
linear_gradient_is_horizontal (pixman_image_t *image,
int x,
@@ -91,8 +89,11 @@ linear_gradient_is_horizontal (pixman_image_t *image,
}
static uint32_t *
-linear_get_scanline_narrow (pixman_iter_t *iter,
- const uint32_t *mask)
+linear_get_scanline (pixman_iter_t *iter,
+ const uint32_t *mask,
+ int Bpp,
+ pixman_gradient_walker_write_t write_pixel,
+ pixman_gradient_walker_fill_t fill_pixel)
{
pixman_image_t *image = iter->image;
int x = iter->x;
@@ -105,7 +106,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
- uint32_t *end = buffer + width;
+ uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
@@ -139,7 +140,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
if (l == 0 || unit.vector[2] == 0)
{
/* affine transformation only */
- pixman_fixed_32_32_t t, next_inc;
+ pixman_fixed_32_32_t t, next_inc;
double inc;
if (l == 0 || v.vector[2] == 0)
@@ -154,7 +155,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
- t = ((dx * v.vector[0] + dy * v.vector[1]) -
+ t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
}
@@ -162,11 +163,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
if (((pixman_fixed_32_32_t )(inc * width)) == 0)
{
- register uint32_t color;
-
- color = _pixman_gradient_walker_pixel (&walker, t);
- while (buffer < end)
- *buffer++ = color;
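+ /* t is constant across the whole scanline here, so the per-format
+ * fill helper can write one colour to the entire buffer */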
+ fill_pixel (&walker, t, buffer, end);
}
else
{
@@ -177,12 +174,11 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
{
if (!mask || *mask++)
{
- *buffer = _pixman_gradient_walker_pixel (&walker,
- t + next_inc);
+ write_pixel (&walker, t + next_inc, buffer);
}
i++;
next_inc = inc * i;
- buffer++;
+ buffer += (Bpp / 4);
}
}
}
@@ -204,14 +200,14 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
- t = ((dx * v.vector[0] + dy * v.vector[1]) -
+ t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
}
- *buffer = _pixman_gradient_walker_pixel (&walker, t);
+ write_pixel (&walker, t, buffer);
}
- ++buffer;
+ buffer += (Bpp / 4);
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
@@ -225,176 +221,30 @@ linear_get_scanline_narrow (pixman_iter_t *iter,
}
static uint32_t *
-linear_get_scanline_16 (pixman_iter_t *iter,
- const uint32_t *mask)
+linear_get_scanline_narrow (pixman_iter_t *iter,
+ const uint32_t *mask)
{
- pixman_image_t *image = iter->image;
- int x = iter->x;
- int y = iter->y;
- int width = iter->width;
- uint16_t * buffer = (uint16_t*)iter->buffer;
- pixman_bool_t toggle = ((x ^ y) & 1);
-
- pixman_vector_t v, unit;
- pixman_fixed_32_32_t l;
- pixman_fixed_48_16_t dx, dy;
- gradient_t *gradient = (gradient_t *)image;
- linear_gradient_t *linear = (linear_gradient_t *)image;
- uint16_t *end = buffer + width;
- pixman_gradient_walker_t walker;
-
- _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (image->common.transform)
- {
- if (!pixman_transform_point_3d (image->common.transform, &v))
- return iter->buffer;
-
- unit.vector[0] = image->common.transform->matrix[0][0];
- unit.vector[1] = image->common.transform->matrix[1][0];
- unit.vector[2] = image->common.transform->matrix[2][0];
- }
- else
- {
- unit.vector[0] = pixman_fixed_1;
- unit.vector[1] = 0;
- unit.vector[2] = 0;
- }
-
- dx = linear->p2.x - linear->p1.x;
- dy = linear->p2.y - linear->p1.y;
-
- l = dx * dx + dy * dy;
-
- if (l == 0 || unit.vector[2] == 0)
- {
- /* affine transformation only */
- pixman_fixed_32_32_t t, next_inc;
- double inc;
-
- if (l == 0 || v.vector[2] == 0)
- {
- t = 0;
- inc = 0;
- }
- else
- {
- double invden, v2;
-
- invden = pixman_fixed_1 * (double) pixman_fixed_1 /
- (l * (double) v.vector[2]);
- v2 = v.vector[2] * (1. / pixman_fixed_1);
- t = ((dx * v.vector[0] + dy * v.vector[1]) -
- (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
- inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
- }
- next_inc = 0;
-
- if (((pixman_fixed_32_32_t )(inc * width)) == 0)
- {
- register uint32_t color;
- uint16_t dither_diff;
- uint16_t color16;
- uint16_t color16b;
-
- color = _pixman_gradient_walker_pixel (&walker, t);
- color16 = dither_8888_to_0565(color, toggle);
- color16b = dither_8888_to_0565(color, toggle^1);
- // compute the difference
- dither_diff = color16 ^ color16b;
- while (buffer < end) {
- *buffer++ = color16;
- // use dither_diff to toggle between color16 and color16b
- color16 ^= dither_diff;
- toggle ^= 1;
- }
- }
- else
- {
- int i;
-
- i = 0;
- while (buffer < end)
- {
- if (!mask || *mask++)
- {
- *buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker,
- t + next_inc),
- toggle);
- }
- toggle ^= 1;
- i++;
- next_inc = inc * i;
- buffer++;
- }
- }
- }
- else
- {
- /* projective transformation */
- double t;
-
- t = 0;
-
- while (buffer < end)
- {
- if (!mask || *mask++)
- {
- if (v.vector[2] != 0)
- {
- double invden, v2;
-
- invden = pixman_fixed_1 * (double) pixman_fixed_1 /
- (l * (double) v.vector[2]);
- v2 = v.vector[2] * (1. / pixman_fixed_1);
- t = ((dx * v.vector[0] + dy * v.vector[1]) -
- (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
- }
-
- *buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker, t),
- toggle);
- }
- toggle ^= 1;
-
- ++buffer;
-
- v.vector[0] += unit.vector[0];
- v.vector[1] += unit.vector[1];
- v.vector[2] += unit.vector[2];
- }
- }
-
- iter->y++;
-
- return iter->buffer;
+ return linear_get_scanline (iter, mask, 4,
+ _pixman_gradient_walker_write_narrow,
+ _pixman_gradient_walker_fill_narrow);
}
+
static uint32_t *
linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
- uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
-
- pixman_expand_to_float (
- (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
- return buffer;
+ return linear_get_scanline (iter, NULL, 16,
+ _pixman_gradient_walker_write_wide,
+ _pixman_gradient_walker_fill_wide);
}
void
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
- // XXX: we can't use this optimization when dithering
- if (0 && linear_gradient_is_horizontal (
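+ // the ITER_16 dither path is gone, so this optimization is safe again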
+ if (linear_gradient_is_horizontal (
iter->image, iter->x, iter->y, iter->width, iter->height))
{
- if (iter->iter_flags & ITER_16)
- linear_get_scanline_16 (iter, NULL);
- else if (iter->iter_flags & ITER_NARROW)
+ if (iter->iter_flags & ITER_NARROW)
linear_get_scanline_narrow (iter, NULL);
else
linear_get_scanline_wide (iter, NULL);
@@ -403,9 +253,7 @@ _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
}
else
{
- if (iter->iter_flags & ITER_16)
- iter->get_scanline = linear_get_scanline_16;
- else if (iter->iter_flags & ITER_NARROW)
+ if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = linear_get_scanline_narrow;
else
iter->get_scanline = linear_get_scanline_wide;
diff --git a/gfx/cairo/libpixman/src/pixman-matrix.c b/gfx/cairo/libpixman/src/pixman-matrix.c
index 89b96826b8..81b6e613ed 100644
--- a/gfx/cairo/libpixman/src/pixman-matrix.c
+++ b/gfx/cairo/libpixman/src/pixman-matrix.c
@@ -37,7 +37,7 @@
static force_inline int
count_leading_zeros (uint32_t x)
{
-#ifdef __GNUC__
+#ifdef HAVE_BUILTIN_CLZ
return __builtin_clz (x);
#else
int n = 0;
@@ -273,7 +273,7 @@ pixman_transform_point_31_16 (const pixman_transform_t *t,
{
/* the divisor is small, we can actually keep all the bits */
int64_t hi, rhi, lo, rlo;
- int64_t div = (divint << 16) + divfrac;
+ int64_t div = ((uint64_t)divint << 16) + divfrac;
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S
index ddfacef62e..9dad163b79 100644
--- a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S
+++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * Author: Nemanja Lukic (nlukic@mips.com)
+ * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#include "pixman-private.h"
@@ -310,6 +310,516 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
END(pixman_composite_src_x888_8888_asm_mips)
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (b8g8r8)
+ * a2 - w
+ */
+
+ beqz a2, 6f
+ nop
+
+ lui t8, 0xff00;
+ srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */
+ beqz t9, 4f /* branch if less than 4 src pixels */
+ nop
+
+ li t0, 0x1
+ li t1, 0x2
+ li t2, 0x3
+ andi t3, a1, 0x3
+ beq t3, t0, 1f
+ nop
+ beq t3, t1, 2f
+ nop
+ beq t3, t2, 3f
+ nop
+
+0:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */
+ lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */
+ lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */
+ wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */
+ wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */
+
+ packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */
+ packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */
+ rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */
+ or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */
+ srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */
+ or t4, t4, t8 /* t4 = FF | R1 | G1 | B1 */
+ packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */
+ rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */
+ or t5, t5, t8 /* t5 = FF | R3 | G3 | B3 */
+ rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */
+ or t2, t2, t8 /* t2 = FF | R4 | G4 | B4 */
+
+ sw t4, 0(a0)
+ sw t3, 4(a0)
+ sw t5, 8(a0)
+ sw t2, 12(a0)
+ b 0b
+ addiu a0, a0, 16
+
+1:
+ lbu t6, 0(a1) /* t6 = 0 | 0 | 0 | R1 */
+ lhu t7, 1(a1) /* t7 = 0 | 0 | B1 | G1 */
+ sll t6, t6, 16 /* t6 = 0 | R1 | 0 | 0 */
+ wsbh t7, t7 /* t7 = 0 | 0 | G1 | B1 */
+ or t7, t6, t7 /* t7 = 0 | R1 | G1 | B1 */
+11:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */
+ lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */
+ lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */
+ wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */
+ wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */
+
+ packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */
+ packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */
+ rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */
+ rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */
+ rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */
+ or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */
+ or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */
+ or t3, t3, t8 /* t3 = FF | R3 | G3 | B3 */
+ or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */
+
+ sw t7, 0(a0)
+ sw t0, 4(a0)
+ sw t3, 8(a0)
+ sw t4, 12(a0)
+ rotr t7, t2, 16 /* t7 = xx | R5 | G5 | B5 */
+ b 11b
+ addiu a0, a0, 16
+
+2:
+ lhu t7, 0(a1) /* t7 = 0 | 0 | G1 | R1 */
+ wsbh t7, t7 /* t7 = 0 | 0 | R1 | G1 */
+21:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */
+ lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */
+ lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */
+ wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */
+ wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */
+
+ precr_sra.ph.w t7, t0, 0 /* t7 = R1 | G1 | B1 | R2 */
+ rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */
+ packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */
+ rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */
+ srl t7, t7, 8 /* t7 = 0 | R1 | G1 | B1 */
+ rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */
+ or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */
+ or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */
+ or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */
+ or t3, t3, t8 /* t3 = FF | R4 | G4 | B4 */
+
+ sw t7, 0(a0)
+ sw t0, 4(a0)
+ sw t1, 8(a0)
+ sw t3, 12(a0)
+ srl t7, t2, 16 /* t7 = 0 | 0 | R5 | G5 */
+ b 21b
+ addiu a0, a0, 16
+
+3:
+ lbu t7, 0(a1) /* t7 = 0 | 0 | 0 | R1 */
+31:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */
+ lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */
+ lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */
+ wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */
+ wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */
+
+ precr_sra.ph.w t7, t0, 0 /* t7 = xx | R1 | G1 | B1 */
+ packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */
+ rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */
+ rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */
+ rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */
+ or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */
+ or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */
+ or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */
+ or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */
+
+ sw t7, 0(a0)
+ sw t3, 4(a0)
+ sw t1, 8(a0)
+ sw t4, 12(a0)
+ srl t7, t2, 16 /* t7 = 0 | 0 | xx | R5 */
+ b 31b
+ addiu a0, a0, 16
+
+4:
+ beqz a2, 6f
+ nop
+5:
+ lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */
+ lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */
+ lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */
+ addiu a1, a1, 3
+
+ sll t0, t0, 16 /* t0 = 0 | R | 0 | 0 */
+ sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */
+
+ or t2, t2, t1 /* t2 = 0 | 0 | G | B */
+ or t2, t2, t0 /* t2 = 0 | R | G | B */
+ or t2, t2, t8 /* t2 = FF | R | G | B */
+
+ sw t2, 0(a0)
+ addiu a2, a2, -1
+ bnez a2, 5b
+ addiu a0, a0, 4
+6:
+ j ra
+ nop
+
+END(pixman_composite_src_0888_8888_rev_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (b8g8r8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0, v1
+ beqz a2, 6f
+ nop
+
+ li t6, 0xf800f800
+ li t7, 0x07e007e0
+ li t8, 0x001F001F
+ srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */
+ beqz t9, 4f /* branch if less than 4 src pixels */
+ nop
+
+ li t0, 0x1
+ li t1, 0x2
+ li t2, 0x3
+ andi t3, a1, 0x3
+ beq t3, t0, 1f
+ nop
+ beq t3, t1, 2f
+ nop
+ beq t3, t2, 3f
+ nop
+
+0:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */
+ lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */
+ lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */
+ wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */
+ wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */
+
+ packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */
+ packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */
+ rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */
+ srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */
+ packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */
+ rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */
+ rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */
+
+ CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1
+ CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1
+
+ sh t4, 0(a0)
+ sh t3, 2(a0)
+ sh t5, 4(a0)
+ sh t2, 6(a0)
+ b 0b
+ addiu a0, a0, 8
+
+1:
+ lbu t4, 0(a1) /* t4 = 0 | 0 | 0 | R1 */
+ lhu t5, 1(a1) /* t5 = 0 | 0 | B1 | G1 */
+ sll t4, t4, 16 /* t4 = 0 | R1 | 0 | 0 */
+ wsbh t5, t5 /* t5 = 0 | 0 | G1 | B1 */
+ or t5, t4, t5 /* t5 = 0 | R1 | G1 | B1 */
+11:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */
+ lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */
+ lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */
+ wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */
+ wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */
+
+ packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */
+ packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */
+ rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */
+ rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */
+ rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */
+
+ CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
+ CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1
+
+ sh t5, 0(a0)
+ sh t0, 2(a0)
+ sh t3, 4(a0)
+ sh t4, 6(a0)
+ rotr t5, t2, 16 /* t5 = xx | R5 | G5 | B5 */
+ b 11b
+ addiu a0, a0, 8
+
+2:
+ lhu t5, 0(a1) /* t5 = 0 | 0 | G1 | R1 */
+ wsbh t5, t5 /* t5 = 0 | 0 | R1 | G1 */
+21:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */
+ lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */
+ lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */
+ wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */
+ wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */
+
+ precr_sra.ph.w t5, t0, 0 /* t5 = R1 | G1 | B1 | R2 */
+ rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */
+ packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */
+ rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */
+ srl t5, t5, 8 /* t5 = 0 | R1 | G1 | B1 */
+ rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */
+
+ CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
+ CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1
+
+ sh t5, 0(a0)
+ sh t0, 2(a0)
+ sh t1, 4(a0)
+ sh t3, 6(a0)
+ srl t5, t2, 16 /* t5 = 0 | 0 | R5 | G5 */
+ b 21b
+ addiu a0, a0, 8
+
+3:
+ lbu t5, 0(a1) /* t5 = 0 | 0 | 0 | R1 */
+31:
+ beqz t9, 4f
+ addiu t9, t9, -1
+ lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */
+ lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */
+ lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */
+
+ addiu a1, a1, 12
+ addiu a2, a2, -4
+
+ wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */
+ wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */
+ wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */
+
+ precr_sra.ph.w t5, t0, 0 /* t5 = xx | R1 | G1 | B1 */
+ packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */
+ rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */
+ rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */
+ rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */
+
+ CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1
+ CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1
+
+ sh t5, 0(a0)
+ sh t3, 2(a0)
+ sh t1, 4(a0)
+ sh t4, 6(a0)
+ srl t5, t2, 16 /* t5 = 0 | 0 | xx | R5 */
+ b 31b
+ addiu a0, a0, 8
+
+4:
+ beqz a2, 6f
+ nop
+5:
+ lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */
+ lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */
+ lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */
+ addiu a1, a1, 3
+
+ sll t0, t0, 16 /* t0 = 0 | R | 0 | 0 */
+ sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */
+
+ or t2, t2, t1 /* t2 = 0 | 0 | G | B */
+ or t2, t2, t0 /* t2 = 0 | R | G | B */
+
+ CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
+
+ sh t3, 0(a0)
+ addiu a2, a2, -1
+ bnez a2, 5b
+ addiu a0, a0, 2
+6:
+ RESTORE_REGS_FROM_STACK 0, v0, v1
+ j ra
+ nop
+
+END(pixman_composite_src_0888_0565_rev_asm_mips)
+#endif
+
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8b8g8r8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ wsbh t0, t0
+ wsbh t1, t1
+ rotr t0, t0, 16
+ rotr t1, t1, 16
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ wsbh t0, t0
+ rotr t0, t0, 16
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_pixbuf_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ rotr t0, t0, 8
+ rotr t1, t1, 8
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ rotr t0, t0, 8
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_rpixbuf_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
/*
* a0 - dst (a8r8g8b8)
@@ -451,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
* a3 - w
*/
- SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
- beqz a3, 4f
+ beqz a3, 8f
nop
+ SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+
li t6, 0xff
addiu t7, zero, -1 /* t7 = 0xffffffff */
srl t8, a1, 24 /* t8 = srca */
li t9, 0x00ff00ff
+
addiu t1, a3, -1
- beqz t1, 3f /* last pixel */
- nop
- beq t8, t6, 2f /* if (srca == 0xff) */
+ beqz t1, 4f /* last pixel */
nop
-1:
- /* a1 = src */
+
+0:
lw t0, 0(a2) /* t0 = mask */
lw t1, 4(a2) /* t1 = mask */
+ addiu a3, a3, -2 /* w = w - 2 */
or t2, t0, t1
- beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */
addiu a2, a2, 8
- and t3, t0, t1
- move t4, a1 /* t4 = src */
- move t5, a1 /* t5 = src */
+ and t2, t0, t1
+ beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ nop
+
+//if(ma)
lw t2, 0(a0) /* t2 = dst */
- beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- lw t3, 4(a0) /* t3 = dst */
+ lw t3, 4(a0) /* t3 = dst */
MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
-11:
not t0, t0
not t1, t1
MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
@@ -486,62 +997,79 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
addu_s.qb t3, t5, t3
sw t2, 0(a0)
sw t3, 4(a0)
-12:
- addiu a3, a3, -2
addiu t1, a3, -1
- bgtz t1, 1b
+ bgtz t1, 0b
addiu a0, a0, 8
- b 3f
+ b 4f
+ nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+ beq t8, t6, 2f /* if (srca == 0xff) */
nop
-2:
- /* a1 = src */
- lw t0, 0(a2) /* t0 = mask */
- lw t1, 4(a2) /* t1 = mask */
- or t2, t0, t1
- beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
- addiu a2, a2, 8
- and t2, t0, t1
- move t4, a1
- beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- move t5, a1
lw t2, 0(a0) /* t2 = dst */
lw t3, 4(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
- not t0, t0
- not t1, t1
- MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
- addu_s.qb t4, t4, t2
- addu_s.qb t5, t5, t3
-21:
- sw t4, 0(a0)
- sw t5, 4(a0)
-22:
- addiu a3, a3, -2
+ not t0, a1
+ not t1, a1
+ srl t0, t0, 24
+ srl t1, t1, 24
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, a1, t2
+ addu_s.qb t3, a1, t3
+ sw t2, 0(a0)
+ sw t3, 4(a0)
addiu t1, a3, -1
- bgtz t1, 2b
+ bgtz t1, 0b
addiu a0, a0, 8
+ b 4f
+ nop
+2:
+ sw a1, 0(a0)
+ sw a1, 4(a0)
3:
- blez a3, 4f
+ addiu t1, a3, -1
+ bgtz t1, 0b
+ addiu a0, a0, 8
+
+4:
+ beqz a3, 7f
nop
/* a1 = src */
- lw t1, 0(a2) /* t1 = mask */
- beqz t1, 4f
+ lw t0, 0(a2) /* t0 = mask */
+ beqz t0, 7f /* if (t0 == 0) */
nop
- move t2, a1 /* t2 = src */
- beq t1, t7, 31f
- lw t0, 0(a0) /* t0 = dst */
-
- MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
- MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5
-31:
- not t1, t1
- MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6
- addu_s.qb t0, t2, t0
- sw t0, 0(a0)
-4:
+ beq t0, t7, 5f /* if (t0 == 0xffffffff) */
+ nop
+//if(ma)
+ lw t1, 0(a0) /* t1 = dst */
+ MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0
+ MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5
+ not t0, t0
+ MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0
+ addu_s.qb t1, t2, t1
+ sw t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+ j ra
+ nop
+5:
+//if (t0 == 0xffffffff)
+ beq t8, t6, 6f /* if (srca == 0xff) */
+ nop
+ lw t1, 0(a0) /* t1 = dst */
+ not t0, a1
+ srl t0, t0, 24
+ MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
+ addu_s.qb t1, a1, t1
+ sw t1, 0(a0)
RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
j ra
nop
+6:
+ sw a1, 0(a0)
+7:
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+8:
+ j ra
+ nop
END(pixman_composite_over_n_8888_8888_ca_asm_mips)
@@ -553,111 +1081,251 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
* a3 - w
*/
- SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
- beqz a3, 4f
+ beqz a3, 8f
nop
- li t5, 0xf800f800
- li t6, 0x07e007e0
- li t7, 0x001F001F
- li t9, 0x00ff00ff
+ SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ li t6, 0xff
+ addiu t7, zero, -1 /* t7 = 0xffffffff */
srl t8, a1, 24 /* t8 = srca */
+ li t9, 0x00ff00ff
+ li s6, 0xf800f800
+ li s7, 0x07e007e0
+ li s8, 0x001F001F
+
addiu t1, a3, -1
- beqz t1, 3f /* last pixel */
+ beqz t1, 4f /* last pixel */
nop
- li s0, 0xff /* s0 = 0xff */
- addiu s1, zero, -1 /* s1 = 0xffffffff */
- beq t8, s0, 2f /* if (srca == 0xff) */
- nop
-1:
- /* a1 = src */
+0:
lw t0, 0(a2) /* t0 = mask */
lw t1, 4(a2) /* t1 = mask */
+ addiu a3, a3, -2 /* w = w - 2 */
or t2, t0, t1
- beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */
addiu a2, a2, 8
- and t3, t0, t1
- move s2, a1 /* s2 = src */
- move s3, a1 /* s3 = src */
+ and t2, t0, t1
+ beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ nop
+
+//if(ma)
lhu t2, 0(a0) /* t2 = dst */
- beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- lhu t3, 2(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
-11:
+ lhu t3, 2(a0) /* t3 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
not t0, t0
not t1, t1
- CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
- addu_s.qb s2, s2, s4
- addu_s.qb s3, s3, s5
- CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+ MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, t4, t2
+ addu_s.qb t3, t5, t3
+ CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
sh t2, 0(a0)
sh t3, 2(a0)
-12:
- addiu a3, a3, -2
addiu t1, a3, -1
- bgtz t1, 1b
+ bgtz t1, 0b
addiu a0, a0, 4
- b 3f
+ b 4f
+ nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+ beq t8, t6, 2f /* if (srca == 0xff) */
nop
-2:
- /* a1 = src */
- lw t0, 0(a2) /* t0 = mask */
- lw t1, 4(a2) /* t1 = mask */
- or t2, t0, t1
- beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
- addiu a2, a2, 8
- and t3, t0, t1
- move t2, a1
- beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- move t3, a1
lhu t2, 0(a0) /* t2 = dst */
lhu t3, 2(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
- not t0, t0
- not t1, t1
- CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
- addu_s.qb t2, s2, s4
- addu_s.qb t3, s3, s5
-21:
- CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
- sh t0, 0(a0)
- sh t1, 2(a0)
-22:
- addiu a3, a3, -2
+ not t0, a1
+ not t1, a1
+ srl t0, t0, 24
+ srl t1, t1, 24
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, a1, t2
+ addu_s.qb t3, a1, t3
+ CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
+ sh t2, 0(a0)
+ sh t3, 2(a0)
addiu t1, a3, -1
- bgtz t1, 2b
+ bgtz t1, 0b
addiu a0, a0, 4
+ b 4f
+ nop
+2:
+ CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
+ sh t2, 0(a0)
+ sh t2, 2(a0)
3:
- blez a3, 4f
+ addiu t1, a3, -1
+ bgtz t1, 0b
+ addiu a0, a0, 4
+
+4:
+ beqz a3, 7f
nop
/* a1 = src */
- lw t1, 0(a2) /* t1 = mask */
- beqz t1, 4f
+ lw t0, 0(a2) /* t0 = mask */
+ beqz t0, 7f /* if (t0 == 0) */
nop
- move t2, a1 /* t2 = src */
- beq t1, t7, 31f
- lhu t0, 0(a0) /* t0 = dst */
-
- MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
- MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5
-31:
- not t1, t1
- CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
- MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7
- addu_s.qb t0, t2, t3
- CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
- sh s1, 0(a0)
-4:
- RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ beq t0, t7, 5f /* if (t0 == 0xffffffff) */
+ nop
+//if(ma)
+ lhu t1, 0(a0) /* t1 = dst */
+ MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0
+ MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5
+ not t0, t0
+ CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+ MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0
+ addu_s.qb s1, t2, s1
+ CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+ sh t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+5:
+//if (t0 == 0xffffffff)
+ beq t8, t6, 6f /* if (srca == 0xff) */
+ nop
+ lhu t1, 0(a0) /* t1 = dst */
+ not t0, a1
+ srl t0, t0, 24
+ CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+ MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4
+ addu_s.qb s1, a1, s1
+ CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+ sh t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+6:
+ CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
+ sh t1, 0(a0)
+7:
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+8:
j ra
nop
END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
+/*
+ * a0 - dst (a8)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li t9, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */
+ beqz v0, 1f /* branch if less than 4 dst pixels */
+ nop
+
+ srl t8, a1, 24
+ replv.ph t8, t8
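+ /* t8 now holds srca in both halfwords */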
+
+0:
+ beqz v0, 1f
+ addiu v0, v0, -1
+ lbu t0, 0(a2)
+ lbu t1, 1(a2)
+ lbu t2, 2(a2)
+ lbu t3, 3(a2)
+ lbu t4, 0(a0)
+ lbu t5, 1(a0)
+ lbu t6, 2(a0)
+ lbu t7, 3(a0)
+
+ addiu a2, a2, 4
+
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr_sra.ph.w t5, t4, 0
+ precr_sra.ph.w t7, t6, 0
+
+ precr.qb.ph t0, t3, t1
+ precr.qb.ph t1, t7, t5
+
+ muleu_s.ph.qbl t2, t0, t8
+ muleu_s.ph.qbr t3, t0, t8
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
+ shra_r.ph t2, t2, 8
+ shra_r.ph t3, t3, 8
+ precr.qb.ph t0, t2, t3
+ not t6, t0
+
+ preceu.ph.qbl t7, t6
+ preceu.ph.qbr t6, t6
+
+ muleu_s.ph.qbl t2, t1, t7
+ muleu_s.ph.qbr t3, t1, t6
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
+ shra_r.ph t2, t2, 8
+ shra_r.ph t3, t3, 8
+ precr.qb.ph t1, t2, t3
+
+ addu_s.qb t2, t0, t1
+
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a3, a3, -4
+ b 0b
+ addiu a0, a0, 4
+
+1:
+ beqz a3, 3f
+ nop
+ srl t8, a1, 24
+2:
+ lbu t0, 0(a2)
+ lbu t1, 0(a0)
+ addiu a2, a2, 1
+
+ mul t2, t0, t8
+ shra_r.ph t3, t2, 8
+ andi t3, t3, 0x00ff
+ addq.ph t2, t2, t3
+ shra_r.ph t2, t2, 8
+ not t3, t2
+ andi t3, t3, 0x00ff
+
+ mul t4, t1, t3
+ shra_r.ph t5, t4, 8
+ andi t5, t5, 0x00ff
+ addq.ph t4, t4, t5
+ shra_r.ph t4, t4, 8
+ andi t4, t4, 0x00ff
+
+ addu_s.qb t2, t2, t4
+ sb t2, 0(a0)
+ addiu a3, a3, -1
+ bnez a3, 2b
+ addiu a0, a0, 1
+
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_over_n_8_8_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
/*
* a0 - dst (a8r8g8b8)
@@ -1342,6 +2010,84 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
END(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+ li t4, 0x00ff00ff
+ li s3, 0xf800f800
+ li s4, 0x07e007e0
+ li s5, 0x001F001F
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
+ lhu t3, 2(a0) /* t3 = destination (r5g6b5) */
+ addiu a1, a1, 8
+
+ not t5, t0
+ srl t5, t5, 24
+ not t6, t1
+ srl t6, t6, 24
+
+ or t7, t5, t6
+ beqz t7, 11f
+ or t8, t0, t1
+ beqz t8, 12f
+
+ CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2
+ MIPS_2xUN8x4_MUL_2xUN8 s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1
+
+ addu_s.qb t0, t7, t0
+ addu_s.qb t1, t8, t1
+11:
+ CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3
+ sh t7, 0(a0)
+ sh t8, 2(a0)
+12:
+ addiu a2, a2, -2
+ addiu t1, a2, -1
+ bgtz t1, 1b
+ addiu a0, a0, 4
+2:
+ beqz a2, 3f
+ nop
+
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
+ addiu a1, a1, 4
+
+ not t2, t0
+ srl t2, t2, 24
+
+ beqz t2, 21f
+ nop
+ beqz t0, 3f
+
+ CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9
+ MIPS_UN8x4_MUL_UN8 s0, t2, t3, t4, t5, t6, t7
+
+ addu_s.qb t0, t3, t0
+21:
+ CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9
+ sh s0, 0(a0)
+
+3:
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+ j ra
+ nop
+
+END(pixman_composite_over_8888_0565_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
/*
* a0 - dst (r5g6b5)
@@ -2349,101 +3095,265 @@ END(pixman_composite_over_reverse_n_8888_asm_mips)
LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
/*
* a0 - dst (a8)
- * a1 - src (a8r8g8b8)
+ * a1 - src (32bit constant)
* a2 - w
*/
- beqz a2, 5f
+ li t9, 0x00ff00ff
+ beqz a2, 3f
nop
-
- SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
- move t7, a1
- srl t5, t7, 24
- replv.ph t5, t5
- srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */
- beqz t9, 2f /* branch if less than 4 src pixels */
+ srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */
+ beqz t7, 1f /* branch if less than 4 dst pixels */
nop
-1:
- addiu t9, t9, -1
- addiu a2, a2, -4
+ srl t8, a1, 24
+ replv.ph t8, t8
+
+0:
+ beqz t7, 1f
+ addiu t7, t7, -1
lbu t0, 0(a0)
lbu t1, 1(a0)
lbu t2, 2(a0)
lbu t3, 3(a0)
- muleu_s.ph.qbl s0, t0, t5
- muleu_s.ph.qbr s1, t0, t5
- muleu_s.ph.qbl s2, t1, t5
- muleu_s.ph.qbr s3, t1, t5
- muleu_s.ph.qbl s4, t2, t5
- muleu_s.ph.qbr s5, t2, t5
- muleu_s.ph.qbl s6, t3, t5
- muleu_s.ph.qbr s7, t3, t5
-
- shrl.ph t4, s0, 8
- shrl.ph t6, s1, 8
- shrl.ph t7, s2, 8
- shrl.ph t8, s3, 8
- addq.ph t0, s0, t4
- addq.ph t1, s1, t6
- addq.ph t2, s2, t7
- addq.ph t3, s3, t8
- shra_r.ph t0, t0, 8
- shra_r.ph t1, t1, 8
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr.qb.ph t0, t3, t1
+
+ muleu_s.ph.qbl t2, t0, t8
+ muleu_s.ph.qbr t3, t0, t8
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
shra_r.ph t2, t2, 8
shra_r.ph t3, t3, 8
- shrl.ph t4, s4, 8
- shrl.ph t6, s5, 8
- shrl.ph t7, s6, 8
- shrl.ph t8, s7, 8
- addq.ph s0, s4, t4
- addq.ph s1, s5, t6
- addq.ph s2, s6, t7
- addq.ph s3, s7, t8
- shra_r.ph t4, s0, 8
- shra_r.ph t6, s1, 8
- shra_r.ph t7, s2, 8
- shra_r.ph t8, s3, 8
-
- precr.qb.ph s0, t0, t1
- precr.qb.ph s1, t2, t3
- precr.qb.ph s2, t4, t6
- precr.qb.ph s3, t7, t8
+ precr.qb.ph t2, t2, t3
- sb s0, 0(a0)
- sb s1, 1(a0)
- sb s2, 2(a0)
- sb s3, 3(a0)
- bgtz t9, 1b
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a2, a2, -4
+ b 0b
addiu a0, a0, 4
-2:
- beqz a2, 4f
+
+1:
+ beqz a2, 3f
nop
-3:
- lbu t1, 0(a0)
+ srl t8, a1, 24
+2:
+ lbu t0, 0(a0)
+
+ mul t2, t0, t8
+ shra_r.ph t3, t2, 8
+ andi t3, t3, 0x00ff
+ addq.ph t2, t2, t3
+ shra_r.ph t2, t2, 8
- muleu_s.ph.qbl t4, t1, t5
- muleu_s.ph.qbr t7, t1, t5
- shrl.ph t6, t4, 8
- shrl.ph t0, t7, 8
- addq.ph t8, t4, t6
- addq.ph t9, t7, t0
- shra_r.ph t8, t8, 8
- shra_r.ph t9, t9, 8
- precr.qb.ph t2, t8, t9
sb t2, 0(a0)
addiu a2, a2, -1
- bnez a2, 3b
+ bnez a2, 2b
addiu a0, a0, 1
-4:
- RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-5:
+
+3:
j ra
nop
END(pixman_composite_in_n_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+ lw t8, 16(sp) /* t8 = unit_x */
+ li t6, 0x00ff00ff
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ sra t1, a3, 16 /* t1 = vx >> 16 */
+ sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */
+ addu t1, a1, t1
+ lw t1, 0(t1) /* t1 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+
+ OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3
+
+ sw t4, 0(a0)
+ sw t5, 4(a0)
+ addiu a2, a2, -2
+ addiu t1, a2, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7
+
+ sw t2, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
+ lw t8, 40(sp) /* t8 = unit_x */
+ li t4, 0x00ff00ff
+ li t5, 0xf800f800
+ li t6, 0x07e007e0
+ li t7, 0x001F001F
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+ sra t1, a3, 16 /* t1 = vx >> 16 */
+ sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */
+ addu t1, a1, t1
+ lw t1, 0(t1) /* t1 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+ lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
+ lhu t3, 2(a0) /* t3 = destination (r5g6b5) */
+
+ CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
+ OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
+ CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2
+
+ sh v0, 0(a0)
+ sh v1, 2(a0)
+ addiu a2, a2, -2
+ addiu t1, a2, -1
+ bgtz t1, 1b
+ addiu a0, a0, 4
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6
+ OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7
+ CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6
+
+ sh t2, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (r5g6b5)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ beqz a2, 3f
+ nop
+
+ lw v0, 16(sp) /* v0 = unit_x */
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+
+ li t4, 0x07e007e0
+ li t5, 0x001F001F
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ sra t1, a3, 16 /* t1 = vx >> 16 */
+ sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */
+ addu t1, a1, t1
+ lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ addiu a2, a2, -2
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+
+ CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
+
+ sw t1, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
/*
* a0 - dst (r5g6b5)
diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h
index b330c0f0d9..e238566196 100644
--- a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h
+++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * Author: Nemanja Lukic (nlukic@mips.com)
+ * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifndef PIXMAN_MIPS_DSPR2_ASM_H
@@ -72,6 +72,7 @@
#define LEAF_MIPS32R2(symbol) \
.globl symbol; \
.align 2; \
+ .hidden symbol; \
.type symbol, @function; \
.ent symbol, 0; \
symbol: .frame sp, 0, ra; \
@@ -354,17 +355,16 @@ LEAF_MIPS32R2(symbol) \
out1_565, out2_565, \
maskR, maskG, maskB, \
scratch1, scratch2
- precrq.ph.w \scratch1, \in2_8888, \in1_8888
- precr_sra.ph.w \in2_8888, \in1_8888, 0
- shll.ph \scratch1, \scratch1, 8
- srl \in2_8888, \in2_8888, 3
- and \scratch2, \in2_8888, \maskB
- and \scratch1, \scratch1, \maskR
- srl \in2_8888, \in2_8888, 2
- and \out2_565, \in2_8888, \maskG
- or \out2_565, \out2_565, \scratch2
- or \out1_565, \out2_565, \scratch1
- srl \out2_565, \out1_565, 16
+ precr.qb.ph \scratch1, \in2_8888, \in1_8888
+ precrq.qb.ph \in2_8888, \in2_8888, \in1_8888
+ and \out1_565, \scratch1, \maskR
+ shrl.ph \scratch1, \scratch1, 3
+ shll.ph \in2_8888, \in2_8888, 3
+ and \scratch1, \scratch1, \maskB
+ or \out1_565, \out1_565, \scratch1
+ and \in2_8888, \in2_8888, \maskG
+ or \out1_565, \out1_565, \in2_8888
+ srl \out2_565, \out1_565, 16
.endm
/*
@@ -587,6 +587,36 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out_8888, \out_8888, \s_8888
.endm
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires
+ * maskLSR, which is needed for the rounding process and must have the
+ * following value:
+ * li maskLSR, 0x00ff00ff
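+ *
+ * Note that d1_8888 and d2_8888 are reused as scratch registers by the
+ * inner multiply macro and are therefore clobbered; out1_8888/out2_8888
+ * must not alias the source registers.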
+ */
+.macro OVER_2x8888_2x8888 s1_8888, \
+ s2_8888, \
+ d1_8888, \
+ d2_8888, \
+ out1_8888, \
+ out2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ not \scratch1, \s1_8888
+ srl \scratch1, \scratch1, 24
+ not \scratch2, \s2_8888
+ srl \scratch2, \scratch2, 24
+ MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
+ \scratch1, \scratch2, \
+ \out1_8888, \out2_8888, \
+ \maskLSR, \
+ \scratch3, \scratch4, \scratch5, \
+ \scratch6, \d1_8888, \d2_8888
+
+ addu_s.qb \out1_8888, \out1_8888, \s1_8888
+ addu_s.qb \out2_8888, \out2_8888, \s2_8888
+.endm
+
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
m_8, \
d_8888, \
diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2.c b/gfx/cairo/libpixman/src/pixman-mips-dspr2.c
index e14e1c43b9..87969ae704 100644
--- a/gfx/cairo/libpixman/src/pixman-mips-dspr2.c
+++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2.c
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * Author: Nemanja Lukic (nlukic@mips.com)
+ * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifdef HAVE_CONFIG_H
@@ -48,8 +48,20 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
uint8_t, 3, uint8_t, 3)
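+/* the 0888 "rev" fast paths below make little-endian byte-order
+ * assumptions, so they are only provided on little-endian MIPS */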
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
+ uint8_t, 3, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
+ uint8_t, 3, uint16_t, 1)
+#endif
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
@@ -67,6 +79,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
+ uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
uint8_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
@@ -111,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
+ uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
+ uint16_t, uint32_t)
+
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
@@ -278,6 +299,14 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888),
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev),
+#endif
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888),
@@ -290,6 +319,7 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888),
@@ -318,6 +348,8 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888),
@@ -340,11 +372,27 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8),
- PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
- PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888),
+
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
+
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+ /* Note: NONE repeat is not supported yet */
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
- PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
- PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2.h b/gfx/cairo/libpixman/src/pixman-mips-dspr2.h
index 4ac9ff95df..57b38359e3 100644
--- a/gfx/cairo/libpixman/src/pixman-mips-dspr2.h
+++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2.h
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * Author: Nemanja Lukic (nlukic@mips.com)
+ * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com)
*/
#ifndef PIXMAN_MIPS_DSPR2_H
@@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/****************************************************************************/
+
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \
+ dst_type * dst, \
+ const src_type * src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+ \
+static force_inline void \
+scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \
+ vx, unit_x); \
+} \
+ \
+FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, COVER) \
+FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, NONE) \
+FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, PAD)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
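+/* only the COVER, NONE and PAD repeats are bound here, matching the
+ * FAST_NEAREST_MAINLOOP instantiations above; NORMAL repeat falls back
+ * to the more generic paths */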
+
+
/*****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \
@@ -286,12 +328,6 @@ FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \
scaled_nearest_scanline_mips_##name##_##op, \
src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
/****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \
diff --git a/gfx/cairo/libpixman/src/pixman-mmx.c b/gfx/cairo/libpixman/src/pixman-mmx.c
index ca2ac83d90..d7cf2659df 100644
--- a/gfx/cairo/libpixman/src/pixman-mmx.c
+++ b/gfx/cairo/libpixman/src/pixman-mmx.c
@@ -44,8 +44,6 @@
#include "pixman-combine32.h"
#include "pixman-inlines.h"
-#define no_vERBOSE
-
#ifdef VERBOSE
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
#else
@@ -91,21 +89,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
return __A;
}
-# ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-{
- __m64 ret;
-
- asm ("pshufw %2, %1, %0\n\t"
- : "=y" (ret)
- : "y" (__A), "K" (__N)
- );
-
- return ret;
-}
-# else
-# define _mm_shuffle_pi16(A, N) \
+# define _mm_shuffle_pi16(A, N) \
({ \
__m64 ret; \
\
@@ -116,7 +100,6 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
\
ret; \
})
-# endif
# endif
#endif
@@ -303,6 +286,29 @@ negate (__m64 mask)
return _mm_xor_si64 (mask, MC (4x00ff));
}
+/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1
+ * and maps its result to the same range.
+ *
+ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
+ * Notation, Notation, Notation", the first of which is
+ *
+ * prod(a, b) = (a * b + 128) / 255.
+ *
+ * By approximating the division by 255 as 257/65536 it can be replaced by a
+ * multiply and a right shift. This is the implementation that we use in
+ * pix_multiply(), but we use _mm_mulhi_pu16() (part of SSE1 or Extended
+ * 3DNow!, and unavailable at the time of the book's publication) to
+ * perform the multiplication by 257 and the right shift in a single
+ * operation.
+ *
+ * prod(a, b) = ((a * b + 128) * 257) >> 16.
+ *
+ * A third way also exists (it is how pix_multiply() was implemented
+ * prior to 14208344); it performs the multiplication by 257 with adds
+ * and shifts. With temp = a * b + 128:
+ *
+ * prod(a, b) = (temp + (temp >> 8)) >> 8.
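+ *
+ * As a quick check, a = b = 128 gives temp = 128 * 128 + 128 = 16512;
+ * both (16512 * 257) >> 16 and (16512 + (16512 >> 8)) >> 8 equal 64,
+ * the same as the exact (a * b + 128) / 255.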
+ */
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
@@ -381,8 +387,10 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
static force_inline __m64 ldq_u(__m64 *p)
{
#ifdef USE_X86_MMX
- /* x86's alignment restrictions are very relaxed. */
- return *(__m64 *)p;
+ /* x86's alignment restrictions are very relaxed, but that's no excuse */
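+ /* a fixed-size memcpy compiles to a single unaligned load without
+ the undefined behaviour of dereferencing a misaligned pointer */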
+ __m64 r;
+ memcpy(&r, p, sizeof(__m64));
+ return r;
#elif defined USE_ARM_IWMMXT
int align = (uintptr_t)p & 7;
__m64 *aligned_p;
@@ -401,7 +409,9 @@ static force_inline uint32_t ldl_u(const uint32_t *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
- return *p;
+ uint32_t r;
+ memcpy(&r, p, sizeof(uint32_t));
+ return r;
#else
struct __una_u32 { uint32_t x __attribute__((packed)); };
const struct __una_u32 *ptr = (const struct __una_u32 *) p;
@@ -3534,13 +3544,111 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+static force_inline void
+scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t src_width_fixed,
+ pixman_bool_t fully_transparent_src)
+{
+ if (fully_transparent_src)
+ return;
+
+ while (w)
+ {
+ __m64 d = load (pd);
+ __m64 s = load (ps + pixman_fixed_to_int (vx));
+ vx += unit_x;
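+ /* wrap vx back into a single source-width period (needed for
+ NORMAL repeat) */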
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
+ pd++;
+
+ w--;
+ }
+
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER,
+ scaled_nearest_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER,
+ scaled_nearest_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER,
+ scaled_nearest_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER,
+ scaled_nearest_scanline_mmx_8888_8888_OVER,
+ uint32_t, uint32_t, NORMAL)
+
+static force_inline void
+scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t src_width_fixed,
+ pixman_bool_t zero_src)
+{
+ __m64 mm_mask;
+
+ if (zero_src || (*mask >> 24) == 0)
+ {
+ /* A workaround for https://gcc.gnu.org/PR47759 */
+ _mm_empty ();
+ return;
+ }
+
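+ /* the mask is a solid colour, so its expanded alpha only needs to
+ be computed once for the whole scanline */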
+ mm_mask = expand_alpha (load8888 (mask));
+
+ while (w)
+ {
+ uint32_t s = *(src + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ if (s)
+ {
+ __m64 ms = load8888 (&s);
+ __m64 alpha = expand_alpha (ms);
+ __m64 dest = load8888 (dst);
+
+ store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
+ }
+
+ dst++;
+ w--;
+ }
+
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
+
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
#define BMSK (BSHIFT - 1)
#define BILINEAR_DECLARE_VARIABLES \
const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \
const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \
- const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \
const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \
const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \
const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \
@@ -3559,36 +3667,16 @@ do { \
__m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \
__m64 hi = _mm_add_pi16 (t_hi, b_hi); \
__m64 lo = _mm_add_pi16 (t_lo, b_lo); \
- vx += unit_x; \
- if (BILINEAR_INTERPOLATION_BITS < 8) \
- { \
- /* calculate horizontal weights */ \
- __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
+ /* calculate horizontal weights */ \
+ __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
_mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS))); \
- /* horizontal interpolation */ \
- __m64 p = _mm_unpacklo_pi16 (lo, hi); \
- __m64 q = _mm_unpackhi_pi16 (lo, hi); \
- lo = _mm_madd_pi16 (p, mm_wh); \
- hi = _mm_madd_pi16 (q, mm_wh); \
- } \
- else \
- { \
- /* calculate horizontal weights */ \
- __m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \
- 16 - BILINEAR_INTERPOLATION_BITS)); \
- __m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \
- 16 - BILINEAR_INTERPOLATION_BITS); \
- /* horizontal interpolation */ \
- __m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \
- __m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \
- __m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \
- __m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \
- lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \
- _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \
- hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \
- _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \
- } \
+ /* horizontal interpolation */ \
+ __m64 p = _mm_unpacklo_pi16 (lo, hi); \
+ __m64 q = _mm_unpackhi_pi16 (lo, hi); \
+ vx += unit_x; \
+ lo = _mm_madd_pi16 (p, mm_wh); \
+ hi = _mm_madd_pi16 (q, mm_wh); \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
/* shift and pack the result */ \
hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \
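/* A scalar reference (an assumption, not part of this patch) for what the
 * macro above computes per channel: a vertical blend with weights wt/wb,
 * a horizontal blend with weights derived from the fractional part of vx,
 * then a shift by 2 * BILINEAR_INTERPOLATION_BITS to renormalize. */
#include <stdint.h>

static uint32_t
bilinear_channel_ref (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br,
                      int wt, int wb, int wl, int wr, int bits)
{
    uint32_t left  = tl * wt + bl * wb;             /* vertical pass   */
    uint32_t right = tr * wt + br * wb;
    return (left * wl + right * wr) >> (bits * 2);  /* horizontal pass */
}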
@@ -3866,7 +3954,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w && (((uintptr_t)dst) & 15))
{
- *dst++ = *(src++) << 24;
+ *dst++ = (uint32_t)*(src++) << 24;
w--;
}
@@ -3893,7 +3981,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w)
{
- *dst++ = *(src++) << 24;
+ *dst++ = (uint32_t)*(src++) << 24;
w--;
}
@@ -3901,52 +3989,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
- { PIXMAN_a8, mmx_fetch_a8 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
+static const pixman_iter_info_t mmx_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
static const pixman_fast_path_t mmx_fast_paths[] =
{
@@ -4024,6 +4083,16 @@ static const pixman_fast_path_t mmx_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
+
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ),
+
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
@@ -4076,7 +4145,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
imp->blt = mmx_blt;
imp->fill = mmx_fill;
- imp->src_iter_init = mmx_src_iter_init;
+ imp->iter_info = mmx_iters;
return imp;
}
diff --git a/gfx/cairo/libpixman/src/pixman-noop.c b/gfx/cairo/libpixman/src/pixman-noop.c
index e39996d9df..e59890492f 100644
--- a/gfx/cairo/libpixman/src/pixman-noop.c
+++ b/gfx/cairo/libpixman/src/pixman-noop.c
@@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp,
return;
}
-static void
-dest_write_back_direct (pixman_iter_t *iter)
-{
- iter->buffer += iter->image->bits.rowstride;
-}
-
static uint32_t *
noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
{
@@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
return result;
}
-static uint32_t *
-get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
-{
- return NULL;
+static void
+noop_init_solid_narrow (pixman_iter_t *iter,
+ const pixman_iter_info_t *info)
+{
+ pixman_image_t *image = iter->image;
+ uint32_t *buffer = iter->buffer;
+ uint32_t *end = buffer + iter->width;
+ uint32_t color;
+
+ if (iter->image->type == SOLID)
+ color = image->solid.color_32;
+ else
+ color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
+
+ while (buffer < end)
+ *(buffer++) = color;
}
-static pixman_bool_t
-noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+noop_init_solid_wide (pixman_iter_t *iter,
+ const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
+ argb_t *buffer = (argb_t *)iter->buffer;
+ argb_t *end = buffer + iter->width;
+ argb_t color;
-#define FLAGS \
- (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
-
- if (!image)
- {
- iter->get_scanline = get_scanline_null;
- }
- else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
- (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
- {
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- }
- else if (image->common.extended_format_code == PIXMAN_solid &&
- (iter->image->type == SOLID ||
- (iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
- {
- if (iter->iter_flags & ITER_NARROW)
- {
- uint32_t *buffer = iter->buffer;
- uint32_t *end = buffer + iter->width;
- uint32_t color;
-
- if (image->type == SOLID)
- color = image->solid.color_32;
- else
- color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
-
- while (buffer < end)
- *(buffer++) = color;
- }
- else
- {
- argb_t *buffer = (argb_t *)iter->buffer;
- argb_t *end = buffer + iter->width;
- argb_t color;
-
- if (image->type == SOLID)
- color = image->solid.color_float;
- else
- color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
-
- while (buffer < end)
- *(buffer++) = color;
- }
-
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- }
- else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 &&
- (iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS &&
- iter->x >= 0 && iter->y >= 0 &&
- iter->x + iter->width <= image->bits.width &&
- iter->y + iter->height <= image->bits.height)
- {
- iter->buffer =
- image->bits.bits + iter->y * image->bits.rowstride + iter->x;
-
- iter->get_scanline = noop_get_scanline;
- }
+ if (iter->image->type == SOLID)
+ color = image->solid.color_float;
else
- {
- return FALSE;
- }
+ color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
- return TRUE;
+ while (buffer < end)
+ *(buffer++) = color;
}
-static pixman_bool_t
-noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
- uint32_t image_flags = iter->image_flags;
- uint32_t iter_flags = iter->iter_flags;
-
- if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS &&
- (iter_flags & ITER_NARROW) == ITER_NARROW &&
- ((image->common.extended_format_code == PIXMAN_a8r8g8b8) ||
- (image->common.extended_format_code == PIXMAN_x8r8g8b8 &&
- (iter_flags & (ITER_LOCALIZED_ALPHA)))))
- {
- iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
-
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- iter->write_back = dest_write_back_direct;
-
- return TRUE;
- }
- else
- {
- return FALSE;
- }
+
+ iter->buffer =
+ image->bits.bits + iter->y * image->bits.rowstride + iter->x;
}
+static void
+dest_write_back_direct (pixman_iter_t *iter)
+{
+ iter->buffer += iter->image->bits.rowstride;
+}
+
+static const pixman_iter_info_t noop_iters[] =
+{
+ /* Source iters */
+ { PIXMAN_any,
+ 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC,
+ NULL,
+ _pixman_iter_get_scanline_noop,
+ NULL
+ },
+ { PIXMAN_solid,
+ FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC,
+ noop_init_solid_narrow,
+ _pixman_iter_get_scanline_noop,
+ NULL,
+ },
+ { PIXMAN_solid,
+ FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC,
+ noop_init_solid_wide,
+ _pixman_iter_get_scanline_noop,
+ NULL
+ },
+ { PIXMAN_a8r8g8b8,
+ FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,
+ ITER_NARROW | ITER_SRC,
+ noop_init_direct_buffer,
+ noop_get_scanline,
+ NULL
+ },
+ /* Dest iters */
+ { PIXMAN_a8r8g8b8,
+ FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST,
+ noop_init_direct_buffer,
+ _pixman_iter_get_scanline_noop,
+ dest_write_back_direct
+ },
+ { PIXMAN_x8r8g8b8,
+ FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA,
+ noop_init_direct_buffer,
+ _pixman_iter_get_scanline_noop,
+ dest_write_back_direct
+ },
+ { PIXMAN_null },
+};
+
static const pixman_fast_path_t noop_fast_paths[] =
{
{ PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
@@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback)
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
- imp->src_iter_init = noop_src_iter_init;
- imp->dest_iter_init = noop_dest_iter_init;
+ imp->iter_info = noop_iters;
return imp;
}
diff --git a/gfx/cairo/libpixman/src/pixman-private.h b/gfx/cairo/libpixman/src/pixman-private.h
index 2313e65583..a0459dac19 100644
--- a/gfx/cairo/libpixman/src/pixman-private.h
+++ b/gfx/cairo/libpixman/src/pixman-private.h
@@ -9,7 +9,13 @@
#ifndef MOZILLA_VERSION
#error "Need mozilla headers"
#endif
+#ifdef MOZ_GFX_OPTIMIZE_MOBILE
+#define LOW_QUALITY_INTERPOLATION
+#define LOWER_QUALITY_INTERPOLATION
+#define BILINEAR_INTERPOLATION_BITS 4
+#else
#define BILINEAR_INTERPOLATION_BITS 7
+#endif
#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
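/* Illustrative sketch (an assumption, not part of this patch): 4 bits give
 * 16 interpolation steps per axis (cheaper filtering for the mobile
 * configuration), 7 bits give 128. A 16.16 fixed-point fraction is reduced
 * to a filter weight like this: */
static inline int
bilinear_weight (int32_t frac_16_16)
{
    return (frac_16_16 >> (16 - BILINEAR_INTERPOLATION_BITS)) &
           (BILINEAR_INTERPOLATION_RANGE - 1);
}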
/*
@@ -31,6 +37,7 @@
#include <stdio.h>
#include <string.h>
#include <stddef.h>
+#include <float.h>
#include "pixman-compiler.h"
@@ -58,7 +65,7 @@ struct argb_t
float b;
};
-typedef void (*fetch_scanline_t) (pixman_image_t *image,
+typedef void (*fetch_scanline_t) (bits_image_t *image,
int x,
int y,
int width,
@@ -182,7 +189,9 @@ struct bits_image
uint32_t * free_me;
int rowstride; /* in number of uint32_t's */
- fetch_scanline_t fetch_scanline_16;
+ pixman_dither_t dither;
+ uint32_t dither_offset_y;
+ uint32_t dither_offset_x;
fetch_scanline_t fetch_scanline_32;
fetch_pixel_32_t fetch_pixel_32;
@@ -192,8 +201,6 @@ struct bits_image
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_float;
- store_scanline_t store_scanline_16;
-
/* Used for indirect access to the bits */
pixman_read_memory_func_t read_func;
pixman_write_memory_func_t write_func;
@@ -214,10 +221,12 @@ union pixman_image
typedef struct pixman_iter_t pixman_iter_t;
typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
+typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter);
typedef enum
{
- ITER_NARROW = (1 << 0),
+ ITER_NARROW = (1 << 0),
+ ITER_WIDE = (1 << 1),
/* "Localized alpha" is when the alpha channel is used only to compute
* the alpha value of the destination. This means that the computation
@@ -234,16 +243,15 @@ typedef enum
* we can treat it as if it were ARGB, which means in some cases we can
* avoid copying it to a temporary buffer.
*/
- ITER_LOCALIZED_ALPHA = (1 << 1),
- ITER_IGNORE_ALPHA = (1 << 2),
- ITER_IGNORE_RGB = (1 << 3),
-
- /* With the addition of ITER_16 we now have two flags that to represent
- * 3 pipelines. This means that there can be an invalid state when
- * both ITER_NARROW and ITER_16 are set. In this case
- * ITER_16 overrides NARROW and we should use the 16 bit pipeline.
- * Note: ITER_16 still has a 32 bit mask, which is a bit weird. */
- ITER_16 = (1 << 4)
+ ITER_LOCALIZED_ALPHA = (1 << 2),
+ ITER_IGNORE_ALPHA = (1 << 3),
+ ITER_IGNORE_RGB = (1 << 4),
+
+ /* These indicate whether the iterator is for a source
+ * or a destination image
+ */
+ ITER_SRC = (1 << 5),
+ ITER_DEST = (1 << 6)
} iter_flags_t;
struct pixman_iter_t
@@ -260,6 +268,7 @@ struct pixman_iter_t
/* These function pointers are initialized by the implementation */
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
+ pixman_iter_fini_t fini;
/* These fields are scratch data that implementations can use */
void * data;
@@ -267,6 +276,19 @@ struct pixman_iter_t
int stride;
};
+typedef struct pixman_iter_info_t pixman_iter_info_t;
+typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter,
+ const pixman_iter_info_t *info);
+struct pixman_iter_info_t
+{
+ pixman_format_code_t format;
+ uint32_t image_flags;
+ iter_flags_t iter_flags;
+ pixman_iter_initializer_t initializer;
+ pixman_iter_get_scanline_t get_scanline;
+ pixman_iter_write_back_t write_back;
+};
+
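/* A hedged sketch (not part of this patch) of how such a table is meant to
 * be scanned: the first entry whose format, image flags and iterator flags
 * all match wins; PIXMAN_any acts as a wildcard and PIXMAN_null terminates
 * the table. The helper name is hypothetical. */
static const pixman_iter_info_t *
lookup_iter_info (const pixman_iter_info_t *table,
                  pixman_format_code_t      format,
                  uint32_t                  image_flags,
                  iter_flags_t              iter_flags)
{
    const pixman_iter_info_t *info;

    for (info = table; info->format != PIXMAN_null; ++info)
    {
        if ((info->format == PIXMAN_any || info->format == format) &&
            (info->image_flags & image_flags) == info->image_flags &&
            (info->iter_flags & iter_flags) == info->iter_flags)
            return info;
    }

    return NULL;
}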
void
_pixman_bits_image_setup_accessors (bits_image_t *image);
@@ -331,13 +353,12 @@ _pixman_image_validate (pixman_image_t *image);
*/
typedef struct
{
- uint32_t left_ag;
- uint32_t left_rb;
- uint32_t right_ag;
- uint32_t right_rb;
- pixman_fixed_t left_x;
- pixman_fixed_t right_x;
- pixman_fixed_t stepper;
+ float a_s, a_b;
+ float r_s, r_b;
+ float g_s, g_b;
+ float b_s, b_b;
+ pixman_fixed_48_16_t left_x;
+ pixman_fixed_48_16_t right_x;
pixman_gradient_stop_t *stops;
int num_stops;
@@ -355,9 +376,38 @@ void
_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos);
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
- pixman_fixed_48_16_t x);
+typedef void (*pixman_gradient_walker_write_t) (
+ pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer);
+
+void
+_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer);
+
+void
+_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer);
+
+typedef void (*pixman_gradient_walker_fill_t) (
+ pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer,
+ uint32_t *end);
+
+void
+_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer,
+ uint32_t *end);
+
+void
+_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker,
+ pixman_fixed_48_16_t x,
+ uint32_t *buffer,
+ uint32_t *end);
/*
* Edges
@@ -467,10 +517,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
int width,
int height,
uint32_t filler);
-typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
- pixman_iter_t *iter);
-void _pixman_setup_combiner_functions_16 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
@@ -491,14 +538,11 @@ struct pixman_implementation_t
pixman_implementation_t * toplevel;
pixman_implementation_t * fallback;
const pixman_fast_path_t * fast_paths;
+ const pixman_iter_info_t * iter_info;
pixman_blt_func_t blt;
pixman_fill_func_t fill;
- pixman_iter_init_func_t src_iter_init;
- pixman_iter_init_func_t dest_iter_init;
- pixman_combine_32_func_t combine_16[PIXMAN_N_OPERATORS];
- pixman_combine_32_func_t combine_16_ca[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS];
@@ -530,8 +574,7 @@ pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
- pixman_bool_t wide,
- pixman_bool_t rgb16);
+ pixman_bool_t wide);
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t *imp,
@@ -559,29 +602,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
int height,
uint32_t filler);
-pixman_bool_t
-_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t flags,
- uint32_t image_flags);
-
-pixman_bool_t
-_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t flags,
- uint32_t image_flags);
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint8_t *buffer,
+ iter_flags_t flags,
+ uint32_t image_flags);
/* Specific implementations */
pixman_implementation_t *
@@ -603,6 +634,11 @@ pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
#endif
+#ifdef USE_SSSE3
+pixman_implementation_t *
+_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
+#endif
+
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
@@ -664,6 +700,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
+void
+_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info);
+
/* These "formats" all have depth 0, so they
* will never clash with any real ones
*/
@@ -703,7 +742,6 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
#define FAST_PATH_BITS_IMAGE (1 << 25)
#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26)
-#define FAST_PATH_16_FORMAT (1 << 27)
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
@@ -795,6 +833,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b);
void *
pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
+void *
+pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c);
+
pixman_bool_t
_pixman_multiply_overflows_size (size_t a, size_t b);
@@ -898,6 +939,8 @@ pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
+#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
+
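/* A usage sketch (an assumption, not part of this patch): FLOAT_IS_ZERO
 * treats anything smaller in magnitude than the smallest normal float as
 * zero, e.g. to guard divisions: */
static inline float
safe_ratio (float num, float den)
{
    return FLOAT_IS_ZERO (den) ? 0.0f : num / den;
}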
/* Conversion between 8888 and 0565 */
static force_inline uint16_t
@@ -1033,15 +1076,13 @@ float pixman_unorm_to_float (uint16_t u, int n_bits);
#endif
-#ifdef DEBUG
-
void
_pixman_log_error (const char *function, const char *message);
#define return_if_fail(expr) \
do \
{ \
- if (!(expr)) \
+ if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return; \
@@ -1052,7 +1093,7 @@ _pixman_log_error (const char *function, const char *message);
#define return_val_if_fail(expr, retval) \
do \
{ \
- if (!(expr)) \
+ if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return (retval); \
@@ -1063,55 +1104,30 @@ _pixman_log_error (const char *function, const char *message);
#define critical_if_fail(expr) \
do \
{ \
- if (!(expr)) \
+ if (unlikely (!(expr))) \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
} \
while (0)
-
-#else
-
-#define _pixman_log_error(f,m) do { } while (0)
-
-#define return_if_fail(expr) \
- do \
- { \
- if (!(expr)) \
- return; \
- } \
- while (0)
-
-#define return_val_if_fail(expr, retval) \
- do \
- { \
- if (!(expr)) \
- return (retval); \
- } \
- while (0)
-
-#define critical_if_fail(expr) \
- do \
- { \
- } \
- while (0)
-#endif
-
/*
* Matrix
*/
typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
+PIXMAN_EXPORT
pixman_bool_t
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
+PIXMAN_EXPORT
void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
+PIXMAN_EXPORT
void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
diff --git a/gfx/cairo/libpixman/src/pixman-radial-gradient.c b/gfx/cairo/libpixman/src/pixman-radial-gradient.c
index 3d539b1c86..e8e99c98b9 100644
--- a/gfx/cairo/libpixman/src/pixman-radial-gradient.c
+++ b/gfx/cairo/libpixman/src/pixman-radial-gradient.c
@@ -34,8 +34,6 @@
#include <math.h>
#include "pixman-private.h"
-#include "pixman-dither.h"
-
static inline pixman_fixed_32_32_t
dot (pixman_fixed_48_16_t x1,
pixman_fixed_48_16_t y1,
@@ -68,15 +66,18 @@ fdot (double x1,
return x1 * x2 + y1 * y2 + z1 * z2;
}
-static uint32_t
-radial_compute_color (double a,
- double b,
- double c,
- double inva,
- double dr,
- double mindr,
- pixman_gradient_walker_t *walker,
- pixman_repeat_t repeat)
+static void
+radial_write_color (double a,
+ double b,
+ double c,
+ double inva,
+ double dr,
+ double mindr,
+ pixman_gradient_walker_t *walker,
+ pixman_repeat_t repeat,
+ int Bpp,
+ pixman_gradient_walker_write_t write_pixel,
+ uint32_t *buffer)
{
/*
* In this function error propagation can lead to bad results:
@@ -101,21 +102,31 @@ radial_compute_color (double a,
double t;
if (b == 0)
- return 0;
+ {
+ memset (buffer, 0, Bpp);
+ return;
+ }
t = pixman_fixed_1 / 2 * c / b;
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t && t <= pixman_fixed_1)
- return _pixman_gradient_walker_pixel (walker, t);
+ {
+ write_pixel (walker, t, buffer);
+ return;
+ }
}
else
{
if (t * dr >= mindr)
- return _pixman_gradient_walker_pixel (walker, t);
+ {
+ write_pixel (walker, t, buffer);
+ return;
+ }
}
- return 0;
+ memset (buffer, 0, Bpp);
+ return;
}
discr = fdot (b, a, 0, b, -c, 0);
@@ -141,24 +152,40 @@ radial_compute_color (double a,
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t0 && t0 <= pixman_fixed_1)
- return _pixman_gradient_walker_pixel (walker, t0);
+ {
+ write_pixel (walker, t0, buffer);
+ return;
+ }
else if (0 <= t1 && t1 <= pixman_fixed_1)
- return _pixman_gradient_walker_pixel (walker, t1);
+ {
+ write_pixel (walker, t1, buffer);
+ return;
+ }
}
else
{
if (t0 * dr >= mindr)
- return _pixman_gradient_walker_pixel (walker, t0);
+ {
+ write_pixel (walker, t0, buffer);
+ return;
+ }
else if (t1 * dr >= mindr)
- return _pixman_gradient_walker_pixel (walker, t1);
+ {
+ write_pixel (walker, t1, buffer);
+ return;
+ }
}
}
- return 0;
+ memset (buffer, 0, Bpp);
+ return;
}
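/* A hedged scalar sketch (not part of this patch) of the per-pixel solve
 * performed above: At^2 - 2Bt + C = 0, preferring the larger root
 * t = (B + sqrt(B^2 - A*C)) / A when the discriminant is non-negative. */
#include <math.h>

static int
radial_solve_t (double A, double B, double C, double *t)
{
    double det = B * B - A * C;

    if (det < 0.0 || A == 0.0)
        return 0;              /* no usable root for this pixel */

    *t = (B + sqrt (det)) / A;
    return 1;
}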
static uint32_t *
-radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+radial_get_scanline (pixman_iter_t *iter,
+ const uint32_t *mask,
+ int Bpp,
+ pixman_gradient_walker_write_t write_pixel)
{
/*
* Implementation of radial gradients following the PDF specification.
@@ -249,7 +276,7 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
gradient_t *gradient = (gradient_t *)image;
radial_gradient_t *radial = (radial_gradient_t *)image;
- uint32_t *end = buffer + width;
+ uint32_t *end = buffer + width * (Bpp / 4);
pixman_gradient_walker_t walker;
pixman_vector_t v, unit;
@@ -332,18 +359,21 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
if (!mask || *mask++)
{
- *buffer = radial_compute_color (radial->a, b, c,
- radial->inva,
- radial->delta.radius,
- radial->mindr,
- &walker,
- image->common.repeat);
+ radial_write_color (radial->a, b, c,
+ radial->inva,
+ radial->delta.radius,
+ radial->mindr,
+ &walker,
+ image->common.repeat,
+ Bpp,
+ write_pixel,
+ buffer);
}
b += db;
c += dc;
dc += ddc;
- ++buffer;
+ buffer += (Bpp / 4);
}
}
else
@@ -377,20 +407,23 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
pdx, pdy, radial->c1.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
- *buffer = radial_compute_color (radial->a, b, c,
- radial->inva,
- radial->delta.radius,
- radial->mindr,
- &walker,
- image->common.repeat);
+ radial_write_color (radial->a, b, c,
+ radial->inva,
+ radial->delta.radius,
+ radial->mindr,
+ &walker,
+ image->common.repeat,
+ Bpp,
+ write_pixel,
+ buffer);
}
else
{
- *buffer = 0;
+ memset (buffer, 0, Bpp);
}
}
- ++buffer;
+ buffer += (Bpp / 4);
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
@@ -403,286 +436,35 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
}
static uint32_t *
-radial_get_scanline_16 (pixman_iter_t *iter, const uint32_t *mask)
+radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
- /*
- * Implementation of radial gradients following the PDF specification.
- * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
- * Manual (PDF 32000-1:2008 at the time of this writing).
- *
- * In the radial gradient problem we are given two circles (c₁,r₁) and
- * (c₂,r₂) that define the gradient itself.
- *
- * Mathematically the gradient can be defined as the family of circles
- *
- * ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
- *
- * excluding those circles whose radius would be < 0. When a point
- * belongs to more than one circle, the one with a bigger t is the only
- * one that contributes to its color. When a point does not belong
- * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
- * Further limitations on the range of values for t are imposed when
- * the gradient is not repeated, namely t must belong to [0,1].
- *
- * The graphical result is the same as drawing the valid (radius > 0)
- * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
- * is not repeated) using SOURCE operator composition.
- *
- * It looks like a cone pointing towards the viewer if the ending circle
- * is smaller than the starting one, a cone pointing inside the page if
- * the starting circle is the smaller one and like a cylinder if they
- * have the same radius.
- *
- * What we actually do is, given the point whose color we are interested
- * in, compute the t values for that point, solving for t in:
- *
- * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
- *
- * Let's rewrite it in a simpler way, by defining some auxiliary
- * variables:
- *
- * cd = c₂ - c₁
- * pd = p - c₁
- * dr = r₂ - r₁
- * length(t·cd - pd) = r₁ + t·dr
- *
- * which actually means
- *
- * hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
- *
- * or
- *
- * ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
- *
- * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
- *
- * (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
- *
- * where we can actually expand the squares and solve for t:
- *
- * t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
- * = r₁² + 2·r₁·t·dr + t²·dr²
- *
- * (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
- * (pdx² + pdy² - r₁²) = 0
- *
- * A = cdx² + cdy² - dr²
- * B = pdx·cdx + pdy·cdy + r₁·dr
- * C = pdx² + pdy² - r₁²
- * At² - 2Bt + C = 0
- *
- * The solutions (unless the equation degenerates because of A = 0) are:
- *
- * t = (B ± ⎷(B² - A·C)) / A
- *
- * The solution we are going to prefer is the bigger one, unless the
- * radius associated to it is negative (or it falls outside the valid t
- * range).
- *
- * Additional observations (useful for optimizations):
- * A does not depend on p
- *
- * A < 0 <=> one of the two circles completely contains the other one
- * <=> for every p, the radiuses associated with the two t solutions
- * have opposite sign
- */
- pixman_image_t *image = iter->image;
- int x = iter->x;
- int y = iter->y;
- int width = iter->width;
- uint16_t *buffer = iter->buffer;
- pixman_bool_t toggle = ((x ^ y) & 1);
-
- gradient_t *gradient = (gradient_t *)image;
- radial_gradient_t *radial = (radial_gradient_t *)image;
- uint16_t *end = buffer + width;
- pixman_gradient_walker_t walker;
- pixman_vector_t v, unit;
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
- if (image->common.transform)
- {
- if (!pixman_transform_point_3d (image->common.transform, &v))
- return iter->buffer;
-
- unit.vector[0] = image->common.transform->matrix[0][0];
- unit.vector[1] = image->common.transform->matrix[1][0];
- unit.vector[2] = image->common.transform->matrix[2][0];
- }
- else
- {
- unit.vector[0] = pixman_fixed_1;
- unit.vector[1] = 0;
- unit.vector[2] = 0;
- }
-
- if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
- {
- /*
- * Given:
- *
- * t = (B ± ⎷(B² - A·C)) / A
- *
- * where
- *
- * A = cdx² + cdy² - dr²
- * B = pdx·cdx + pdy·cdy + r₁·dr
- * C = pdx² + pdy² - r₁²
- * det = B² - A·C
- *
- * Since we have an affine transformation, we know that (pdx, pdy)
- * increase linearly with each pixel,
- *
- * pdx = pdx₀ + n·ux,
- * pdy = pdy₀ + n·uy,
- *
- * we can then express B, C and det through multiple differentiation.
- */
- pixman_fixed_32_32_t b, db, c, dc, ddc;
-
- /* warning: this computation may overflow */
- v.vector[0] -= radial->c1.x;
- v.vector[1] -= radial->c1.y;
-
- /*
- * B and C are computed and updated exactly.
- * If fdot was used instead of dot, in the worst case it would
- * lose 11 bits of precision in each of the multiplication and
- * summing up would zero out all the bit that were preserved,
- * thus making the result 0 instead of the correct one.
- * This would mean a worst case of unbound relative error or
- * about 2^10 absolute error
- */
- b = dot (v.vector[0], v.vector[1], radial->c1.radius,
- radial->delta.x, radial->delta.y, radial->delta.radius);
- db = dot (unit.vector[0], unit.vector[1], 0,
- radial->delta.x, radial->delta.y, 0);
-
- c = dot (v.vector[0], v.vector[1],
- -((pixman_fixed_48_16_t) radial->c1.radius),
- v.vector[0], v.vector[1], radial->c1.radius);
- dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
- 2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
- 0,
- unit.vector[0], unit.vector[1], 0);
- ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
- unit.vector[0], unit.vector[1], 0);
-
- while (buffer < end)
- {
- if (!mask || *mask++)
- {
- *buffer = dither_8888_to_0565(
- radial_compute_color (radial->a, b, c,
- radial->inva,
- radial->delta.radius,
- radial->mindr,
- &walker,
- image->common.repeat),
- toggle);
- }
-
- toggle ^= 1;
- b += db;
- c += dc;
- dc += ddc;
- ++buffer;
- }
- }
- else
- {
- /* projective */
- /* Warning:
- * error propagation guarantees are much looser than in the affine case
- */
- while (buffer < end)
- {
- if (!mask || *mask++)
- {
- if (v.vector[2] != 0)
- {
- double pdx, pdy, invv2, b, c;
-
- invv2 = 1. * pixman_fixed_1 / v.vector[2];
-
- pdx = v.vector[0] * invv2 - radial->c1.x;
- /* / pixman_fixed_1 */
-
- pdy = v.vector[1] * invv2 - radial->c1.y;
- /* / pixman_fixed_1 */
-
- b = fdot (pdx, pdy, radial->c1.radius,
- radial->delta.x, radial->delta.y,
- radial->delta.radius);
- /* / pixman_fixed_1 / pixman_fixed_1 */
-
- c = fdot (pdx, pdy, -radial->c1.radius,
- pdx, pdy, radial->c1.radius);
- /* / pixman_fixed_1 / pixman_fixed_1 */
-
- *buffer = dither_8888_to_0565 (
- radial_compute_color (radial->a, b, c,
- radial->inva,
- radial->delta.radius,
- radial->mindr,
- &walker,
- image->common.repeat),
- toggle);
- }
- else
- {
- *buffer = 0;
- }
- }
-
- ++buffer;
- toggle ^= 1;
-
- v.vector[0] += unit.vector[0];
- v.vector[1] += unit.vector[1];
- v.vector[2] += unit.vector[2];
- }
- }
-
- iter->y++;
- return iter->buffer;
+ return radial_get_scanline (iter, mask, 4,
+ _pixman_gradient_walker_write_narrow);
}
+
static uint32_t *
radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
- uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
-
- pixman_expand_to_float (
- (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
- return buffer;
+ return radial_get_scanline (iter, NULL, 16,
+ _pixman_gradient_walker_write_wide);
}
void
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
- if (iter->iter_flags & ITER_16)
- iter->get_scanline = radial_get_scanline_16;
- else if (iter->iter_flags & ITER_NARROW)
+ if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = radial_get_scanline_narrow;
else
iter->get_scanline = radial_get_scanline_wide;
}
-
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner,
- const pixman_point_fixed_t * outer,
- pixman_fixed_t inner_radius,
- pixman_fixed_t outer_radius,
- const pixman_gradient_stop_t *stops,
- int n_stops)
+ const pixman_point_fixed_t * outer,
+ pixman_fixed_t inner_radius,
+ pixman_fixed_t outer_radius,
+ const pixman_gradient_stop_t *stops,
+ int n_stops)
{
pixman_image_t *image;
radial_gradient_t *radial;
diff --git a/gfx/cairo/libpixman/src/pixman-region.c b/gfx/cairo/libpixman/src/pixman-region.c
index 7f2e29b15b..59bc9c7971 100644
--- a/gfx/cairo/libpixman/src/pixman-region.c
+++ b/gfx/cairo/libpixman/src/pixman-region.c
@@ -298,13 +298,6 @@ PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2)
box_type_t *rects1;
box_type_t *rects2;
- /*
- * If the region is empty the extents are undefined so we need to check
- * for empty before comparing the extents.
- */
- if (PIXREGION_NIL (reg1) && PIXREGION_NIL(reg2))
- return TRUE;
-
if (reg1->extents.x1 != reg2->extents.x1)
return FALSE;
@@ -1341,15 +1334,6 @@ PREFIX(_intersect_rect) (region_type_t *dest,
region.extents.x2 = x + width;
region.extents.y2 = y + height;
- if (!GOOD_RECT (&region.extents))
- {
- if (BAD_RECT (&region.extents))
- _pixman_log_error (FUNC, "Invalid rectangle passed");
- FREE_DATA (dest);
- PREFIX (_init) (dest);
- return TRUE;
- }
-
return PREFIX(_intersect) (dest, source, &region);
}
@@ -1874,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region,
else if (r2->x1 <= x1)
{
/*
- * Subtrahend preceeds minuend: nuke left edge of minuend.
+ * Subtrahend precedes minuend: nuke left edge of minuend.
*/
x1 = r2->x2;
if (x1 >= r1->x2)
@@ -1998,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d,
}
/* Add those rectangles in region 1 that aren't in region 2,
- do yucky substraction for overlaps, and
+ do yucky subtraction for overlaps, and
just throw away rectangles in region 2 that aren't in region 1 */
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
@@ -2058,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
}
/* Add those rectangles in region 1 that aren't in region 2,
- * do yucky substraction for overlaps, and
+ * do yucky subtraction for overlaps, and
* just throw away rectangles in region 2 that aren't in region 1
*/
inv_reg.extents = *inv_rect;
diff --git a/gfx/cairo/libpixman/src/pixman-solid-fill.c b/gfx/cairo/libpixman/src/pixman-solid-fill.c
index 5f9fef6306..4694ebc700 100644
--- a/gfx/cairo/libpixman/src/pixman-solid-fill.c
+++ b/gfx/cairo/libpixman/src/pixman-solid-fill.c
@@ -30,10 +30,10 @@ static uint32_t
color_to_uint32 (const pixman_color_t *color)
{
return
- (color->alpha >> 8 << 24) |
- (color->red >> 8 << 16) |
- (color->green & 0xff00) |
- (color->blue >> 8);
+ ((unsigned int) color->alpha >> 8 << 24) |
+ ((unsigned int) color->red >> 8 << 16) |
+ ((unsigned int) color->green & 0xff00) |
+ ((unsigned int) color->blue >> 8);
}
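/* A hedged note (assumption about intent): without the casts, the uint16_t
 * operand is promoted to signed int, and e.g. alpha = 0xffff makes
 * (alpha >> 8) << 24 shift a bit into the sign position -- undefined
 * behaviour. The unsigned arithmetic below is well defined: */
#include <stdint.h>

static uint32_t
pack_alpha (uint16_t alpha)
{
    return (unsigned int) alpha >> 8 << 24;
}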
static argb_t
diff --git a/gfx/cairo/libpixman/src/pixman-sse2.c b/gfx/cairo/libpixman/src/pixman-sse2.c
index e4e668d389..ce4e75f247 100644
--- a/gfx/cairo/libpixman/src/pixman-sse2.c
+++ b/gfx/cairo/libpixman/src/pixman-sse2.c
@@ -30,6 +30,9 @@
#include <config.h>
#endif
+/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */
+#define PSHUFD_IS_FAST 0
+
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
@@ -515,7 +518,8 @@ core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
- uint32_t s = *ps;
+ uint32_t s;
+ memcpy(&s, ps, sizeof(uint32_t));
if (pm)
{
@@ -3198,7 +3202,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
- uint32_t m, d;
+ uint32_t d;
__m128i xmm_src, xmm_alpha, xmm_def;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -3253,7 +3257,8 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
- m = *((uint32_t*)mask);
+ uint32_t m;
+ memcpy(&m, mask, sizeof(uint32_t));
if (srca == 0xff && m == 0xffffffff)
{
@@ -3330,8 +3335,8 @@ sse2_fill (pixman_implementation_t *imp,
if (bpp == 8)
{
- uint8_t b;
- uint16_t w;
+ uint32_t b;
+ uint32_t w;
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
@@ -3473,7 +3478,6 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
- uint32_t m;
__m128i xmm_src, xmm_def;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
@@ -3525,7 +3529,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
- m = *((uint32_t*)mask);
+ uint32_t m;
+ memcpy(&m, mask, sizeof(uint32_t));
if (srca == 0xff && m == 0xffffffff)
{
@@ -3591,7 +3596,6 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
- uint32_t m;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
__m128i xmm_src, xmm_alpha;
@@ -3623,7 +3627,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
while (w && (uintptr_t)dst & 15)
{
- m = *mask++;
+ uint8_t m = *mask++;
if (m)
{
@@ -3643,11 +3647,13 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
while (w >= 8)
{
+ uint32_t m;
+
xmm_dst = load_128_aligned ((__m128i*) dst);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
- m = *((uint32_t*)mask);
+ memcpy(&m, mask, sizeof(uint32_t));
mask += 4;
if (m)
@@ -3667,7 +3673,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
&xmm_dst0, &xmm_dst1);
}
- m = *((uint32_t*)mask);
+ memcpy(&m, mask, sizeof(uint32_t));
mask += 4;
if (m)
@@ -3696,7 +3702,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
while (w)
{
- m = *mask++;
+ uint8_t m = *mask++;
if (m)
{
@@ -4058,7 +4064,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint32_t d, m;
+ uint32_t d;
uint32_t src;
int32_t w;
@@ -4085,7 +4091,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
while (w && ((uintptr_t)dst & 15))
{
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
d = (uint32_t) *dst;
*dst++ = (uint8_t) pack_1x128_32 (
@@ -4122,7 +4128,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
while (w)
{
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
d = (uint32_t) *dst;
*dst++ = (uint8_t) pack_1x128_32 (
@@ -4299,7 +4305,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
- uint32_t m, d;
+ uint32_t d;
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
@@ -4324,7 +4330,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
while (w && ((uintptr_t)dst & 15))
{
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
d = (uint32_t) *dst;
*dst++ = (uint8_t) pack_1x128_32 (
@@ -4360,7 +4366,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
while (w)
{
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
d = (uint32_t) *dst;
*dst++ = (uint8_t) pack_1x128_32 (
@@ -4558,7 +4564,7 @@ sse2_composite_add_n_8888 (pixman_implementation_t *imp,
dst = dst_line;
dst_line += dst_stride;
- while (w && (unsigned long)dst & 15)
+ while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ =
@@ -4617,7 +4623,7 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
mask_line += mask_stride;
w = width;
- while (w && ((unsigned long)dst & 15))
+ while (w && ((uintptr_t)dst & 15))
{
uint8_t m = *mask++;
if (m)
@@ -4633,7 +4639,9 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
- uint32_t m = *(uint32_t*)mask;
+ uint32_t m;
+ memcpy(&m, mask, sizeof(uint32_t));
+
if (m)
{
__m128i xmm_mask_lo, xmm_mask_hi;
@@ -4740,7 +4748,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 2 && ((uintptr_t)d & 3))
{
- *(uint16_t *)d = *(uint16_t *)s;
+ memmove(d, s, 2);
w -= 2;
s += 2;
d += 2;
@@ -4748,7 +4756,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 4 && ((uintptr_t)d & 15))
{
- *(uint32_t *)d = *(uint32_t *)s;
+ memmove(d, s, 4);
w -= 4;
s += 4;
@@ -4785,7 +4793,7 @@ sse2_blt (pixman_implementation_t *imp,
while (w >= 4)
{
- *(uint32_t *)d = *(uint32_t *)s;
+ memmove(d, s, 4);
w -= 4;
s += 4;
@@ -4794,7 +4802,7 @@ sse2_blt (pixman_implementation_t *imp,
if (w >= 2)
{
- *(uint16_t *)d = *(uint16_t *)s;
+ memmove(d, s, 2);
w -= 2;
s += 2;
d += 2;
@@ -4826,7 +4834,6 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
- uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
__m128i ms;
@@ -4855,8 +4862,8 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w && (uintptr_t)dst & 15)
{
+ uint8_t m = *mask++;
s = 0xff000000 | *src++;
- m = (uint32_t) *mask++;
d = *dst;
ms = unpack_32_1x128 (s);
@@ -4874,7 +4881,8 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
- m = *(uint32_t*) mask;
+ uint32_t m;
+ memcpy(&m, mask, sizeof(uint32_t));
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
@@ -4910,7 +4918,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (w)
{
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
if (m)
{
@@ -4951,7 +4959,6 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
- uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
@@ -4980,9 +4987,9 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
while (w && (uintptr_t)dst & 15)
{
uint32_t sa;
+ uint8_t m = *mask++;
s = *src++;
- m = (uint32_t) *mask++;
d = *dst;
sa = s >> 24;
@@ -5013,7 +5020,8 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
while (w >= 4)
{
- m = *(uint32_t *) mask;
+ uint32_t m;
+ memcpy(&m, mask, sizeof(uint32_t));
if (m)
{
@@ -5052,9 +5060,9 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
while (w)
{
uint32_t sa;
+ uint8_t m = *mask++;
s = *src++;
- m = (uint32_t) *mask++;
d = *dst;
sa = s >> 24;
@@ -5554,69 +5562,134 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
-#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1)
+#if PSHUFD_IS_FAST
+
+/***********************************************************************************/
-#define BILINEAR_DECLARE_VARIABLES \
+# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
- const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\
- const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
- const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\
- const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
- const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
- unit_x, unit_x, unit_x, unit_x); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
+ unit_x, -unit_x, unit_x, -unit_x); \
+ const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
- __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
+ __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \
+ vx + unit_x * 2, -(vx + 1) - unit_x * 2, \
+ vx + unit_x * 1, -(vx + 1) - unit_x * 1, \
+ vx + unit_x * 0, -(vx + 1) - unit_x * 0); \
+ __m128i xmm_wh_state;
-#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
+#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \
do { \
- __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ int phase = phase_; \
+ __m128i xmm_wh, xmm_a, xmm_b; \
/* fetch 2x2 pixel block into sse2 registers */ \
- __m128i tltr = _mm_loadl_epi64 ( \
- (__m128i *)&src_top[pixman_fixed_to_int (vx)]); \
- __m128i blbr = _mm_loadl_epi64 ( \
- (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \
+ __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
+ __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
vx += unit_x; \
/* vertical interpolation */ \
- a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \
- xmm_wt), \
- _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \
- xmm_wb)); \
- if (BILINEAR_INTERPOLATION_BITS < 8) \
- { \
- /* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
- /* horizontal interpolation */ \
- a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
- a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \
- } \
- else \
+ xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
+ xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
+ xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
+ /* calculate horizontal weights */ \
+ if (phase <= 0) \
{ \
- /* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
- /* horizontal interpolation */ \
- xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
- xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
- a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
- _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
+ xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
+ xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \
+ phase = 0; \
} \
- /* shift and pack the result */ \
- a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \
- a = _mm_packs_epi32 (a, a); \
- a = _mm_packus_epi16 (a, a); \
- pix = _mm_cvtsi128_si32 (a); \
+ xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \
+ phase, phase)); \
+ /* horizontal interpolation */ \
+ xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
+ xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \
+ /* shift the result */ \
+ pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \
} while (0)
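/* A standalone sketch (not part of this patch) checking the paired-weight
 * trick used above: each lane pair holds (x, -(x+1)) == (x, ~x), so one
 * 16-bit shift yields the right-hand weight and the left-hand weight's
 * ones' complement; adding xmm_addc's (0, 1) makes the pair sum to exactly
 * 1 << BILINEAR_INTERPOLATION_BITS. */
#include <assert.h>
#include <stdint.h>

int main (void)
{
    const int bits  = 7;                 /* BILINEAR_INTERPOLATION_BITS */
    const int range = 1 << bits;

    for (uint32_t x = 0; x < 0x10000; x++)
    {
        int wr = (uint16_t) x >> (16 - bits);
        int wl = ((uint16_t) ~x >> (16 - bits)) + 1;  /* +1 from xmm_addc */
        assert (wl + wr == range);
    }
    return 0;
}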
+#else /************************************************************************/
+
+# define BILINEAR_DECLARE_VARIABLES \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
+ unit_x, -unit_x, unit_x, -unit_x); \
+ const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4, \
+ unit_x * 4, -unit_x * 4); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); \
+ __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
+ vx, -(vx + 1), vx, -(vx + 1))
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \
+do { \
+ __m128i xmm_wh, xmm_a, xmm_b; \
+ /* fetch 2x2 pixel block into sse2 registers */ \
+ __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \
+ __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \
+ (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \
+ vx += unit_x; \
+ /* vertical interpolation */ \
+ xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \
+ xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \
+ xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \
+ /* calculate horizontal weights */ \
+ xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
+ /* horizontal interpolation */ \
+ xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \
+ xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \
+ /* shift the result */ \
+ pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \
+} while (0)
+
+/***********************************************************************************/
+
+#endif
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
+do { \
+ __m128i xmm_pix; \
+ BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \
+ xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \
+ xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \
+ pix = _mm_cvtsi128_si32 (xmm_pix); \
+} while(0)
+
+#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix) \
+do { \
+ __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \
+ BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \
+ BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \
+ BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \
+ BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \
+ xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \
+ xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \
+ pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \
+} while(0)
+
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \
} while(0)
+#define BILINEAR_SKIP_FOUR_PIXELS() \
+do { \
+ vx += unit_x * 4; \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \
+} while(0)
+
+/***********************************************************************************/
+
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
@@ -5625,24 +5698,28 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
int32_t w,
int wt,
int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
+ pixman_fixed_t vx_,
+ pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
+ intptr_t vx = vx_;
+ intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix1, pix2, pix3, pix4;
+ uint32_t pix1, pix2;
- while ((w -= 4) >= 0)
+ while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
*dst++ = pix1;
- *dst++ = pix2;
- *dst++ = pix3;
- *dst++ = pix4;
+ w--;
+ }
+
+ while ((w -= 4) >= 0) {
+ __m128i xmm_src;
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
+ _mm_store_si128 ((__m128i *)dst, xmm_src);
+ dst += 4;
}
if (w & 2)
@@ -5661,23 +5738,20 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
}
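/* The loop shape adopted above, as a hedged standalone sketch (the helper
 * names are hypothetical stubs, not pixman API): a one-pixel prologue runs
 * until dst is 16-byte aligned, the main body then uses aligned 128-bit
 * stores four pixels at a time, and (w & 2) / (w & 1) tails finish the
 * scanline. */
#include <stdint.h>

static void process_one (uint32_t *p)          { (void) p; }  /* stub */
static void process_four_aligned (uint32_t *p) { (void) p; }  /* stub */

static void
scanline_shape (uint32_t *dst, int w)
{
    while (w && ((uintptr_t) dst & 15))   /* prologue: reach alignment */
    {
        process_one (dst++);
        w--;
    }

    while ((w -= 4) >= 0)                 /* aligned four-pixel body */
    {
        process_four_aligned (dst);
        dst += 4;
    }

    w += 4;                               /* tail: 0-3 pixels left */
    while (w--)
        process_one (dst++);
}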
-/* Add extra NULL argument to the existing bilinear fast paths to indicate
- * that we don't need two-pass processing */
-
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
@@ -5697,7 +5771,7 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
intptr_t vx = vx_;
intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix1, pix2, pix3, pix4;
+ uint32_t pix1, pix2;
while (w && ((uintptr_t)dst & 15))
{
@@ -5708,14 +5782,9 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
while ((w -= 4) >= 0) {
__m128i xmm_src;
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
-
- xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
- _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
- dst += 4;
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
+ _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
+ dst += 4;
}
if (w & 2)
@@ -5734,23 +5803,18 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,
- scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,
- scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,
- scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
-#if 0
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
- uint32_t, uint32_t, uint32_t,
- PAD, FLAG_NONE)
-#endif
+
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
@@ -5759,13 +5823,15 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
+ pixman_fixed_t vx_,
+ pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
+ intptr_t vx = vx_;
+ intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix1, pix2, pix3, pix4;
+ uint32_t pix1, pix2;
while (w && ((uintptr_t)dst & 15))
{
@@ -5787,12 +5853,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
__m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
__m128i xmm_alpha_hi, xmm_alpha_lo;
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
-
- xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
if (!is_zero (xmm_src))
{
@@ -5835,56 +5896,22 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
-
-/* An example of SSE2 two-stage bilinear_over_8888_0565 fast path, which is implemented
- as scaled_bilinear_scanline_sse2_8888_8888_SRC + op_bilinear_over_8888_0565 */
-
-void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
-{
- /* Note: this is not really fast and should be based on 8 pixel loop from sse2_composite_over_8888_0565 */
- while (--width >= 0)
- {
- *dst = composite_over_8888_0565pixel (*src, *dst);
- src++;
- dst++;
- }
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_cover_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- COVER, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_pad_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- PAD, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_none_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- NONE, FLAG_NONE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_normal_OVER,
- scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
- uint32_t, uint32_t, uint16_t,
- NORMAL, FLAG_NONE)
-
-/*****************************/
-
static force_inline void
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
@@ -5893,20 +5920,20 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
+ pixman_fixed_t vx_,
+ pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
+ intptr_t vx = vx_;
+ intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix1, pix2, pix3, pix4;
- uint32_t m;
+ uint32_t pix1, pix2;
while (w && ((uintptr_t)dst & 15))
{
uint32_t sa;
-
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
if (m)
{
@@ -5942,20 +5969,17 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
while (w >= 4)
{
+ uint32_t m;
+
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- m = *(uint32_t*)mask;
+ memcpy(&m, mask, sizeof(uint32_t));
if (m)
{
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
-
- xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
if (m == 0xffffffff && is_opaque (xmm_src))
{
@@ -5982,10 +6006,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
}
else
{
- BILINEAR_SKIP_ONE_PIXEL ();
- BILINEAR_SKIP_ONE_PIXEL ();
- BILINEAR_SKIP_ONE_PIXEL ();
- BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_FOUR_PIXELS ();
}
w -= 4;
@@ -5996,8 +6017,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
while (w)
{
uint32_t sa;
-
- m = (uint32_t) *mask++;
+ uint8_t m = *mask++;
if (m)
{
@@ -6033,19 +6053,19 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
- scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
- scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
- scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
- scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
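
The `m = *(uint32_t*)mask` load replaced above read four a8 mask bytes through a type-punned pointer, which is undefined behaviour under strict aliasing and can fault on alignment-strict targets; the memcpy spelling is the portable form and compiles to the same single load on x86. A minimal standalone helper showing the idiom (the helper name is ours, not pixman's):

#include <stdint.h>
#include <string.h>

/* Portable unaligned 32-bit load: modern compilers lower the memcpy
 * to one mov instruction, with none of the aliasing or alignment UB
 * of *(uint32_t *)p.
 */
static inline uint32_t
load_u32_unaligned (const void *p)
{
    uint32_t v;
    memcpy (&v, p, sizeof v);
    return v;
}
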
@@ -6057,13 +6077,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
int32_t w,
int wt,
int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
+ pixman_fixed_t vx_,
+ pixman_fixed_t unit_x_,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
+ intptr_t vx = vx_;
+ intptr_t unit_x = unit_x_;
BILINEAR_DECLARE_VARIABLES;
- uint32_t pix1, pix2, pix3, pix4;
+ uint32_t pix1;
__m128i xmm_mask;
if (zero_src || (*mask >> 24) == 0)
@@ -6093,19 +6115,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
while (w >= 4)
{
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
- BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+ __m128i xmm_src;
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
- if (pix1 | pix2 | pix3 | pix4)
+ if (!is_zero (xmm_src))
{
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
- xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
-
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
@@ -6148,19 +6166,19 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
- scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
- scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
- scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
- scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_HAVE_SOLID_MASK)
@@ -6260,31 +6278,15 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
@@ -6315,11 +6317,6 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
- /* and here the needed entries are added to the fast path table */
-
- SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, sse2_8888_0565),
- SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, b5g6r5, sse2_8888_0565),
-
{ PIXMAN_OP_NONE },
};
@@ -6417,7 +6414,7 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w && (((uintptr_t)dst) & 15))
{
- *dst++ = *(src++) << 24;
+ *dst++ = (uint32_t)(*(src++)) << 24;
w--;
}
@@ -6444,59 +6441,30 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
while (w)
{
- *dst++ = *(src++) << 24;
+ *dst++ = (uint32_t)(*(src++)) << 24;
w--;
}
return iter->buffer;
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
- { PIXMAN_a8, sse2_fetch_a8 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
+static const pixman_iter_info_t sse2_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
@@ -6554,7 +6522,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
imp->blt = sse2_blt;
imp->fill = sse2_fill;
- imp->src_iter_init = sse2_src_iter_init;
+ imp->iter_info = sse2_iters;
return imp;
}
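
The per-implementation src_iter_init callback is replaced here by a declarative iter_info table; the generic code walks the table and picks the first entry whose format and flags match the iterator being set up. A hedged sketch of what that matching loop looks like (field names follow the pixman_iter_info_t initializers above; the actual generic-path code lives in pixman-implementation.c and may differ):

/* Sketch, not the verbatim pixman-implementation.c code. */
static pixman_bool_t
iter_init_from_table (const pixman_iter_info_t *info, pixman_iter_t *iter)
{
    for (; info->format != PIXMAN_null; ++info)
    {
        if (iter->image->common.extended_format_code == info->format &&
            (iter->image_flags & info->image_flags) == info->image_flags &&
            (iter->iter_flags & info->iter_flags) == info->iter_flags)
        {
            if (info->initializer)
                info->initializer (iter, info);

            iter->get_scanline = info->get_scanline;
            return TRUE;
        }
    }

    return FALSE;
}
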
diff --git a/gfx/cairo/libpixman/src/pixman-ssse3.c b/gfx/cairo/libpixman/src/pixman-ssse3.c
new file mode 100644
index 0000000000..680d6b95a0
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-ssse3.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright © 2013 Soren Sandmann Pedersen
+ * Copyright © 2013 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann (soren.sandmann@gmail.com)
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#include "pixman-private.h"
+#include "pixman-inlines.h"
+
+typedef struct
+{
+ int y;
+ uint64_t * buffer;
+} line_t;
+
+typedef struct
+{
+ line_t lines[2];
+ pixman_fixed_t y;
+ pixman_fixed_t x;
+ uint64_t data[1];
+} bilinear_info_t;
+
+static void
+ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
+ int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
+{
+ uint32_t *bits = image->bits + y * image->rowstride;
+ __m128i vx = _mm_set_epi16 (
+ - (x + 1), x, - (x + 1), x,
+ - (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
+ __m128i vux = _mm_set_epi16 (
+ - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
+ - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
+ __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
+ __m128i *b = (__m128i *)line->buffer;
+ __m128i vrl0, vrl1;
+
+ while ((n -= 2) >= 0)
+ {
+ __m128i vw, vr, s;
+
+ vrl1 = _mm_loadl_epi64 (
+ (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
+ /* vrl1: R1, L1 */
+
+ final_pixel:
+ vrl0 = _mm_loadl_epi64 (
+ (__m128i *)(bits + pixman_fixed_to_int (x)));
+ /* vrl0: R0, L0 */
+
+ /* The weights are based on vx which is a vector of
+ *
+ * - (x + 1), x, - (x + 1), x,
+ * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
+ *
+ * so the 16 bit weights end up like this:
+ *
+ * iw0, w0, iw0, w0, iw1, w1, iw1, w1
+ *
+ * and after shifting and packing, we get these bytes:
+ *
+ * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+ * iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+ *
+ * which means the first and the second input pixel
+ * have to be interleaved like this:
+ *
+ * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+ * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+ *
+ * before maddubsw can be used.
+ */
+
+ vw = _mm_add_epi16 (
+ vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
+ /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
+ */
+
+ vw = _mm_packus_epi16 (vw, vw);
+ /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+ * iw0, w0, iw0, w0, iw1, w1, iw1, w1
+ */
+ vx = _mm_add_epi16 (vx, vux);
+
+ x += 2 * ux;
+
+ vr = _mm_unpacklo_epi16 (vrl1, vrl0);
+ /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
+
+ s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
+ /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
+
+ vr = _mm_unpackhi_epi8 (vr, s);
+ /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+ * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+ */
+
+ vr = _mm_maddubs_epi16 (vr, vw);
+
+ /* When the weight is 0, the inverse weight is
+ * 128 which can't be represented in a signed byte.
+ * As a result maddubsw computes the following:
+ *
+ * r = l * -128 + r * 0
+ *
+ * rather than the desired
+ *
+ * r = l * 128 + r * 0
+ *
+ * We fix this by taking the absolute value of the
+ * result.
+ */
+ vr = _mm_abs_epi16 (vr);
+
+ /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
+ _mm_store_si128 (b++, vr);
+ }
+
+ if (n == -1)
+ {
+ vrl1 = _mm_setzero_si128();
+ goto final_pixel;
+ }
+
+ line->y = y;
+}
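
A scalar model of the horizontal pass may help: each 16-bit output lane holds l*iw + r*w, where w is the fractional x weight scaled to BILINEAR_INTERPOLATION_BITS and iw its complement. The sketch below, which assumes the default BILINEAR_INTERPOLATION_BITS == 7 (weights in [0, 128]), models the maddubsw signed-byte quirk and the _mm_abs_epi16 fixup described in the comments above:

#include <stdint.h>
#include <stdlib.h>

/* One channel of the horizontal pass, scalar model.  The pixel bytes
 * fed to pmaddubsw are unsigned but the weight bytes are signed, so
 * the inverse weight 128 (used when w == 0) is read as -128; taking
 * the absolute value of the sum recovers the intended l*iw + r*w.
 */
static uint16_t
horizontal_weight (uint8_t l, uint8_t r, int w)   /* w in [0, 128] */
{
    int iw        = 128 - w;
    int signed_iw = (iw == 128) ? -128 : iw;      /* byte as signed  */
    int sum       = l * signed_iw + r * w;        /* what maddubsw yields */

    return (uint16_t) abs (sum);                  /* the _mm_abs_epi16 fixup */
}
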
+
+static uint32_t *
+ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
+{
+ pixman_fixed_t fx, ux;
+ bilinear_info_t *info = iter->data;
+ line_t *line0, *line1;
+ int y0, y1;
+ int32_t dist_y;
+ __m128i vw;
+ int i;
+
+ fx = info->x;
+ ux = iter->image->common.transform->matrix[0][0];
+
+ y0 = pixman_fixed_to_int (info->y);
+ y1 = y0 + 1;
+
+ line0 = &info->lines[y0 & 0x01];
+ line1 = &info->lines[y1 & 0x01];
+
+ if (line0->y != y0)
+ {
+ ssse3_fetch_horizontal (
+ &iter->image->bits, line0, y0, fx, ux, iter->width);
+ }
+
+ if (line1->y != y1)
+ {
+ ssse3_fetch_horizontal (
+ &iter->image->bits, line1, y1, fx, ux, iter->width);
+ }
+
+ dist_y = pixman_fixed_to_bilinear_weight (info->y);
+ dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
+
+ vw = _mm_set_epi16 (
+ dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
+
+ for (i = 0; i + 3 < iter->width; i += 4)
+ {
+ __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
+ __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
+ __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
+ __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
+ __m128i r0, r1, tmp, p;
+
+ r0 = _mm_mulhi_epu16 (
+ _mm_sub_epi16 (bot0, top0), vw);
+ tmp = _mm_cmplt_epi16 (bot0, top0);
+ tmp = _mm_and_si128 (tmp, vw);
+ r0 = _mm_sub_epi16 (r0, tmp);
+ r0 = _mm_add_epi16 (r0, top0);
+ r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
+ /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
+ r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
+ /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
+
+ r1 = _mm_mulhi_epu16 (
+ _mm_sub_epi16 (bot1, top1), vw);
+ tmp = _mm_cmplt_epi16 (bot1, top1);
+ tmp = _mm_and_si128 (tmp, vw);
+ r1 = _mm_sub_epi16 (r1, tmp);
+ r1 = _mm_add_epi16 (r1, top1);
+ r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
+ r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
+ /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
+
+ p = _mm_packus_epi16 (r0, r1);
+
+ _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
+ }
+
+ while (i < iter->width)
+ {
+ __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
+ __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
+ __m128i r0, tmp, p;
+
+ r0 = _mm_mulhi_epu16 (
+ _mm_sub_epi16 (bot0, top0), vw);
+ tmp = _mm_cmplt_epi16 (bot0, top0);
+ tmp = _mm_and_si128 (tmp, vw);
+ r0 = _mm_sub_epi16 (r0, tmp);
+ r0 = _mm_add_epi16 (r0, top0);
+ r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
+ /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
+ r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
+ /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
+
+ p = _mm_packus_epi16 (r0, r0);
+
+ if (iter->width - i == 1)
+ {
+ *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
+ i++;
+ }
+ else
+ {
+ _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
+ i += 2;
+ }
+ }
+
+ info->y += iter->image->common.transform->matrix[1][1];
+
+ return iter->buffer;
+}
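
The vertical blend above uses an unsigned-multiply-high trick: it wants top + ((bot - top) * w) >> 16, but the lanes are unsigned, so bot - top wraps when bot < top. Since ((d + 65536) * w) >> 16 equals ((d * w) >> 16) + w, subtracting w in exactly the wrapped lanes (the cmplt/and/sub sequence) undoes the bias. A scalar model of one lane (the real code additionally shifts right by BILINEAR_INTERPOLATION_BITS to remove the 128x scale left by the horizontal pass):

#include <stdint.h>

/* result = top + ((bot - top) * w) >> 16 for 16-bit lanes,
 * w in [0, 65535], computed the way the SSSE3 path does it.
 */
static uint16_t
vlerp16 (uint16_t top, uint16_t bot, uint16_t w)
{
    uint16_t d  = (uint16_t) (bot - top);                 /* may wrap      */
    uint16_t hi = (uint16_t) (((uint32_t) d * w) >> 16);  /* mulhi_epu16   */

    if (bot < top)
        hi -= w;                                          /* wrap fixup    */

    return (uint16_t) (top + hi);
}
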
+
+static void
+ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
+{
+ free (iter->data);
+}
+
+static void
+ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
+{
+ int width = iter->width;
+ bilinear_info_t *info;
+ pixman_vector_t v;
+
+ /* Reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (iter->image->common.transform, &v))
+ goto fail;
+
+ info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
+ if (!info)
+ goto fail;
+
+ info->x = v.vector[0] - pixman_fixed_1 / 2;
+ info->y = v.vector[1] - pixman_fixed_1 / 2;
+
+#define ALIGN(addr) \
+ ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
+
+ /* It is safe to set the y coordinates to -1 initially
+ * because COVER_CLIP_BILINEAR ensures that we will only
+ * be asked to fetch lines in the [0, height) interval
+ */
+ info->lines[0].y = -1;
+ info->lines[0].buffer = ALIGN (&(info->data[0]));
+ info->lines[1].y = -1;
+ info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
+
+ iter->get_scanline = ssse3_fetch_bilinear_cover;
+ iter->fini = ssse3_bilinear_cover_iter_fini;
+
+ iter->data = info;
+ return;
+
+fail:
+ /* Something went wrong, either a bad matrix or OOM; in such cases,
+ * we don't guarantee any particular rendering.
+ */
+ _pixman_log_error (
+ FUNC, "Allocation failure or bad matrix, skipping rendering\n");
+
+ iter->get_scanline = _pixman_iter_get_scanline_noop;
+ iter->fini = NULL;
+}
+
+static const pixman_iter_info_t ssse3_iters[] =
+{
+ { PIXMAN_a8r8g8b8,
+ (FAST_PATH_STANDARD_FLAGS |
+ FAST_PATH_SCALE_TRANSFORM |
+ FAST_PATH_BILINEAR_FILTER |
+ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
+ ITER_NARROW | ITER_SRC,
+ ssse3_bilinear_cover_iter_init,
+ NULL, NULL
+ },
+
+ { PIXMAN_null },
+};
+
+static const pixman_fast_path_t ssse3_fast_paths[] =
+{
+ { PIXMAN_OP_NONE },
+};
+
+pixman_implementation_t *
+_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp =
+ _pixman_implementation_create (fallback, ssse3_fast_paths);
+
+ imp->iter_info = ssse3_iters;
+
+ return imp;
+}
diff --git a/gfx/cairo/libpixman/src/pixman-utils.c b/gfx/cairo/libpixman/src/pixman-utils.c
index b2ffb8ca2c..2c2dddd64c 100644
--- a/gfx/cairo/libpixman/src/pixman-utils.c
+++ b/gfx/cairo/libpixman/src/pixman-utils.c
@@ -27,7 +27,6 @@
#endif
#include <stdio.h>
#include <stdlib.h>
-#include <limits.h>
#include "pixman-private.h"
@@ -50,6 +49,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b)
}
void *
+pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c)
+{
+ if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c)
+ return NULL;
+
+ return malloc (a * b + c);
+}
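
The new helper rejects any (a, b, c) for which a * b + c would overflow before it ever calls malloc, so callers get a plain NULL for both overflow and OOM. A typical call shape (header_t and the values here are illustrative, not a call site from this patch):

/* Illustrative only: allocate height scanlines of stride bytes plus a
 * small header, with the multiplication checked for overflow first.
 */
uint8_t *buf = pixman_malloc_ab_plus_c (height, stride, sizeof (header_t));
if (!buf)
    return FALSE;   /* overflow or OOM, both collapse to NULL */
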
+
+void *
pixman_malloc_ab (unsigned int a,
unsigned int b)
{
@@ -198,7 +206,7 @@ pixman_contract_from_float (uint32_t *dst,
for (i = 0; i < width; ++i)
{
- uint8_t a, r, g, b;
+ uint32_t a, r, g, b;
a = float_to_unorm (src[i].a, 8);
r = float_to_unorm (src[i].r, 8);
@@ -215,6 +223,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
+void
+_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
+{
+ pixman_image_t *image = iter->image;
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8;
+ iter->stride = s;
+}
+
#define N_TMP_BOXES (16)
pixman_bool_t
@@ -293,8 +312,6 @@ _pixman_internal_only_get_implementation (void)
return get_implementation ();
}
-#ifdef DEBUG
-
void
_pixman_log_error (const char *function, const char *message)
{
@@ -311,5 +328,3 @@ _pixman_log_error (const char *function, const char *message)
n_messages++;
}
}
-
-#endif
diff --git a/gfx/cairo/libpixman/src/pixman-version.h b/gfx/cairo/libpixman/src/pixman-version.h
index fac4225b07..8b0e774271 100644
--- a/gfx/cairo/libpixman/src/pixman-version.h
+++ b/gfx/cairo/libpixman/src/pixman-version.h
@@ -32,10 +32,10 @@
#endif
#define PIXMAN_VERSION_MAJOR 0
-#define PIXMAN_VERSION_MINOR 27
-#define PIXMAN_VERSION_MICRO 1
+#define PIXMAN_VERSION_MINOR 40
+#define PIXMAN_VERSION_MICRO 0
-#define PIXMAN_VERSION_STRING "0.27.1"
+#define PIXMAN_VERSION_STRING "0.40.0"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
@@ -47,4 +47,8 @@
PIXMAN_VERSION_MINOR, \
PIXMAN_VERSION_MICRO)
+#ifndef PIXMAN_API
+# define PIXMAN_API
+#endif
+
#endif /* PIXMAN_VERSION_H__ */
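
Since the encoding packs major/minor/micro in decimal (major * 10000 + minor * 100 + micro), this bump moves PIXMAN_VERSION from 2701 to 4000; compile-time feature checks compare against the same encoding:

/* PIXMAN_VERSION_ENCODE (0, 40, 0) == 0*10000 + 40*100 + 0 == 4000 */
#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 40, 0)
/* safe to rely on 0.40-era behaviour here */
#endif
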
diff --git a/gfx/cairo/libpixman/src/pixman-version.h.in b/gfx/cairo/libpixman/src/pixman-version.h.in
new file mode 100644
index 0000000000..64778a595c
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-version.h.in
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Carl D. Worth <cworth@cworth.org>
+ */
+
+#ifndef PIXMAN_VERSION_H__
+#define PIXMAN_VERSION_H__
+
+#ifndef PIXMAN_H__
+# error pixman-version.h should only be included by pixman.h
+#endif
+
+#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@
+#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@
+#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@
+
+#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@"
+
+#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
+ ((major) * 10000) \
+ + ((minor) * 100) \
+ + ((micro) * 1))
+
+#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \
+ PIXMAN_VERSION_MAJOR, \
+ PIXMAN_VERSION_MINOR, \
+ PIXMAN_VERSION_MICRO)
+
+#ifndef PIXMAN_API
+# define PIXMAN_API
+#endif
+
+#endif /* PIXMAN_VERSION_H__ */
diff --git a/gfx/cairo/libpixman/src/pixman-vmx.c b/gfx/cairo/libpixman/src/pixman-vmx.c
index 6868704a87..52de37e69e 100644
--- a/gfx/cairo/libpixman/src/pixman-vmx.c
+++ b/gfx/cairo/libpixman/src/pixman-vmx.c
@@ -25,20 +25,46 @@
* Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
*/
+#ifdef HAVE_CONFIG_H
#include <config.h>
+#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-inlines.h"
#include <altivec.h>
#define AVV(x...) {x}
+static vector unsigned int mask_ff000000;
+static vector unsigned int mask_red;
+static vector unsigned int mask_green;
+static vector unsigned int mask_blue;
+static vector unsigned int mask_565_fix_rb;
+static vector unsigned int mask_565_fix_g;
+
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
+#ifdef WORDS_BIGENDIAN
return vec_perm (pix, pix,
(vector unsigned char)AVV (
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+#else
+ return vec_perm (pix, pix,
+ (vector unsigned char)AVV (
+ 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07,
+ 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F));
+#endif
+}
+
+static force_inline vector unsigned int
+splat_pixel (vector unsigned int pix)
+{
+ return vec_perm (pix, pix,
+ (vector unsigned char)AVV (
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
+ 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03));
}
static force_inline vector unsigned int
@@ -48,12 +74,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
/* unpack to short */
hi = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
vec_mergeh ((vector unsigned char)AVV (0),
(vector unsigned char)p);
+#else
+ vec_mergeh ((vector unsigned char) p,
+ (vector unsigned char) AVV (0));
+#endif
mod = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
vec_mergeh ((vector unsigned char)AVV (0),
(vector unsigned char)a);
+#else
+ vec_mergeh ((vector unsigned char) a,
+ (vector unsigned char) AVV (0));
+#endif
hi = vec_mladd (hi, mod, (vector unsigned short)
AVV (0x0080, 0x0080, 0x0080, 0x0080,
@@ -65,11 +101,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
/* unpack to short */
lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
vec_mergel ((vector unsigned char)AVV (0),
(vector unsigned char)p);
+#else
+ vec_mergel ((vector unsigned char) p,
+ (vector unsigned char) AVV (0));
+#endif
+
mod = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
vec_mergel ((vector unsigned char)AVV (0),
(vector unsigned char)a);
+#else
+ vec_mergel ((vector unsigned char) a,
+ (vector unsigned char) AVV (0));
+#endif
lo = vec_mladd (lo, mod, (vector unsigned short)
AVV (0x0080, 0x0080, 0x0080, 0x0080,
@@ -127,60 +174,316 @@ over (vector unsigned int src,
over (pix_multiply (src, mask), \
pix_multiply (srca, mask), dest)
+#ifdef WORDS_BIGENDIAN
#define COMPUTE_SHIFT_MASK(source) \
source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKS(dest, source) \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
mask ## _mask = vec_lvsl (0, mask); \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
-
-/* notice you have to declare temp vars...
- * Note: tmp3 and tmp4 must remain untouched!
- */
+ source ## _mask = vec_lvsl (0, source);
-#define LOAD_VECTORS(dest, source) \
+#define LOAD_VECTOR(source) \
+do \
+{ \
+ vector unsigned char tmp1, tmp2; \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
- v ## source = (typeof(v ## source)) \
+ v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask);
+} while (0)
+
+#define LOAD_VECTORS(dest, source) \
+do \
+{ \
+ LOAD_VECTOR(source); \
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
+} while (0)
#define LOAD_VECTORSC(dest, source, mask) \
- tmp1 = (typeof(tmp1))vec_ld (0, source); \
- tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
- v ## source = (typeof(v ## source)) \
- vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
- tmp1 = (typeof(tmp1))vec_ld (0, mask); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask); \
- tmp2 = (typeof(tmp2))vec_ld (15, mask); \
- v ## mask = (typeof(v ## mask)) \
- vec_perm (tmp1, tmp2, mask ## _mask);
+do \
+{ \
+ LOAD_VECTORS(dest, source); \
+ LOAD_VECTOR(mask); \
+} while (0)
+
+#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask
+#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask
+
+#else
+
+/* The COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-ops.
+ * They are defined that way because little endian altivec can do unaligned
+ * reads natively and has no need for constructing the permutation pattern
+ * variables.
+ */

+#define COMPUTE_SHIFT_MASK(source)
+
+#define COMPUTE_SHIFT_MASKS(dest, source)
+
+#define COMPUTE_SHIFT_MASKC(dest, source, mask)
+
+# define LOAD_VECTOR(source) \
+ v ## source = (typeof(v ## source))vec_xl(0, source);
+
+# define LOAD_VECTORS(dest, source) \
+ LOAD_VECTOR(source); \
+ LOAD_VECTOR(dest); \
+
+# define LOAD_VECTORSC(dest, source, mask) \
+ LOAD_VECTORS(dest, source); \
+ LOAD_VECTOR(mask); \
+
+#define DECLARE_SRC_MASK_VAR
+#define DECLARE_MASK_MASK_VAR
+
+#endif /* WORDS_BIGENDIAN */
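
For reference, the big-endian path above implements an unaligned load by reading the two aligned quadwords that straddle the address and merging them with a permute pattern derived from the low address bits; vec_ld ignores the low four bits of the address, so both loads are safe. A self-contained sketch of that idiom, using the same intrinsics as LOAD_VECTOR:

#include <altivec.h>

/* Big-endian unaligned load: fetch the two aligned blocks around src
 * and let vec_perm, driven by vec_lvsl's shift pattern, pick out the
 * 16 bytes that start at src.
 */
static vector unsigned int
load_unaligned_be (const unsigned int *src)
{
    vector unsigned char lo  = (vector unsigned char) vec_ld (0, src);
    vector unsigned char hi  = (vector unsigned char) vec_ld (15, src);
    vector unsigned char sel = vec_lvsl (0, src);

    return (vector unsigned int) vec_perm (lo, hi, sel);
}
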
#define LOAD_VECTORSM(dest, source, mask) \
- LOAD_VECTORSC (dest, source, mask) \
+ LOAD_VECTORSC (dest, source, mask); \
v ## source = pix_multiply (v ## source, \
splat_alpha (v ## mask));
#define STORE_VECTOR(dest) \
- edges = vec_perm (tmp4, tmp3, dest ## _mask); \
- tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
- tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
- vec_st ((vector unsigned int) tmp3, 15, dest); \
- vec_st ((vector unsigned int) tmp1, 0, dest);
+ vec_st ((vector unsigned int) v ## dest, 0, dest);
+
+/* load 4 pixels from a 16-byte boundary aligned address */
+static force_inline vector unsigned int
+load_128_aligned (const uint32_t* src)
+{
+ return *((vector unsigned int *) src);
+}
+
+/* load 4 pixels from an unaligned address */
+static force_inline vector unsigned int
+load_128_unaligned (const uint32_t* src)
+{
+ vector unsigned int vsrc;
+ DECLARE_SRC_MASK_VAR;
+
+ COMPUTE_SHIFT_MASK (src);
+ LOAD_VECTOR (src);
+
+ return vsrc;
+}
+
+/* save 4 pixels on a 16-byte boundary aligned address */
+static force_inline void
+save_128_aligned (uint32_t* data,
+ vector unsigned int vdata)
+{
+ STORE_VECTOR(data)
+}
+
+static force_inline vector unsigned int
+create_mask_1x32_128 (const uint32_t *src)
+{
+ vector unsigned int vsrc;
+ DECLARE_SRC_MASK_VAR;
+
+ COMPUTE_SHIFT_MASK (src);
+ LOAD_VECTOR (src);
+ return vec_splat(vsrc, 0);
+}
+
+static force_inline vector unsigned int
+create_mask_32_128 (uint32_t mask)
+{
+ return create_mask_1x32_128(&mask);
+}
+
+static force_inline vector unsigned int
+unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned char lo;
+
+ /* unpack to short */
+ lo = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+ vec_mergel ((vector unsigned char) data2,
+ (vector unsigned char) data1);
+#else
+ vec_mergel ((vector unsigned char) data1,
+ (vector unsigned char) data2);
+#endif
+
+ return (vector unsigned int) lo;
+}
+
+static force_inline vector unsigned int
+unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned char hi;
+
+ /* unpack to short */
+ hi = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+ vec_mergeh ((vector unsigned char) data2,
+ (vector unsigned char) data1);
+#else
+ vec_mergeh ((vector unsigned char) data1,
+ (vector unsigned char) data2);
+#endif
+
+ return (vector unsigned int) hi;
+}
+
+static force_inline vector unsigned int
+unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned short lo;
+
+ /* unpack to char */
+ lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+ vec_mergel ((vector unsigned short) data2,
+ (vector unsigned short) data1);
+#else
+ vec_mergel ((vector unsigned short) data1,
+ (vector unsigned short) data2);
+#endif
+
+ return (vector unsigned int) lo;
+}
+
+static force_inline vector unsigned int
+unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned short hi;
+
+ /* unpack to char */
+ hi = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+ vec_mergeh ((vector unsigned short) data2,
+ (vector unsigned short) data1);
+#else
+ vec_mergeh ((vector unsigned short) data1,
+ (vector unsigned short) data2);
+#endif
+
+ return (vector unsigned int) hi;
+}
+
+static force_inline void
+unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
+ vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+ *data_lo = unpacklo_128_16x8(data1, data2);
+ *data_hi = unpackhi_128_16x8(data1, data2);
+}
+
+static force_inline void
+unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
+ vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+ *data_lo = unpacklo_128_8x16(data1, data2);
+ *data_hi = unpackhi_128_8x16(data1, data2);
+}
+
+static force_inline vector unsigned int
+unpack_565_to_8888 (vector unsigned int lo)
+{
+ vector unsigned int r, g, b, rb, t;
+
+ r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
+ g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
+ b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
+
+ rb = vec_or (r, b);
+ t = vec_and (rb, mask_565_fix_rb);
+ t = vec_sr (t, create_mask_32_128(5));
+ rb = vec_or (rb, t);
+
+ t = vec_and (g, mask_565_fix_g);
+ t = vec_sr (t, create_mask_32_128(6));
+ g = vec_or (g, t);
+
+ return vec_or (rb, g);
+}
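
The shifts through mask_565_fix_rb and mask_565_fix_g replicate each channel's top bits into the freshly opened low bits, the standard way to expand 5- and 6-bit channels to the full 8-bit range. Per pixel the vector code is equivalent to this scalar form:

#include <stdint.h>

/* Scalar equivalent of unpack_565_to_8888 for one pixel (the alpha
 * byte is left to the caller, as in the vector version).
 */
static uint32_t
convert_565_to_8888 (uint16_t p)
{
    uint32_t r5 = (p >> 11) & 0x1f;
    uint32_t g6 = (p >>  5) & 0x3f;
    uint32_t b5 =  p        & 0x1f;

    uint32_t r8 = (r5 << 3) | (r5 >> 2);   /* replicate top 3 bits */
    uint32_t g8 = (g6 << 2) | (g6 >> 4);   /* replicate top 2 bits */
    uint32_t b8 = (b5 << 3) | (b5 >> 2);

    return (r8 << 16) | (g8 << 8) | b8;
}
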
+
+static force_inline int
+is_opaque (vector unsigned int x)
+{
+ uint32_t cmp_result;
+ vector bool int ffs = vec_cmpeq(x, x);
+
+ cmp_result = vec_all_eq(x, ffs);
+
+ return (cmp_result & 0x8888) == 0x8888;
+}
+
+static force_inline int
+is_zero (vector unsigned int x)
+{
+ uint32_t cmp_result;
+
+ cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
+
+ return cmp_result == 0xffff;
+}
+
+static force_inline int
+is_transparent (vector unsigned int x)
+{
+ uint32_t cmp_result;
+
+ cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0));
+ return (cmp_result & 0x8888) == 0x8888;
+}
+
+static force_inline uint32_t
+core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
+{
+ uint32_t a;
+
+ a = ALPHA_8(src);
+
+ if (a == 0xff)
+ {
+ return src;
+ }
+ else if (src)
+ {
+ UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src);
+ }
+
+ return dst;
+}
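
The UN8x4_* macros used in these scalar loops all reduce to the same rounded byte multiply, a * b / 255 computed without a division; that is also where the 0x0080 constant in pix_multiply's vec_mladd comes from. One channel in isolation:

#include <stdint.h>

/* Rounded a*b/255 for 8-bit channels: t = a*b + 128, then
 * (t + (t >> 8)) >> 8 == round(a*b / 255) for all a, b in [0, 255].
 */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint32_t t = (uint32_t) a * b + 0x80;
    return (uint8_t) ((t + (t >> 8)) >> 8);
}
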
+
+static force_inline uint32_t
+combine1 (const uint32_t *ps, const uint32_t *pm)
+{
+ uint32_t s = *ps;
+
+ if (pm)
+ UN8x4_MUL_UN8(s, ALPHA_8(*pm));
+
+ return s;
+}
+
+static force_inline vector unsigned int
+combine4 (const uint32_t* ps, const uint32_t* pm)
+{
+ vector unsigned int src, msk;
+
+ if (pm)
+ {
+ msk = load_128_unaligned(pm);
+
+ if (is_transparent(msk))
+ return (vector unsigned int) AVV(0);
+ }
+
+ src = load_128_unaligned(ps);
+
+ if (pm)
+ src = pix_multiply(src, msk);
+
+ return src;
+}
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
@@ -189,8 +492,19 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
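
Every vmx_combine_* routine in this patch gains the same shape: a scalar head loop that advances dest to a 16-byte boundary, a vector body whose dest accesses are then aligned (the old STORE_VECTOR wrote through a permute-and-merge dance on every store instead), and the existing scalar tail. Schematically, with a placeholder combine step rather than literal pixman code:

#include <stdint.h>

static uint32_t
f (uint32_t s, uint32_t d)
{
    return s ? s : d;   /* placeholder; real routines use the UN8x4_* math */
}

static void
combine_shape (uint32_t *dest, const uint32_t *src, int width)
{
    while (width && ((uintptr_t) dest & 15))   /* scalar head            */
    {
        *dest = f (*src++, *dest);
        dest++;
        width--;
    }

    while (width >= 4)                         /* aligned vector body    */
    {
        /* vec_ld / STORE_VECTOR on dest, 4 pixels at a time */
        dest += 4;
        src += 4;
        width -= 4;
    }

    while (width)                              /* scalar tail            */
    {
        *dest = f (*src++, *dest);
        dest++;
        width--;
    }
}
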
@@ -228,8 +542,24 @@ vmx_combine_over_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -284,8 +614,18 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -322,8 +662,22 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -377,8 +731,17 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, a);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -413,8 +776,21 @@ vmx_combine_in_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -466,8 +842,18 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -503,8 +889,22 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -557,8 +957,18 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -594,8 +1004,21 @@ vmx_combine_out_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -647,8 +1070,18 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (~(*src++));
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -685,8 +1118,22 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (~a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -739,8 +1186,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -779,8 +1238,26 @@ vmx_combine_atop_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -838,8 +1315,20 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a = ALPHA_8 (s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -878,8 +1367,26 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_a = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -937,8 +1444,20 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia = ALPHA_8 (~s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -977,8 +1496,26 @@ vmx_combine_xor_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1036,8 +1573,18 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1072,8 +1619,21 @@ vmx_combine_add_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1128,8 +1688,19 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+
+ UN8x4_MUL_UN8x4 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1168,8 +1739,23 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1212,8 +1798,22 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ida = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1255,8 +1855,21 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t da = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1297,8 +1910,21 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1340,8 +1966,22 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1384,8 +2024,22 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, ~a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1428,8 +2082,24 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask, vsrca;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1479,8 +2149,24 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1527,8 +2213,24 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1575,8 +2277,21 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ DECLARE_SRC_MASK_VAR;
+ DECLARE_MASK_MASK_VAR;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_ADD_UN8x4 (s, d);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1607,16 +2322,809 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
}
}
+static void
+vmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t src, srca;
+ uint32_t *dst_line, *dst;
+ uint8_t *mask_line;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t m, d, s, ia;
+
+ vector unsigned int vsrc, valpha, vmask, vdst;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ srca = ALPHA_8(src);
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ vsrc = (vector unsigned int) {src, src, src, src};
+ valpha = splat_alpha(vsrc);
+
+ while (height--)
+ {
+ const uint8_t *pm = mask_line;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && (uintptr_t)dst & 15)
+ {
+ s = src;
+ m = *pm++;
+
+ if (m)
+ {
+ d = *dst;
+ UN8x4_MUL_UN8 (s, m);
+ ia = ALPHA_8 (~s);
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *dst = d;
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ m = *((uint32_t*)pm);
+
+ if (srca == 0xff && m == 0xffffffff)
+ {
+ save_128_aligned(dst, vsrc);
+ }
+ else if (m)
+ {
+ vmask = splat_pixel((vector unsigned int) {m, m, m, m});
+
+ /* dst is 16-byte aligned */
+ vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst));
+
+ save_128_aligned(dst, vdst);
+ }
+
+ w -= 4;
+ dst += 4;
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = src;
+ m = *pm++;
+
+ if (m)
+ {
+ d = *dst;
+ UN8x4_MUL_UN8 (s, m);
+ ia = ALPHA_8 (~s);
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *dst = d;
+ }
+
+ w--;
+ dst++;
+ }
+ }
+
+}
+
+static pixman_bool_t
+vmx_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t filler)
+{
+ uint32_t byte_width;
+ uint8_t *byte_line;
+
+ vector unsigned int vfiller;
+
+ if (bpp == 8)
+ {
+ uint8_t b;
+ uint16_t w;
+
+ stride = stride * (int) sizeof (uint32_t) / 1;
+ byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+ byte_width = width;
+ stride *= 1;
+
+ b = filler & 0xff;
+ w = (b << 8) | b;
+ filler = (w << 16) | w;
+ }
+ else if (bpp == 16)
+ {
+ stride = stride * (int) sizeof (uint32_t) / 2;
+ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+ byte_width = 2 * width;
+ stride *= 2;
+
+ filler = (filler & 0xffff) * 0x00010001;
+ }
+ else if (bpp == 32)
+ {
+ stride = stride * (int) sizeof (uint32_t) / 4;
+ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+ byte_width = 4 * width;
+ stride *= 4;
+ }
+ else
+ {
+ return FALSE;
+ }
+
+ vfiller = create_mask_1x32_128(&filler);
+
+ while (height--)
+ {
+ int w;
+ uint8_t *d = byte_line;
+ byte_line += stride;
+ w = byte_width;
+
+ if (w >= 1 && ((uintptr_t)d & 1))
+ {
+ *(uint8_t *)d = filler;
+ w -= 1;
+ d += 1;
+ }
+
+ while (w >= 2 && ((uintptr_t)d & 3))
+ {
+ *(uint16_t *)d = filler;
+ w -= 2;
+ d += 2;
+ }
+
+ while (w >= 4 && ((uintptr_t)d & 15))
+ {
+ *(uint32_t *)d = filler;
+
+ w -= 4;
+ d += 4;
+ }
+
+ while (w >= 128)
+ {
+ vec_st(vfiller, 0, (uint32_t *) d);
+ vec_st(vfiller, 0, (uint32_t *) d + 4);
+ vec_st(vfiller, 0, (uint32_t *) d + 8);
+ vec_st(vfiller, 0, (uint32_t *) d + 12);
+ vec_st(vfiller, 0, (uint32_t *) d + 16);
+ vec_st(vfiller, 0, (uint32_t *) d + 20);
+ vec_st(vfiller, 0, (uint32_t *) d + 24);
+ vec_st(vfiller, 0, (uint32_t *) d + 28);
+
+ d += 128;
+ w -= 128;
+ }
+
+ if (w >= 64)
+ {
+ vec_st(vfiller, 0, (uint32_t *) d);
+ vec_st(vfiller, 0, (uint32_t *) d + 4);
+ vec_st(vfiller, 0, (uint32_t *) d + 8);
+ vec_st(vfiller, 0, (uint32_t *) d + 12);
+
+ d += 64;
+ w -= 64;
+ }
+
+ if (w >= 32)
+ {
+ vec_st(vfiller, 0, (uint32_t *) d);
+ vec_st(vfiller, 0, (uint32_t *) d + 4);
+
+ d += 32;
+ w -= 32;
+ }
+
+ if (w >= 16)
+ {
+ vec_st(vfiller, 0, (uint32_t *) d);
+
+ d += 16;
+ w -= 16;
+ }
+
+ while (w >= 4)
+ {
+ *(uint32_t *)d = filler;
+
+ w -= 4;
+ d += 4;
+ }
+
+ if (w >= 2)
+ {
+ *(uint16_t *)d = filler;
+ w -= 2;
+ d += 2;
+ }
+
+ if (w >= 1)
+ {
+ *(uint8_t *)d = filler;
+ w -= 1;
+ d += 1;
+ }
+ }
+
+ return TRUE;
+}
+
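
Before any vector store, the filler is replicated so that a single 32-bit value (and, via create_mask_1x32_128, a 128-bit vector) covers every supported depth. A minimal sketch of just that widening step (hypothetical widen_filler, standard C only):

    /* Widen an 8- or 16-bit filler to 32 bits so one store pattern
     * serves bpp == 8, 16 and 32 alike. */
    static uint32_t widen_filler (uint32_t filler, int bpp)
    {
        if (bpp == 8)
        {
            uint8_t  b = filler & 0xff;
            uint16_t w = (uint16_t)((b << 8) | b);     /* b    -> bb   */
            filler = ((uint32_t)w << 16) | w;          /* bb   -> bbbb */
        }
        else if (bpp == 16)
        {
            filler = (filler & 0xffff) * 0x00010001u;  /* w    -> ww   */
        }
        return filler;  /* bpp == 32: already full width */
    }
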
+static void
+vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int32_t w;
+ int dst_stride, src_stride;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (uintptr_t)dst & 15)
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4;
+
+ vmx_src1 = load_128_unaligned (src);
+ vmx_src2 = load_128_unaligned (src + 4);
+ vmx_src3 = load_128_unaligned (src + 8);
+ vmx_src4 = load_128_unaligned (src + 12);
+
+ save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000));
+ save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000));
+ save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000));
+ save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000));
+
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+ }
+}
+
+static void
+vmx_composite_over_n_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint32_t src, ia;
+ int i, w, dst_stride;
+ vector unsigned int vdst, vsrc, via;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ vsrc = (vector unsigned int){src, src, src, src};
+ via = negate (splat_alpha (vsrc));
+ ia = ALPHA_8 (~src);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && ((uintptr_t)dst & 15))
+ {
+ uint32_t d = *dst;
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+ *dst++ = d;
+ w--;
+ }
+
+ for (i = w / 4; i > 0; i--)
+ {
+ vdst = pix_multiply (load_128_aligned (dst), via);
+ save_128_aligned (dst, pix_add (vsrc, vdst));
+ dst += 4;
+ }
+
+ for (i = w % 4; --i >= 0;)
+ {
+ uint32_t d = dst[i];
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+ dst[i] = d;
+ }
+ }
+}
+
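
The vector body computes dest = src + dest * (255 - src.alpha) / 255, which is exactly what the UN8x4_MUL_UN8_ADD_UN8x4 head and tail do. A scalar model for one pixel (hypothetical over_n_pixel, same rounded division by 255):

    static uint32_t over_n_pixel (uint32_t src, uint32_t d)
    {
        uint8_t  ia = (uint8_t)~(src >> 24);   /* 255 - src alpha */
        uint32_t r  = 0;
        int      shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint16_t t = (uint16_t)((d >> shift) & 0xff) * ia + 0x80;
            t  = (t + (t >> 8)) >> 8;          /* d * ia / 255 */
            t += (src >> shift) & 0xff;        /* + s          */
            if (t > 0xff) t = 0xff;            /* saturate     */
            r |= (uint32_t)t << shift;
        }
        return r;
    }
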
+static void
+vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ int dst_stride, src_stride;
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ dst = dst_line;
+ src = src_line;
+
+ while (height--)
+ {
+ vmx_combine_over_u (imp, op, dst, src, NULL, width);
+
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+static void
+vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t src, ia;
+ uint32_t *dst_line, d;
+ uint32_t *mask_line, m;
+ uint32_t pack_cmp;
+ int dst_stride, mask_stride;
+
+ vector unsigned int vsrc, valpha, vmask, vdest;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ vsrc = (vector unsigned int) {src, src, src, src};
+ valpha = splat_alpha(vsrc);
+ ia = ALPHA_8 (src);
+
+ while (height--)
+ {
+ int w = width;
+ const uint32_t *pm = (uint32_t *)mask_line;
+ uint32_t *pd = (uint32_t *)dst_line;
+ uint32_t s;
+
+ dst_line += dst_stride;
+ mask_line += mask_stride;
+
+ while (w && (uintptr_t)pd & 15)
+ {
+ s = src;
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+ UN8x4_MUL_UN8x4 (s, m);
+ UN8x4_MUL_UN8 (m, ia);
+ m = ~m;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
+ *pd = d;
+ }
+
+ pd++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ /* pm is NOT necessarily 16-byte aligned */
+ vmask = load_128_unaligned (pm);
+
+ pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0));
+
+	    /* vec_all_eq () is non-zero only when every mask element is zero, so pack_cmp == 0 means at least one pixel needs compositing */
+ if (pack_cmp == 0)
+ {
+ /* pd is 16-byte aligned */
+ vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd));
+
+ save_128_aligned(pd, vdest);
+ }
+
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = src;
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+ UN8x4_MUL_UN8x4 (s, m);
+ UN8x4_MUL_UN8 (m, ia);
+ m = ~m;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s);
+ *pd = d;
+ }
+
+ pd++;
+ w--;
+ }
+ }
+}
+
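
in_over with a per-channel mask expands to d = d * (255 - m * src.alpha) / 255 + s * m / 255 in every channel, matching the UN8x4 sequence in the head and tail loops. A hedged scalar model (hypothetical names; mul_un8 mirrors pixman's MUL_UN8 rounding):

    static inline uint8_t mul_un8 (uint8_t a, uint8_t b)
    {
        uint16_t t = (uint16_t)a * b + 0x80;
        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    static uint32_t over_n_ca_pixel (uint32_t src, uint32_t m, uint32_t d)
    {
        uint8_t  sa = src >> 24;
        uint32_t r  = 0;
        int      shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint8_t  sc = (src >> shift) & 0xff;
            uint8_t  mc = (m   >> shift) & 0xff;
            uint8_t  dc = (d   >> shift) & 0xff;
            /* d * (255 - m * sa) / 255 + s * m / 255, saturating */
            uint16_t t  = mul_un8 (dc, 255 - mul_un8 (mc, sa))
                        + mul_un8 (sc, mc);

            r |= (uint32_t)(t > 0xff ? 0xff : t) << shift;
        }
        return r;
    }
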
+static void
+vmx_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ src = src_line;
+
+ dst_line += dst_stride;
+ src_line += src_stride;
+ w = width;
+
+ /* Small head */
+ while (w && (uintptr_t)dst & 3)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+
+ vmx_combine_add_u (imp, op,
+ (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+ /* Small tail */
+ dst += w & 0xfffc;
+ src += w & 0xfffc;
+
+ w &= 3;
+
+ while (w)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+ }
+}
+
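
The head and tail above rely on a branch-free saturating byte add: t is a 9-bit sum, so (0 - (t >> 8)) is all-ones exactly when the sum overflowed, and OR-ing it in clamps the result to 255. Isolated for clarity (hypothetical sat_add_u8):

    static uint8_t sat_add_u8 (uint8_t a, uint8_t b)
    {
        uint16_t t = (uint16_t)a + b;       /* 0 .. 510          */
        return (uint8_t)(t | (0 - (t >> 8)));  /* 0xff on overflow */
    }
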
+static void
+vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+
+ vmx_combine_add_u (imp, op, dst, src, NULL, width);
+ }
+}
+
+static force_inline void
+scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t src_width_fixed,
+ pixman_bool_t fully_transparent_src)
+{
+ uint32_t s, d;
+ const uint32_t* pm = NULL;
+
+ vector unsigned int vsrc, vdst;
+
+ if (fully_transparent_src)
+ return;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((uintptr_t)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ *pd++ = core_combine_over_u_pixel_vmx (s, d);
+ if (pm)
+ pm++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ vector unsigned int tmp;
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = *(ps + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+ tmp2 = *(ps + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+ tmp3 = *(ps + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+ tmp4 = *(ps + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ tmp[0] = tmp1;
+ tmp[1] = tmp2;
+ tmp[2] = tmp3;
+ tmp[3] = tmp4;
+
+ vsrc = combine4 ((const uint32_t *) &tmp, pm);
+
+ if (is_opaque (vsrc))
+ {
+ save_128_aligned (pd, vsrc);
+ }
+ else if (!is_zero (vsrc))
+ {
+ vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd));
+
+ save_128_aligned (pd, vdst);
+ }
+
+ w -= 4;
+ pd += 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ d = *pd;
+ s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ *pd++ = core_combine_over_u_pixel_vmx (s, d);
+ if (pm)
+ pm++;
+
+ w--;
+ }
+}
+
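
Source coordinates advance in 16.16 fixed point: pixman_fixed_to_int keeps the integer part, and for NORMAL repeat the FAST_NEAREST mainloop biases vx negative so the `while (vx >= 0) vx -= src_width_fixed;` wrap stays cheap. A minimal sketch of the simpler COVER case, where vx starts inside the row and never wraps (hypothetical nearest_row_cover):

    #include <stdint.h>

    #define FIXED_TO_INT(f) ((int) ((f) >> 16))

    static void nearest_row_cover (const uint32_t *ps, uint32_t *pd, int w,
                                   int32_t vx, int32_t unit_x)
    {
        while (w--)
        {
            *pd++ = ps[FIXED_TO_INT (vx)]; /* integer part picks the texel */
            vx += unit_x;                  /* step by the scale factor     */
        }
    }
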
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
+ scaled_nearest_scanline_vmx_8888_8888_OVER,
+ uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER,
+ scaled_nearest_scanline_vmx_8888_8888_OVER,
+ uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER,
+ scaled_nearest_scanline_vmx_8888_8888_OVER,
+ uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
+ scaled_nearest_scanline_vmx_8888_8888_OVER,
+ uint32_t, uint32_t, NORMAL)
+
static const pixman_fast_path_t vmx_fast_paths[] =
{
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, vmx_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, vmx_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca),
+
+ /* PIXMAN_OP_ADD */
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
+
+ /* PIXMAN_OP_SRC */
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, vmx_8888_8888),
+
{ PIXMAN_OP_NONE },
};
+static uint32_t *
+vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ vector unsigned int ff000000 = mask_ff000000;
+ uint32_t *dst = iter->buffer;
+ uint32_t *src = (uint32_t *)iter->bits;
+
+ iter->bits += iter->stride;
+
+ while (w && ((uintptr_t)dst) & 0x0f)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));
+
+ dst += 4;
+ src += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
+static uint32_t *
+vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint8_t *src = iter->bits;
+ vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
+
+ iter->bits += iter->stride;
+
+ while (w && (((uintptr_t)dst) & 15))
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ vmx0 = load_128_unaligned((uint32_t *) src);
+
+ unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
+ unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
+ unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
+
+ save_128_aligned(dst, vmx6);
+ save_128_aligned((dst + 4), vmx5);
+ save_128_aligned((dst + 8), vmx4);
+ save_128_aligned((dst + 12), vmx3);
+
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
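
The unpack ladder above widens sixteen a8 bytes per iteration; scalar-wise, each alpha byte simply lands in bits 24-31 of an otherwise zero pixel, which makes a handy oracle to check the vector path against (hypothetical fetch_a8_row):

    /* Reference: each 8-bit alpha becomes an ARGB pixel with zero
     * color channels, exactly like the scalar head/tail loops. */
    static void fetch_a8_row (const uint8_t *src, uint32_t *dst, int w)
    {
        while (w--)
            *dst++ = (uint32_t)*src++ << 24;
    }
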
+#define IMAGE_FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+static const pixman_iter_info_t vmx_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
+
pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
+ /* VMX constants */
+ mask_ff000000 = create_mask_32_128 (0xff000000);
+ mask_red = create_mask_32_128 (0x00f80000);
+ mask_green = create_mask_32_128 (0x0000fc00);
+ mask_blue = create_mask_32_128 (0x000000f8);
+ mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
+ mask_565_fix_g = create_mask_32_128 (0x0000c000);
+
/* Set up function pointers */
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
@@ -1643,5 +3151,9 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
+ imp->fill = vmx_fill;
+
+ imp->iter_info = vmx_iters;
+
return imp;
}
diff --git a/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h b/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h
deleted file mode 100644
index 378019cf27..0000000000
--- a/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef MMX_X64_H_INCLUDED
-#define MMX_X64_H_INCLUDED
-
-/* Implementation of x64 MMX substitution functions, before
- * pixman is reimplemented not to use __m64 type on Visual C++
- *
- * Copyright (C)2009 by George Yohng
- * Released in public domain.
- */
-
-#include <intrin.h>
-
-#define M64C(a) (*(const __m64 *)(&a))
-#define M64U(a) (*(const unsigned long long *)(&a))
-
-__inline __m64
-_m_from_int (int a)
-{
- long long i64 = a;
-
- return M64C (i64);
-}
-
-__inline __m64
-_mm_setzero_si64 ()
-{
- long long i64 = 0;
-
- return M64C (i64);
-}
-
-__inline __m64
-_mm_set_pi32 (int i1, int i0)
-{
- unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
-
- return M64C (i64);
-}
-
-__inline void
-_m_empty ()
-{
-}
-
-__inline __m64
-_mm_set1_pi16 (short w)
-{
- unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
-
- return M64C (i64);
-}
-
-__inline int
-_m_to_int (__m64 m)
-{
- return m.m64_i32[0];
-}
-
-__inline __m64
-_mm_movepi64_pi64 (__m128i a)
-{
- return M64C (a.m128i_i64[0]);
-}
-
-__inline __m64
-_m_pand (__m64 a, __m64 b)
-{
- unsigned long long i64 = M64U (a) & M64U (b);
-
- return M64C (i64);
-}
-
-__inline __m64
-_m_por (__m64 a, __m64 b)
-{
- unsigned long long i64 = M64U (a) | M64U (b);
-
- return M64C (i64);
-}
-
-__inline __m64
-_m_pxor (__m64 a, __m64 b)
-{
- unsigned long long i64 = M64U (a) ^ M64U (b);
-
- return M64C (i64);
-}
-
-__inline __m64
-_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */
-{
- unsigned short d[4] =
- {
- (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
- (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
- (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
- (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64
-_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */
-{
- unsigned short d[4] =
- {
- (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
- (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
- (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
- (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64
-_m_pmullw (__m64 a, __m64 b) /* unoptimized */
-{
- unsigned long long x =
- ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) +
- (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) +
- (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) +
- (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
-
- return M64C (x);
-}
-
-__inline __m64
-_m_paddusb (__m64 a, __m64 b) /* unoptimized */
-{
- unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
- (M64U (b) & 0x00FF00FF00FF00FFULL);
-
- unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
- ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);
-
- x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
- y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
-
- x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);
-
- return M64C (x);
-}
-
-__inline __m64
-_m_paddusw (__m64 a, __m64 b) /* unoptimized */
-{
- unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
- (M64U (b) & 0x0000FFFF0000FFFFULL);
-
- unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
- ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
-
- x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
- y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
-
- x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
-
- return M64C (x);
-}
-
-__inline __m64
-_m_pshufw (__m64 a, int n) /* unoptimized */
-{
- unsigned short d[4] =
- {
- a.m64_u16[n & 3],
- a.m64_u16[(n >> 2) & 3],
- a.m64_u16[(n >> 4) & 3],
- a.m64_u16[(n >> 6) & 3]
- };
-
- return M64C (d[0]);
-}
-
-__inline unsigned char
-sat16 (unsigned short d)
-{
- if (d > 0xFF) return 0xFF;
- else return d & 0xFF;
-}
-
-__inline __m64
-_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */
-{
- unsigned char d[8] =
- {
- sat16 (m1.m64_u16[0]),
- sat16 (m1.m64_u16[1]),
- sat16 (m1.m64_u16[2]),
- sat16 (m1.m64_u16[3]),
- sat16 (m2.m64_u16[0]),
- sat16 (m2.m64_u16[1]),
- sat16 (m2.m64_u16[2]),
- sat16 (m2.m64_u16[3])
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */
-{
- unsigned char d[8] =
- {
- m1.m64_u8[0],
- m2.m64_u8[0],
- m1.m64_u8[1],
- m2.m64_u8[1],
- m1.m64_u8[2],
- m2.m64_u8[2],
- m1.m64_u8[3],
- m2.m64_u8[3],
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */
-{
- unsigned char d[8] =
- {
- m1.m64_u8[4],
- m2.m64_u8[4],
- m1.m64_u8[5],
- m2.m64_u8[5],
- m1.m64_u8[6],
- m2.m64_u8[6],
- m1.m64_u8[7],
- m2.m64_u8[7],
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */
-{
- unsigned short d[4] =
- {
- a.m64_u16[0] >> n,
- a.m64_u16[1] >> n,
- a.m64_u16[2] >> n,
- a.m64_u16[3] >> n
- };
-
- return M64C (d[0]);
-}
-
-__inline __m64 _m_psrlqi (__m64 m, int n)
-{
- unsigned long long x = M64U (m) >> n;
-
- return M64C (x);
-}
-
-__inline __m64 _m_psllqi (__m64 m, int n)
-{
- unsigned long long x = M64U (m) << n;
-
- return M64C (x);
-}
-
-#endif /* MMX_X64_H_INCLUDED */
diff --git a/gfx/cairo/libpixman/src/pixman-x86.c b/gfx/cairo/libpixman/src/pixman-x86.c
index feea23e790..2c702e5c3b 100644
--- a/gfx/cairo/libpixman/src/pixman-x86.c
+++ b/gfx/cairo/libpixman/src/pixman-x86.c
@@ -25,7 +25,7 @@
#include "pixman-private.h"
-#if defined(USE_X86_MMX) || defined (USE_SSE2)
+#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
@@ -39,7 +39,8 @@ typedef enum
X86_MMX_EXTENSIONS = (1 << 1),
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
- X86_CMOV = (1 << 4)
+ X86_CMOV = (1 << 4),
+ X86_SSSE3 = (1 << 5)
} cpu_features_t;
#ifdef HAVE_GETISAX
@@ -64,6 +65,8 @@ detect_cpu_features (void)
features |= X86_SSE;
if (result & AV_386_SSE2)
features |= X86_SSE2;
+ if (result & AV_386_SSSE3)
+ features |= X86_SSSE3;
}
return features;
@@ -171,6 +174,8 @@ detect_cpu_features (void)
features |= X86_SSE;
if (d & (1 << 26))
features |= X86_SSE2;
+ if (c & (1 << 9))
+ features |= X86_SSSE3;
/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
@@ -186,6 +191,7 @@ detect_cpu_features (void)
memcpy (vendor + 8, &c, 4);
if (strcmp (vendor, "AuthenticAMD") == 0 ||
+ strcmp (vendor, "HygonGenuine") == 0 ||
strcmp (vendor, "Geode by NSC") == 0)
{
pixman_cpuid (0x80000000, &a, &b, &c, &d);
@@ -226,6 +232,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
{
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
+#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
@@ -237,5 +244,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
imp = _pixman_implementation_create_sse2 (imp);
#endif
+#ifdef USE_SSSE3
+ if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS))
+ imp = _pixman_implementation_create_ssse3 (imp);
+#endif
+
return imp;
}
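
For reference, SSSE3 is ECX bit 9 of CPUID leaf 1, which is what the `c & (1 << 9)` test above reads. A standalone check using GCC/clang's cpuid.h (a sketch, not pixman's detection path):

    #include <cpuid.h>
    #include <stdio.h>

    int main (void)
    {
        unsigned a, b, c, d;

        /* Leaf 1: feature bits; ECX bit 9 reports SSSE3 */
        if (__get_cpuid (1, &a, &b, &c, &d))
            printf ("ssse3: %s\n", (c & (1 << 9)) ? "yes" : "no");
        return 0;
    }
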
diff --git a/gfx/cairo/libpixman/src/pixman.c b/gfx/cairo/libpixman/src/pixman.c
index 184f0c4e6a..c09b528083 100644
--- a/gfx/cairo/libpixman/src/pixman.c
+++ b/gfx/cairo/libpixman/src/pixman.c
@@ -325,18 +325,20 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
return TRUE;
}
-typedef struct
+typedef struct box_48_16 box_48_16_t;
+
+struct box_48_16
{
- pixman_fixed_48_16_t x1;
- pixman_fixed_48_16_t y1;
- pixman_fixed_48_16_t x2;
- pixman_fixed_48_16_t y2;
-} box_48_16_t;
+ pixman_fixed_48_16_t x1;
+ pixman_fixed_48_16_t y1;
+ pixman_fixed_48_16_t x2;
+ pixman_fixed_48_16_t y2;
+};
static pixman_bool_t
-compute_transformed_extents (pixman_transform_t *transform,
+compute_transformed_extents (pixman_transform_t *transform,
const pixman_box32_t *extents,
- box_48_16_t *transformed)
+ box_48_16_t *transformed)
{
pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
pixman_fixed_t x1, y1, x2, y2;
@@ -495,21 +497,12 @@ analyze_extent (pixman_image_t *image,
if (!compute_transformed_extents (transform, extents, &transformed))
return FALSE;
-    /* Expand the source area by a tiny bit to account for different rounding that
- * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
- * 0.5 so this won't cause the area computed to be overly pessimistic.
- */
- transformed.x1 -= 8 * pixman_fixed_e;
- transformed.y1 -= 8 * pixman_fixed_e;
- transformed.x2 += 8 * pixman_fixed_e;
- transformed.y2 += 8 * pixman_fixed_e;
-
if (image->common.type == BITS)
{
- if (pixman_fixed_to_int (transformed.x1) >= 0 &&
- pixman_fixed_to_int (transformed.y1) >= 0 &&
- pixman_fixed_to_int (transformed.x2) < image->bits.width &&
- pixman_fixed_to_int (transformed.y2) < image->bits.height)
+ if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0 &&
+ pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0 &&
+ pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width &&
+ pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
}
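
pixman_fixed_e is the smallest 16.16 increment (1/65536), so subtracting it before truncation steps down exactly on integer boundaries; that is what lets the comparisons above replace the old 8 * pixman_fixed_e slack. A tiny demo with local stand-ins for the macros:

    #include <stdio.h>

    #define FIXED_1   (1 << 16)
    #define FIXED_E   1
    #define TO_INT(f) ((int) ((f) >> 16))

    int main (void)
    {
        int x = 3 * FIXED_1;   /* exactly 3.0 in 16.16 fixed point */
        /* prints "3 2": x - e drops to the previous integer */
        printf ("%d %d\n", TO_INT (x), TO_INT (x - FIXED_E));
        return 0;
    }
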
@@ -605,7 +598,7 @@ pixman_image_composite32 (pixman_op_t op,
else
{
mask_format = PIXMAN_null;
- info.mask_flags = FAST_PATH_IS_OPAQUE;
+ info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP;
}
dest_format = dest->common.extended_format_code;
@@ -784,6 +777,11 @@ color_to_pixel (const pixman_color_t *color,
{
uint32_t c = color_to_uint32 (color);
+ if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA_FLOAT)
+ {
+ return FALSE;
+ }
+
if (!(format == PIXMAN_a8r8g8b8 ||
format == PIXMAN_x8r8g8b8 ||
format == PIXMAN_a8b8g8r8 ||
diff --git a/gfx/cairo/libpixman/src/pixman.h b/gfx/cairo/libpixman/src/pixman.h
index 893adc50e0..66bc9abb1d 100644
--- a/gfx/cairo/libpixman/src/pixman.h
+++ b/gfx/cairo/libpixman/src/pixman.h
@@ -73,7 +73,6 @@ SOFTWARE.
#include "cairo/pixman-rename.h"
#endif
-
#include <pixman-version.h>
#ifdef __cplusplus
@@ -94,6 +93,18 @@ PIXMAN_BEGIN_DECLS
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc)
# include <inttypes.h>
+/* VS 2010 (_MSC_VER 1600) has stdint.h */
+#elif defined (_MSC_VER) && _MSC_VER < 1600
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#elif defined (_AIX)
+# include <sys/inttypes.h>
#else
# include <stdint.h>
#endif
@@ -120,7 +131,7 @@ typedef pixman_fixed_16_16_t pixman_fixed_t;
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e)
#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1))
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))
-#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16))
+#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16))
#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1)
#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0))
#define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e)
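
The cast in pixman_int_to_fixed matters because left-shifting a negative int is undefined behaviour in C; routing through uint32_t makes the shift well defined while yielding the same two's-complement bit pattern. Sketch:

    #include <stdint.h>

    static int32_t int_to_fixed (int i)
    {
        /* (i << 16) would be undefined for negative i; the unsigned
         * shift wraps modulo 2^32, so -1 becomes 0xffff0000 as intended. */
        return (int32_t) ((uint32_t) i << 16);
    }
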
@@ -177,42 +188,73 @@ struct pixman_transform
struct pixman_box16;
typedef union pixman_image pixman_image_t;
+PIXMAN_API
void pixman_transform_init_identity (struct pixman_transform *matrix);
+
+PIXMAN_API
pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector *vector);
+
+PIXMAN_API
pixman_bool_t pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector *vector);
+
+PIXMAN_API
pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst,
const struct pixman_transform *l,
const struct pixman_transform *r);
+
+PIXMAN_API
void pixman_transform_init_scale (struct pixman_transform *t,
pixman_fixed_t sx,
pixman_fixed_t sy);
+
+PIXMAN_API
pixman_bool_t pixman_transform_scale (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t sx,
pixman_fixed_t sy);
+
+PIXMAN_API
void pixman_transform_init_rotate (struct pixman_transform *t,
pixman_fixed_t cos,
pixman_fixed_t sin);
+
+PIXMAN_API
pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t c,
pixman_fixed_t s);
+
+PIXMAN_API
void pixman_transform_init_translate (struct pixman_transform *t,
pixman_fixed_t tx,
pixman_fixed_t ty);
+
+PIXMAN_API
pixman_bool_t pixman_transform_translate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t tx,
pixman_fixed_t ty);
+
+PIXMAN_API
pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix,
struct pixman_box16 *b);
+
+PIXMAN_API
pixman_bool_t pixman_transform_invert (struct pixman_transform *dst,
const struct pixman_transform *src);
+
+PIXMAN_API
pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t);
+
+PIXMAN_API
pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t);
+
+PIXMAN_API
pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t);
+
+PIXMAN_API
pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a,
const struct pixman_transform *b);
@@ -232,42 +274,70 @@ struct pixman_f_transform
double m[3][3];
};
+
+PIXMAN_API
pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
const struct pixman_f_transform *ft);
+
+PIXMAN_API
void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft,
const struct pixman_transform *t);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst,
const struct pixman_f_transform *src);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
+
+PIXMAN_API
void pixman_f_transform_point_3d (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
+
+PIXMAN_API
void pixman_f_transform_multiply (struct pixman_f_transform *dst,
const struct pixman_f_transform *l,
const struct pixman_f_transform *r);
+
+PIXMAN_API
void pixman_f_transform_init_scale (struct pixman_f_transform *t,
double sx,
double sy);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double sx,
double sy);
+
+PIXMAN_API
void pixman_f_transform_init_rotate (struct pixman_f_transform *t,
double cos,
double sin);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double c,
double s);
+
+PIXMAN_API
void pixman_f_transform_init_translate (struct pixman_f_transform *t,
double tx,
double ty);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double tx,
double ty);
+
+PIXMAN_API
pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t,
struct pixman_box16 *b);
+
+PIXMAN_API
void pixman_f_transform_init_identity (struct pixman_f_transform *t);
typedef enum
@@ -280,6 +350,16 @@ typedef enum
typedef enum
{
+ PIXMAN_DITHER_NONE,
+ PIXMAN_DITHER_FAST,
+ PIXMAN_DITHER_GOOD,
+ PIXMAN_DITHER_BEST,
+ PIXMAN_DITHER_ORDERED_BAYER_8,
+ PIXMAN_DITHER_ORDERED_BLUE_NOISE_64,
+} pixman_dither_t;
+
+typedef enum
+{
PIXMAN_FILTER_FAST,
PIXMAN_FILTER_GOOD,
PIXMAN_FILTER_BEST,
@@ -416,73 +496,120 @@ typedef enum
/* This function exists only to make it possible to preserve
* the X ABI - it should go away at first opportunity.
*/
+PIXMAN_API
void pixman_region_set_static_pointers (pixman_box16_t *empty_box,
pixman_region16_data_t *empty_data,
pixman_region16_data_t *broken_data);
/* creation/destruction */
+PIXMAN_API
void pixman_region_init (pixman_region16_t *region);
+
+PIXMAN_API
void pixman_region_init_rect (pixman_region16_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region_init_rects (pixman_region16_t *region,
const pixman_box16_t *boxes,
int count);
+
+PIXMAN_API
void pixman_region_init_with_extents (pixman_region16_t *region,
pixman_box16_t *extents);
+
+PIXMAN_API
void pixman_region_init_from_image (pixman_region16_t *region,
pixman_image_t *image);
+
+PIXMAN_API
void pixman_region_fini (pixman_region16_t *region);
/* manipulation */
+PIXMAN_API
void pixman_region_translate (pixman_region16_t *region,
int x,
int y);
+
+PIXMAN_API
pixman_bool_t pixman_region_copy (pixman_region16_t *dest,
pixman_region16_t *source);
+
+PIXMAN_API
pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
+
+PIXMAN_API
pixman_bool_t pixman_region_union (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
+
+PIXMAN_API
pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d,
pixman_region16_t *reg_m,
pixman_region16_t *reg_s);
+
+PIXMAN_API
pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_box16_t *inv_rect);
+
+PIXMAN_API
pixman_bool_t pixman_region_contains_point (pixman_region16_t *region,
int x,
int y,
pixman_box16_t *box);
+
+PIXMAN_API
pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *region,
pixman_box16_t *prect);
+
+PIXMAN_API
pixman_bool_t pixman_region_not_empty (pixman_region16_t *region);
+
+PIXMAN_API
pixman_box16_t * pixman_region_extents (pixman_region16_t *region);
+
+PIXMAN_API
int pixman_region_n_rects (pixman_region16_t *region);
+
+PIXMAN_API
pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region,
int *n_rects);
+
+PIXMAN_API
pixman_bool_t pixman_region_equal (pixman_region16_t *region1,
pixman_region16_t *region2);
+
+PIXMAN_API
pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region);
+
+PIXMAN_API
void pixman_region_reset (pixman_region16_t *region,
pixman_box16_t *box);
+
+PIXMAN_API
void pixman_region_clear (pixman_region16_t *region);
/*
* 32 bit regions
@@ -516,72 +643,119 @@ struct pixman_region32
};
/* creation/destruction */
+PIXMAN_API
void pixman_region32_init (pixman_region32_t *region);
+
+PIXMAN_API
void pixman_region32_init_rect (pixman_region32_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region,
const pixman_box32_t *boxes,
int count);
+
+PIXMAN_API
void pixman_region32_init_with_extents (pixman_region32_t *region,
pixman_box32_t *extents);
+
+PIXMAN_API
void pixman_region32_init_from_image (pixman_region32_t *region,
pixman_image_t *image);
+
+PIXMAN_API
void pixman_region32_fini (pixman_region32_t *region);
/* manipulation */
+PIXMAN_API
void pixman_region32_translate (pixman_region32_t *region,
int x,
int y);
+
+PIXMAN_API
pixman_bool_t pixman_region32_copy (pixman_region32_t *dest,
pixman_region32_t *source);
+
+PIXMAN_API
pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
+
+PIXMAN_API
pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
+
+PIXMAN_API
pixman_bool_t pixman_region32_intersect_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
+
+PIXMAN_API
pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d,
pixman_region32_t *reg_m,
pixman_region32_t *reg_s);
+
+PIXMAN_API
pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_box32_t *inv_rect);
+
+PIXMAN_API
pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region,
int x,
int y,
pixman_box32_t *box);
+
+PIXMAN_API
pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region,
pixman_box32_t *prect);
+
+PIXMAN_API
pixman_bool_t pixman_region32_not_empty (pixman_region32_t *region);
+
+PIXMAN_API
pixman_box32_t * pixman_region32_extents (pixman_region32_t *region);
+
+PIXMAN_API
int pixman_region32_n_rects (pixman_region32_t *region);
+
+PIXMAN_API
pixman_box32_t * pixman_region32_rectangles (pixman_region32_t *region,
int *n_rects);
+
+PIXMAN_API
pixman_bool_t pixman_region32_equal (pixman_region32_t *region1,
pixman_region32_t *region2);
+
+PIXMAN_API
pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region);
+
+PIXMAN_API
void pixman_region32_reset (pixman_region32_t *region,
pixman_box32_t *box);
+
+PIXMAN_API
void pixman_region32_clear (pixman_region32_t *region);
/* Copy / Fill / Misc */
+PIXMAN_API
pixman_bool_t pixman_blt (uint32_t *src_bits,
uint32_t *dst_bits,
int src_stride,
@@ -594,6 +768,8 @@ pixman_bool_t pixman_blt (uint32_t *src_bits,
int dest_y,
int width,
int height);
+
+PIXMAN_API
pixman_bool_t pixman_fill (uint32_t *bits,
int stride,
int bpp,
@@ -603,7 +779,11 @@ pixman_bool_t pixman_fill (uint32_t *bits,
int height,
uint32_t _xor);
+
+PIXMAN_API
int pixman_version (void);
+
+PIXMAN_API
const char* pixman_version_string (void);
/*
@@ -647,12 +827,24 @@ struct pixman_indexed
((g) << 4) | \
((b)))
-#define PIXMAN_FORMAT_BPP(f) (((f) >> 24) )
-#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0xff)
-#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
-#define PIXMAN_FORMAT_R(f) (((f) >> 8) & 0x0f)
-#define PIXMAN_FORMAT_G(f) (((f) >> 4) & 0x0f)
-#define PIXMAN_FORMAT_B(f) (((f) ) & 0x0f)
+#define PIXMAN_FORMAT_BYTE(bpp,type,a,r,g,b) \
+ (((bpp >> 3) << 24) | \
+ (3 << 22) | ((type) << 16) | \
+ ((a >> 3) << 12) | \
+ ((r >> 3) << 8) | \
+ ((g >> 3) << 4) | \
+ ((b >> 3)))
+
+#define PIXMAN_FORMAT_RESHIFT(val, ofs, num) \
+ (((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3))
+
+#define PIXMAN_FORMAT_BPP(f) PIXMAN_FORMAT_RESHIFT(f, 24, 8)
+#define PIXMAN_FORMAT_SHIFT(f) ((uint32_t)((f >> 22) & 3))
+#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0x3f)
+#define PIXMAN_FORMAT_A(f) PIXMAN_FORMAT_RESHIFT(f, 12, 4)
+#define PIXMAN_FORMAT_R(f) PIXMAN_FORMAT_RESHIFT(f, 8, 4)
+#define PIXMAN_FORMAT_G(f) PIXMAN_FORMAT_RESHIFT(f, 4, 4)
+#define PIXMAN_FORMAT_B(f) PIXMAN_FORMAT_RESHIFT(f, 0, 4)
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)
#define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff)
#define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \
@@ -671,15 +863,22 @@ struct pixman_indexed
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
+#define PIXMAN_TYPE_RGBA_FLOAT 11
#define PIXMAN_FORMAT_COLOR(f) \
(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \
- PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
+ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA || \
+ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA_FLOAT)
-/* 32bpp formats */
typedef enum {
+/* 128bpp formats */
+ PIXMAN_rgba_float = PIXMAN_FORMAT_BYTE(128,PIXMAN_TYPE_RGBA_FLOAT,32,32,32,32),
+/* 96bpp formats */
+ PIXMAN_rgb_float = PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32),
+
+/* 32bpp formats */
PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
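
The new PIXMAN_FORMAT_BYTE encoding stores bpp / 8 plus a shift of 3 in bits 22-23 so that 96- and 128-bit float formats fit the old 8-bit fields; PIXMAN_FORMAT_RESHIFT undoes it (classic formats keep a shift of 0). A hedged walk-through for PIXMAN_rgba_float using local macro copies, with 11 standing in for PIXMAN_TYPE_RGBA_FLOAT as defined above:

    #include <assert.h>
    #include <stdint.h>

    #define FORMAT_BYTE(bpp,type,a,r,g,b)              \
        ((((bpp) >> 3) << 24) | (3 << 22) |            \
         ((type) << 16) | (((a) >> 3) << 12) |         \
         (((r) >> 3) << 8) | (((g) >> 3) << 4) | ((b) >> 3))
    #define RESHIFT(val, ofs, num) \
        ((((val) >> (ofs)) & ((1 << (num)) - 1)) << (((val) >> 22) & 3))

    int main (void)
    {
        uint32_t f = FORMAT_BYTE (128, 11, 32, 32, 32, 32);

        assert (RESHIFT (f, 24, 8) == 128);  /* bpp: 16 << 3       */
        assert (RESHIFT (f, 12, 4) == 32);   /* alpha depth: 4 << 3 */
        return 0;
    }
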
@@ -750,30 +949,44 @@ typedef enum {
} pixman_format_code_t;
/* Querying supported format values. */
+PIXMAN_API
pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
+
+PIXMAN_API
pixman_bool_t pixman_format_supported_source (pixman_format_code_t format);
/* Constructors */
+PIXMAN_API
pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color);
+
+PIXMAN_API
pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1,
const pixman_point_fixed_t *p2,
const pixman_gradient_stop_t *stops,
int n_stops);
+
+PIXMAN_API
pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner,
const pixman_point_fixed_t *outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops);
+
+PIXMAN_API
pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops);
+
+PIXMAN_API
pixman_image_t *pixman_image_create_bits (pixman_format_code_t format,
int width,
int height,
uint32_t *bits,
int rowstride_bytes);
+
+PIXMAN_API
pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
@@ -781,48 +994,99 @@ pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t forma
int rowstride_bytes);
/* Destructor */
+PIXMAN_API
pixman_image_t *pixman_image_ref (pixman_image_t *image);
+
+PIXMAN_API
pixman_bool_t pixman_image_unref (pixman_image_t *image);
+
+PIXMAN_API
void pixman_image_set_destroy_function (pixman_image_t *image,
pixman_image_destroy_func_t function,
void *data);
+
+PIXMAN_API
void * pixman_image_get_destroy_data (pixman_image_t *image);
/* Set properties */
+PIXMAN_API
pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image,
pixman_region16_t *region);
+
+PIXMAN_API
pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image,
pixman_region32_t *region);
+
+PIXMAN_API
void pixman_image_set_has_client_clip (pixman_image_t *image,
pixman_bool_t clien_clip);
+
+PIXMAN_API
pixman_bool_t pixman_image_set_transform (pixman_image_t *image,
const pixman_transform_t *transform);
+
+PIXMAN_API
void pixman_image_set_repeat (pixman_image_t *image,
pixman_repeat_t repeat);
+
+PIXMAN_API
+void pixman_image_set_dither (pixman_image_t *image,
+ pixman_dither_t dither);
+
+PIXMAN_API
+void pixman_image_set_dither_offset (pixman_image_t *image,
+ int offset_x,
+ int offset_y);
+
+PIXMAN_API
pixman_bool_t pixman_image_set_filter (pixman_image_t *image,
pixman_filter_t filter,
const pixman_fixed_t *filter_params,
int n_filter_params);
+
+PIXMAN_API
void pixman_image_set_source_clipping (pixman_image_t *image,
pixman_bool_t source_clipping);
+
+PIXMAN_API
void pixman_image_set_alpha_map (pixman_image_t *image,
pixman_image_t *alpha_map,
int16_t x,
int16_t y);
+
+PIXMAN_API
void pixman_image_set_component_alpha (pixman_image_t *image,
pixman_bool_t component_alpha);
+
+PIXMAN_API
pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image);
+
+PIXMAN_API
void pixman_image_set_accessors (pixman_image_t *image,
pixman_read_memory_func_t read_func,
pixman_write_memory_func_t write_func);
+
+PIXMAN_API
void pixman_image_set_indexed (pixman_image_t *image,
const pixman_indexed_t *indexed);
+
+PIXMAN_API
uint32_t *pixman_image_get_data (pixman_image_t *image);
+
+PIXMAN_API
int pixman_image_get_width (pixman_image_t *image);
+
+PIXMAN_API
int pixman_image_get_height (pixman_image_t *image);
+
+PIXMAN_API
int pixman_image_get_stride (pixman_image_t *image); /* in bytes */
+
+PIXMAN_API
int pixman_image_get_depth (pixman_image_t *image);
+
+PIXMAN_API
pixman_format_code_t pixman_image_get_format (pixman_image_t *image);
typedef enum
@@ -840,6 +1104,7 @@ typedef enum
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters.
*/
+PIXMAN_API
pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
@@ -851,11 +1116,15 @@ pixman_filter_create_separable_convolution (int *n_values,
int subsample_bits_x,
int subsample_bits_y);
+
+PIXMAN_API
pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t *image,
const pixman_color_t *color,
int n_rects,
const pixman_rectangle16_t *rects);
+
+PIXMAN_API
pixman_bool_t pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t *dest,
const pixman_color_t *color,
@@ -863,6 +1132,7 @@ pixman_bool_t pixman_image_fill_boxes (pixman_op_t
const pixman_box32_t *boxes);
/* Composite */
+PIXMAN_API
pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
pixman_image_t *src_image,
pixman_image_t *mask_image,
@@ -875,6 +1145,8 @@ pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
int16_t dest_y,
uint16_t width,
uint16_t height);
+
+PIXMAN_API
void pixman_image_composite (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
@@ -887,6 +1159,8 @@ void pixman_image_composite (pixman_op_t op,
int16_t dest_y,
uint16_t width,
uint16_t height);
+
+PIXMAN_API
void pixman_image_composite32 (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
@@ -918,6 +1192,7 @@ void pixman_image_composite32 (pixman_op_t op,
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
* function is a no-op.
*/
+PIXMAN_API
void pixman_disable_out_of_bounds_workaround (void);
/*
@@ -930,29 +1205,48 @@ typedef struct
const void *glyph;
} pixman_glyph_t;
+PIXMAN_API
pixman_glyph_cache_t *pixman_glyph_cache_create (void);
+
+PIXMAN_API
void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache);
+
+PIXMAN_API
void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache);
+
+PIXMAN_API
void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache);
+
+PIXMAN_API
const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
+
+PIXMAN_API
const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *glyph_image);
+
+PIXMAN_API
void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
+
+PIXMAN_API
void pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents);
+
+PIXMAN_API
pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
+
+PIXMAN_API
void pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
@@ -968,6 +1262,8 @@ void pixman_composite_glyphs (pixman_op_t op,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
+
+PIXMAN_API
void pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
@@ -1023,7 +1319,7 @@ struct pixman_triangle
#define pixman_trapezoid_valid(t) \
((t)->left.p1.y != (t)->left.p2.y && \
(t)->right.p1.y != (t)->right.p2.y && \
- (int) ((t)->bottom - (t)->top) > 0)
+ ((t)->bottom > (t)->top))
struct pixman_span_fix
{
@@ -1035,12 +1331,19 @@ struct pixman_trap
pixman_span_fix_t top, bot;
};
+PIXMAN_API
pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y,
int bpp);
+
+PIXMAN_API
pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y,
int bpp);
+
+PIXMAN_API
void pixman_edge_step (pixman_edge_t *e,
int n);
+
+PIXMAN_API
void pixman_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y_start,
@@ -1048,31 +1351,43 @@ void pixman_edge_init (pixman_edge_t *e,
pixman_fixed_t y_top,
pixman_fixed_t x_bot,
pixman_fixed_t y_bot);
+
+PIXMAN_API
void pixman_line_fixed_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y,
const pixman_line_fixed_t *line,
int x_off,
int y_off);
+
+PIXMAN_API
void pixman_rasterize_edges (pixman_image_t *image,
pixman_edge_t *l,
pixman_edge_t *r,
pixman_fixed_t t,
pixman_fixed_t b);
+
+PIXMAN_API
void pixman_add_traps (pixman_image_t *image,
int16_t x_off,
int16_t y_off,
int ntrap,
const pixman_trap_t *traps);
+
+PIXMAN_API
void pixman_add_trapezoids (pixman_image_t *image,
int16_t x_off,
int y_off,
int ntraps,
const pixman_trapezoid_t *traps);
+
+PIXMAN_API
void pixman_rasterize_trapezoid (pixman_image_t *image,
const pixman_trapezoid_t *trap,
int x_off,
int y_off);
+
+PIXMAN_API
void pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
@@ -1083,6 +1398,8 @@ void pixman_composite_trapezoids (pixman_op_t op,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps);
+
+PIXMAN_API
void pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
@@ -1093,6 +1410,8 @@ void pixman_composite_triangles (pixman_op_t op,
int y_dst,
int n_tris,
const pixman_triangle_t * tris);
+
+PIXMAN_API
void pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,
diff --git a/gfx/cairo/libpixman/src/refactor b/gfx/cairo/libpixman/src/refactor
deleted file mode 100644
index 52fceab175..0000000000
--- a/gfx/cairo/libpixman/src/refactor
+++ /dev/null
@@ -1,478 +0,0 @@
-Roadmap
-
-- Move all the fetchers etc. into pixman-image to make pixman-compose.c
- less intimidating.
-
- DONE
-
-- Make combiners for unified alpha take a mask argument. That way
- we won't need two separate paths for unified vs component in the
- general compositing code.
-
- DONE, except that the Altivec code needs to be updated. Luca is
- looking into that.
-
-- Delete separate 'unified alpha' path
-
- DONE
-
-- Split images into their own files
-
- DONE
-
-- Split the gradient walker code out into its own file
-
- DONE
-
-- Add scanline getters per image
-
- DONE
-
-- Generic 64 bit fetcher
-
- DONE
-
-- Split fast path tables into their respective architecture dependent
- files.
-
-See "Render Algorithm" below for rationale
-
-Images will eventually have these virtual functions:
-
- get_scanline()
- get_scanline_wide()
- get_pixel()
- get_pixel_wide()
- get_untransformed_pixel()
- get_untransformed_pixel_wide()
- get_unfiltered_pixel()
- get_unfiltered_pixel_wide()
-
- store_scanline()
- store_scanline_wide()
-
-1.
-
-Initially we will just have get_scanline() and get_scanline_wide();
-these will be based on the ones in pixman-compose. Hopefully this will
-reduce the complexity in pixman_composite_rect_general().
-
-Note that there are access considerations - the compose function is
-being compiled twice.
-
-
-2.
-
-Split image types into their own source files. Export noop virtual
-reinit() call. Call this whenever a property of the image changes.
-
-
-3.
-
-Split the get_scanline() call into smaller functions that are
-initialized by the reinit() call.
-
-The Render Algorithm:
- (first repeat, then filter, then transform, then clip)
-
-Starting from a destination pixel (x, y), do
-
- 1 x = x - xDst + xSrc
- y = y - yDst + ySrc
-
- 2 reject pixel that is outside the clip
-
- This treats clipping as something that happens after
- transformation, which I think is correct for client clips. For
- hierarchy clips it is wrong, but who really cares? Without
-GraphicsExposes, hierarchy clips are basically irrelevant. Yes,
- you could imagine cases where the pixels of a subwindow of a
- redirected, transformed window should be treated as
- transparent. I don't really care
-
- Basically, I think the render spec should say that pixels that
-	are unavailable due to the hierarchy have undefined content,
- and that GraphicsExposes are not generated. Ie., basically
- that using non-redirected windows as sources is fail. This is
- at least consistent with the current implementation and we can
- update the spec later if someone makes it work.
-
- The implication for render is that it should stop passing the
-	clip it should be used in computing the composite region and
- clip it should be used in computing the composite region and
- nowhere else, regardless of what "has_client_clip" says. The
- default should be for there to not be any clip.
-
- I would really like to get rid of the client clip as well for
- source images, but unfortunately there is at least one
- application in the wild that uses them.
-
- 3 Transform pixel: (x, y) = T(x, y)
-
- 4 Call p = GetUntransformedPixel (x, y)
-
- 5 If the image has an alpha map, then
-
- Call GetUntransformedPixel (x, y) on the alpha map
-
- add resulting alpha channel to p
-
- return p
-
- Where GetUnTransformedPixel is:
-
- 6 switch (filter)
- {
- case NEAREST:
- return GetUnfilteredPixel (x, y);
- break;
-
- case BILINEAR:
- return GetUnfilteredPixel (...) // 4 times
- break;
-
- case CONVOLUTION:
- return GetUnfilteredPixel (...) // as many times as necessary.
- break;
- }
-
- Where GetUnfilteredPixel (x, y) is
-
- 7 switch (repeat)
- {
- case REPEAT_NORMAL:
- case REPEAT_PAD:
- case REPEAT_REFLECT:
- // adjust x, y as appropriate
- break;
-
- case REPEAT_NONE:
- if (x, y) is outside image bounds
- return 0;
- break;
- }
-
- return GetRawPixel(x, y)
-
- Where GetRawPixel (x, y) is
-
- 8 Compute the pixel in question, depending on image type.
-
-For gradients, repeat has a totally different meaning, so
-UnfilteredPixel() and RawPixel() must be the same function so that
-gradients can do their own repeat algorithm.
-
-So, the GetRawPixel
-
- for bits must deal with repeats
- for gradients must deal with repeats (differently)
- for solids, should ignore repeats.
-
- for polygons, when we add them, either ignore repeats or do
- something similar to bits (in which case, we may want an extra
- layer of indirection to modify the coordinates).
-
-It is then possible to build things like "get scanline" or "get tile" on
-top of this. In the simplest case, just repeatedly calling GetPixel()
-would work, but specialized get_scanline()s or get_tile()s could be
-plugged in for common cases.
-
-By not plugging anything in for images with access functions, we only
-have to compile the pixel functions twice, not the scanline functions.
-
-And we can get rid of fetchers for the bizarre formats that no one
-uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth
-considering a generic format based pixel fetcher for these edge cases.
-
-Since the actual routines depend on the image attributes, the images
-must be notified when those change and update their function pointers
-appropriately. So there should probably be a virtual function called
-(* reinit) or something like that.
-
-There will also be wide fetchers for both pixels and lines. The line
-fetcher will just call the wide pixel fetcher. The wide pixel fetcher
-will just call expand, except for 10 bit formats.
-
-Rendering pipeline:
-
-Drawable:
- 0. if (picture has alpha map)
- 0.1. Position alpha map according to the alpha_x/alpha_y
- 0.2. Where the two drawables intersect, the alpha channel
- Replace the alpha channel of source with the one
- from the alpha map. Replacement only takes place
- in the intersection of the two drawables' geometries.
- 1. Repeat the drawable according to the repeat attribute
- 2. Reconstruct a continuous image according to the filter
- 3. Transform according to the transform attribute
- 4. Position image such that src_x, src_y is over dst_x, dst_y
- 5. Sample once per destination pixel
- 6. Clip. If a pixel is not within the source clip, then no
- compositing takes place at that pixel. (Ie., it's *not*
- treated as 0).
-
- Sampling a drawable:
-
- - If the channel does not have an alpha channel, the pixels in it
- are treated as opaque.
-
- Note on reconstruction:
-
- - The top left pixel has coordinates (0.5, 0.5) and pixels are
- spaced 1 apart.
-
-Gradient:
- 1. Unless gradient type is conical, repeat the underlying (0, 1)
- gradient according to the repeat attribute
- 2. Integrate the gradient across the plane according to type.
- 3. Transform according to transform attribute
- 4. Position gradient
- 5. Sample once per destination pixel.
- 6. Clip
-
-Solid Fill:
- 1. Repeat has no effect
- 2. Image is already continuous and defined for the entire plane
- 3. Transform has no effect
- 4. Positioning has no effect
- 5. Sample once per destination pixel.
- 6. Clip
-
-Polygon:
- 1. Repeat has no effect
- 2. Image is already continuous and defined on the whole plane
- 3. Transform according to transform attribute
- 4. Position image
- 5. Supersample 15x17 per destination pixel.
- 6. Clip
-
-Possibly interesting additions:
- - More general transformations, such as warping, or general
- shading.
-
- - Shader image where a function is called to generate the
- pixel (ie., uploading assembly code).
-
- - Resampling kernels
-
- In principle the polygon image uses a 15x17 box filter for
- resampling. If we allow general resampling filters, then we
- get all the various antialiasing types for free.
-
- Bilinear downsampling looks terrible and could be much
- improved by a resampling filter. NEAREST reconstruction
- combined with a box resampling filter is what GdkPixbuf
- does, I believe.
-
- Useful for high frequency gradients as well.
-
- (Note that the difference between a reconstruction and a
- resampling filter is mainly where in the pipeline they
- occur. High quality resampling should use a correctly
- oriented kernel so it should happen after transformation.
-
- An implementation can transform the resampling kernel and
- convolve it with the reconstruction if it so desires, but it
- will need to deal with the fact that the resampling kernel
- will not necessarily be pixel aligned.
-
- "Output kernels"
-
- One could imagine doing the resampling after compositing,
- ie., for each destination pixel sample each source image 16
- times, then composite those subpixels individually, then
- finally apply a kernel.
-
- However, this is effectively the same as full screen
- antialiasing, which is a simpler way to think about it. So
- resampling kernels may make sense for individual images, but
- not as a post-compositing step.
-
- Fullscreen AA is inefficient without chained compositing
- though. Consider an (image scaled up to oversample size IN
- some polygon) scaled down to screen size. With the current
- implementation, there will be a huge temporary. With chained
- compositing, the whole thing ends up being equivalent to the
- output kernel from above.
-
- - Color space conversion
-
- The complete model here is that each surface has a color
- space associated with it and that the compositing operation
- also has one associated with it. Note also that gradients
- should have associated colorspaces.
-
- - Dithering
-
- Dithering something that is already dithered will look
- terrible, but then don't do that. (Dithering happens after
- resampling, if at all. What is the relationship with color
- spaces? Presumably dithering should happen in linear
- intensity space.)
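-
- A minimal sketch of dithering one linear-intensity channel
- after resampling; the 4x4 Bayer matrix is purely illustrative
- (the point is the threshold-before-quantize step, whatever
- matrix is used):
-
-    /* Perturb a channel value v in [0, 1] by an ordered-dither
-     * threshold before quantizing to `levels` steps. */
-    static unsigned
-    dither_channel (double v, int x, int y, unsigned levels)
-    {
-        static const int bayer4[4][4] = {
-            {  0,  8,  2, 10 },
-            { 12,  4, 14,  6 },
-            {  3, 11,  1,  9 },
-            { 15,  7, 13,  5 },
-        };
-        double t = (bayer4[y & 3][x & 3] + 0.5) / 16.0;
-        double q = v * (levels - 1) + t;
-
-        if (q < 0.0)
-            q = 0.0;
-        if (q > levels - 1)
-            q = levels - 1;
-
-        return (unsigned) q;
-    }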
-
- - Floating point surfaces, 16, 32 and possibly 64 bit per
- channel.
-
- Maybe crack:
-
- - Glyph polygons
-
- If glyphs could be given as polygons, they could be
- positioned and rasterized more accurately. The glyph
- structure would need subpixel positioning though.
-
- - Luminance vs. coverage for the alpha channel
-
- Whether the alpha channel should be interpreted as luminance
- modulation or as coverage (intensity modulation). This is a
- bit of a departure from the rendering model though. It could
- also be considered whether it should be possible to have
- both channels in the same drawable.
-
- - Alternative for component alpha
-
- - Set component-alpha on the output image.
-
- - This means each of the components are sampled
- independently and composited in the corresponding
- channel only.
-
- - Have 3 x oversampled mask
-
- - Scale it down by 3 horizontally, with [ 1/3, 1/3, 1/3 ]
- resampling filter.
-
- Is this equivalent to just using a component alpha mask?
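-
- The downscale itself is trivial; a minimal sketch, assuming a
- tightly packed a8 mask three times the destination width:
-
-    #include <stdint.h>
-
-    /* Collapse a 3x horizontally oversampled a8 mask into a
-     * plain a8 mask with the [1/3, 1/3, 1/3] filter; mask3x is
-     * 3 * width samples wide. */
-    static void
-    mask_3x_downscale (const uint8_t *mask3x, uint8_t *out,
-                       int width)
-    {
-        int x;
-
-        for (x = 0; x < width; x++)
-        {
-            unsigned sum = mask3x[3 * x + 0] +
-                           mask3x[3 * x + 1] +
-                           mask3x[3 * x + 2];
-
-            out[x] = (uint8_t) (sum / 3);
-        }
-    }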
-
- Incompatible changes:
-
- - Gradients could be specified with premultiplied colors. (You
-   can use a mask to get things like gradients from solid red to
-   transparent red.)
-
-Refactoring pixman
-
-The pixman code is not particularly nice, to put it mildly. Among
-the issues are:
-
-- inconsistent naming style (fb vs Fb, camelCase vs
- underscore_naming). Sometimes there is even inconsistency *within*
- one name.
-
- fetchProc32 ACCESS(pixman_fetchProcForPicture32)
-
- may be one of the ugliest names ever created.
-
- Coding style: use the one from cairo, except that pixman uses
- this brace style:
-
- while (blah)
- {
- }
-
- Format do/while like this:
-
- do
- {
-
- }
- while (...);
-
-- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex
-
-- switch case logic in pixman-access.c
-
- Instead, it would be better to store function pointers in the
- image objects themselves (see the sketch below):
-
- get_pixel()
- get_scanline()
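-
- A minimal sketch of that layout; the struct and field names
- are hypothetical:
-
-    #include <stdint.h>
-
-    typedef struct image image_t;
-
-    /* Accessors live in the image itself; pixman-access.c
-     * would fill them in per format at image creation time. */
-    struct image
-    {
-        int       width, height;
-        uint32_t *bits;
-
-        uint32_t (*get_pixel)    (image_t *img, int x, int y);
-        void     (*get_scanline) (image_t *img, int x, int y,
-                                  int width, uint32_t *buffer);
-    };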
-
-- Much of the scanline fetching code is for formats that no one
- ever uses. a2r2g2b2 anyone?
-
- It would probably be worthwhile having a generic fetcher for any
- pixman format whatsoever.
-
-- Code related to particular image types should be split into individual
- files.
-
- pixman-bits-image.c
- pixman-linear-gradient-image.c
- pixman-radial-gradient-image.c
- pixman-solid-image.c
-
-- Fast path code should be split into files based on architecture:
-
- pixman-mmx-fastpath.c
- pixman-sse2-fastpath.c
- pixman-c-fastpath.c
-
- etc.
-
- Each of these files should then export a fastpath table, which would
- be declared in pixman-private.h. This should allow us to get rid
- of the pixman-mmx.h files.
-
- The fast path table should describe each fast path. Ie., there
- should be bitfields indicating what the fast path can handle,
- rather than, as now, only one format per src/mask/dest. For
- example:
-
- {
- FAST_a8r8g8b8 | FAST_x8r8g8b8,
- FAST_null,
- FAST_x8r8g8b8,
- FAST_repeat_normal | FAST_repeat_none,
- the_fast_path
- }
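-
- Spelled out as C, reusing the hypothetical image_t (and
- <stdint.h>) from the accessor sketch above; the flag values
- and function signature are illustrative only:
-
-    typedef void (*fast_path_func_t) (int op, image_t *src,
-                                      image_t *mask,
-                                      image_t *dest);
-
-    enum
-    {
-        FAST_null          = 1 << 0,
-        FAST_a8r8g8b8      = 1 << 1,
-        FAST_x8r8g8b8      = 1 << 2,
-        FAST_repeat_none   = 1 << 3,
-        FAST_repeat_normal = 1 << 4,
-    };
-
-    typedef struct
-    {
-        uint32_t         src_formats;  /* OR of FAST_* formats */
-        uint32_t         mask_formats;
-        uint32_t         dest_formats;
-        uint32_t         repeat_flags;
-        fast_path_func_t func;
-    } fast_path_t;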
-
-There should then be *one* file that implements pixman_image_composite().
-It should do this:
-
-    optimize the operator;
-
-    convert 1x1 repeat to solid (actually this should be done at
-    image creation time);
-
-    check whether there is a useful fast path.
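-
-In outline, with all helper names hypothetical and fast_path_t as
-sketched above:
-
-    extern int optimize_operator (int op, image_t *src,
-                                  image_t *mask, image_t *dest);
-    extern const fast_path_t *lookup_fast_path (int op,
-                                                image_t *src,
-                                                image_t *mask,
-                                                image_t *dest);
-    extern void general_composite (int op, image_t *src,
-                                   image_t *mask, image_t *dest);
-
-    void
-    pixman_image_composite_sketch (int op, image_t *src,
-                                   image_t *mask, image_t *dest)
-    {
-        /* 1x1 repeat -> solid is assumed converted at image
-         * creation time */
-        const fast_path_t *path;
-
-        op = optimize_operator (op, src, mask, dest);
-
-        path = lookup_fast_path (op, src, mask, dest);
-        if (path)
-            path->func (op, src, mask, dest);
-        else
-            general_composite (op, src, mask, dest);
-    }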
-
-There should be a file called pixman-cpu.c that contains all the
-architecture specific stuff to detect what CPU features we have.
-
-Issues that must be kept in mind:
-
- - we need accessor code to be preserved
-
- - maybe there should be a "store_scanline" too?
-
- Is this sufficient?
-
- We should preserve the optimization where the
- compositing happens directly in the destination
- whenever possible.
-
- - It should be possible to create GPU samplers from the
- images.
-
-The "horizontal" classification should be a bit in the image, the
-"vertical" classification should just happen inside the gradient
-file. Note though that
-
- (a) these will change if the transformation/repeat changes.
-
- (b) at the moment the optimization for linear gradients
- takes the source rectangle into account. Presumably
- this is to also optimize the case where the gradient
- is close enough to horizontal?
-
-Who is responsible for repeats? In principle it should be the scanline
-fetch. Right now NORMAL repeats are handled by walk_composite_region()
-while other repeats are handled by the scanline code.
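-
-If the scanline fetch owned all repeats, NORMAL repeat would reduce
-to a coordinate wrap at fetch time. A minimal sketch:
-
-    /* Wrap a source coordinate for NORMAL repeat; handles
-     * negative coordinates as well. */
-    static inline int
-    repeat_normal (int coord, int size)
-    {
-        coord %= size;
-        if (coord < 0)
-            coord += size;
-        return coord;
-    }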
-
-
-(Random note on filtering: do you filter before or after
-transformation? Hardware is going to filter after transformation;
-this is also what pixman does currently.) It's not completely clear
-what filtering *after* transformation means. One thing that might
-look good would be to do *supersampling*, ie., compute multiple
-subpixels per destination pixel, then average them together.
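-
-A minimal sketch of that idea, where the hypothetical sample()
-callback stands for the whole transform-and-filter pipeline
-evaluated at one point in destination space:
-
-    #include <stdint.h>
-
-    /* Average an n x n grid of subpixel samples per destination
-     * pixel (dx, dy), channel by channel. */
-    static uint32_t
-    supersample_pixel (uint32_t (*sample) (double x, double y),
-                       int dx, int dy, int n)
-    {
-        unsigned a = 0, r = 0, g = 0, b = 0;
-        int i, j, count = n * n;
-
-        for (j = 0; j < n; j++)
-        {
-            for (i = 0; i < n; i++)
-            {
-                /* subpixel centers, following the (0.5, 0.5)
-                 * pixel-center convention */
-                uint32_t p = sample (dx + (i + 0.5) / n,
-                                     dy + (j + 0.5) / n);
-
-                a += p >> 24;
-                r += (p >> 16) & 0xff;
-                g += (p >> 8) & 0xff;
-                b += p & 0xff;
-            }
-        }
-
-        return ((a / count) << 24) | ((r / count) << 16) |
-               ((g / count) << 8)  |  (b / count);
-    }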
diff --git a/gfx/cairo/libpixman/src/solaris-hwcap.mapfile b/gfx/cairo/libpixman/src/solaris-hwcap.mapfile
new file mode 100644
index 0000000000..87efce1e34
--- /dev/null
+++ b/gfx/cairo/libpixman/src/solaris-hwcap.mapfile
@@ -0,0 +1,30 @@
+###############################################################################
+#
+# Copyright 2009, Oracle and/or its affiliates. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
+#
+# Override the linker's detection of CMOV/MMX/SSE instructions so this
+# library isn't flagged as only usable on CPUs with those ISAs, since it
+# checks at runtime for availability before calling them.
+
+hwcap_1 = V0x0 FPU OVERRIDE;