diff options
67 files changed, 9288 insertions, 13225 deletions
diff --git a/gfx/cairo/libpixman/src/Makefile.in b/gfx/cairo/libpixman/src/Makefile.in deleted file mode 100644 index f119333b44..0000000000 --- a/gfx/cairo/libpixman/src/Makefile.in +++ /dev/null @@ -1,10 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -include $(topsrcdir)/config/rules.mk - -# The ARM asm functions here don't appreciate being called by functions -# compiled with -mapcs-frame. See bug 832752. -CXXFLAGS := $(filter-out -mapcs-frame,$(CXXFLAGS)) -CFLAGS := $(filter-out -mapcs-frame,$(CFLAGS)) diff --git a/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h b/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h new file mode 100644 index 0000000000..93c8805b51 --- /dev/null +++ b/gfx/cairo/libpixman/src/dither/blue-noise-64x64.h @@ -0,0 +1,77 @@ +/* WARNING: This file is generated by make-blue-noise.c + * Please edit that file instead of this one. 
+ */ + +#ifndef BLUE_NOISE_64X64_H +#define BLUE_NOISE_64X64_H + +#include <stdint.h> + +static const uint16_t dither_blue_noise_64x64[4096] = { + 3039, 1368, 3169, 103, 2211, 1248, 2981, 668, 2633, 37, 3963, 2903, 384, 2564, 3115, 1973, 3348, 830, 2505, 1293, 3054, 1060, 1505, 3268, 400, 1341, 593, 3802, 3384, 429, 4082, 1411, 2503, 3863, 126, 1292, 1887, 2855, 205, 2094, 2977, 1899, 3924, 356, 3088, 2500, 3942, 1409, 2293, 1734, 3732, 1291, 3227, 277, 2054, 786, 2871, 411, 2425, 1678, 3986, 455, 2879, 2288, + 388, 1972, 3851, 778, 2768, 3697, 944, 2123, 1501, 3533, 937, 1713, 1381, 3888, 156, 1242, 516, 2888, 1607, 3676, 632, 2397, 3804, 2673, 1898, 3534, 2593, 1777, 1170, 2299, 3013, 1838, 523, 3053, 1647, 3601, 3197, 959, 1520, 3633, 893, 2437, 3367, 2187, 1258, 137, 1965, 401, 3546, 643, 3087, 2498, 733, 2786, 3371, 4053, 1266, 1977, 3663, 183, 2570, 2107, 1183, 3708, + 907, 2473, 1151, 3363, 1527, 1902, 232, 3903, 3060, 496, 2486, 3206, 2165, 861, 2387, 3653, 2101, 3972, 132, 2162, 3437, 1827, 215, 895, 3114, 271, 969, 2932, 197, 1598, 878, 3696, 1140, 2120, 904, 2431, 302, 3846, 2675, 481, 3187, 66, 1440, 650, 3833, 2826, 3435, 901, 2936, 2111, 250, 1875, 3609, 1174, 1747, 162, 2346, 3420, 913, 3172, 1383, 752, 3298, 1735, + 3540, 2938, 249, 2324, 526, 3099, 2561, 1324, 2347, 1861, 1200, 3702, 257, 3442, 1514, 2999, 992, 1766, 2735, 1163, 478, 2943, 1279, 3635, 2177, 1464, 3672, 2386, 3871, 3340, 2690, 64, 3489, 2811, 3999, 633, 1948, 1243, 2269, 1807, 1143, 2750, 3729, 1790, 2363, 1053, 1537, 2636, 4065, 1076, 1476, 3869, 450, 2200, 2676, 658, 2979, 1548, 544, 1913, 2838, 3911, 116, 2698, + 517, 1295, 3997, 1739, 3665, 1083, 3509, 599, 3400, 118, 2956, 720, 2689, 1907, 567, 2523, 284, 3397, 711, 3219, 2450, 3985, 1665, 2549, 562, 3011, 1855, 729, 1355, 528, 1908, 2456, 1384, 337, 1540, 2654, 3138, 3513, 703, 4080, 3314, 2047, 855, 3037, 209, 3317, 577, 1828, 17, 2336, 3193, 2748, 962, 3441, 1450, 3246, 1075, 3878, 2615, 3497, 1033, 2310, 1442, 2183, + 1654, 
3254, 2061, 738, 2832, 148, 2030, 1670, 909, 3850, 2109, 1533, 4046, 1085, 3098, 3897, 1378, 2248, 3829, 1495, 1966, 23, 797, 3427, 1124, 4057, 95, 2787, 2190, 3074, 3950, 742, 3194, 1999, 3386, 1113, 16, 1657, 2804, 201, 1543, 383, 2559, 1325, 3604, 2068, 2493, 3771, 1284, 3460, 710, 1716, 2447, 80, 3811, 2032, 347, 2227, 15, 1689, 397, 3084, 662, 3798, + 973, 43, 2608, 3143, 1459, 2423, 4066, 2770, 3191, 1283, 2630, 314, 3235, 2289, 72, 1822, 2840, 924, 350, 2653, 1057, 3715, 2235, 2775, 346, 2083, 1553, 3292, 1081, 274, 1686, 1188, 2327, 3743, 578, 2234, 3916, 2519, 1011, 3056, 2207, 3438, 3890, 537, 1617, 837, 3094, 373, 2795, 1980, 276, 3951, 1353, 3015, 844, 1724, 3651, 2923, 1316, 4092, 2504, 3627, 1936, 2854, + 2461, 3929, 1193, 421, 3746, 820, 1180, 286, 2261, 532, 3625, 1812, 802, 1327, 3527, 670, 3730, 2025, 3124, 3565, 529, 2960, 1769, 1390, 3196, 2494, 3756, 796, 3618, 2602, 3463, 2847, 166, 953, 1745, 2900, 438, 2070, 1418, 3741, 639, 1205, 1891, 2882, 2282, 4012, 1182, 1696, 3630, 951, 2904, 2170, 3530, 375, 2320, 2742, 1132, 701, 3216, 2023, 847, 1230, 310, 3431, + 770, 1961, 3531, 1702, 2181, 3370, 1877, 3072, 1571, 3389, 1071, 2415, 3782, 2803, 1610, 2454, 1211, 182, 1655, 2322, 1282, 3372, 287, 3935, 704, 1232, 415, 1910, 2286, 1399, 556, 1964, 4068, 2444, 3605, 1272, 3345, 816, 3526, 256, 2402, 2777, 955, 345, 3289, 111, 2727, 635, 2396, 1488, 3331, 600, 1032, 1575, 4026, 515, 3507, 2433, 1605, 460, 3364, 2783, 1810, 1397, + 2334, 223, 2945, 688, 2533, 99, 2705, 624, 3944, 2073, 46, 2978, 508, 2132, 269, 3173, 3453, 2631, 4076, 694, 1892, 2586, 972, 2178, 3470, 1695, 2849, 3141, 77, 3884, 994, 3029, 1536, 673, 3083, 124, 2583, 1722, 2821, 1944, 4027, 1661, 3176, 3728, 1337, 1813, 3503, 2035, 3930, 157, 2537, 1865, 3096, 2646, 1941, 3252, 1449, 135, 2836, 3758, 2139, 84, 3678, 3106, + 3862, 1545, 3307, 1320, 3955, 1031, 3664, 1306, 2460, 776, 1487, 3294, 1187, 3990, 1903, 1021, 549, 1484, 943, 3027, 97, 3853, 1499, 2880, 198, 2575, 3995, 1089, 
1587, 2475, 3282, 339, 2657, 1158, 2105, 1493, 3943, 580, 3232, 1287, 846, 48, 2480, 2112, 771, 2534, 459, 3134, 850, 1298, 3790, 325, 3652, 1249, 193, 940, 2202, 3895, 1829, 911, 1366, 2577, 1069, 534, + 2104, 1009, 2667, 392, 1983, 2917, 1645, 324, 3439, 2869, 3705, 1767, 2592, 756, 2916, 3683, 2276, 2850, 2053, 3594, 2403, 3181, 634, 3699, 1933, 906, 519, 2150, 3673, 764, 1770, 2220, 3795, 3336, 502, 3547, 2339, 1110, 301, 2210, 3354, 3643, 569, 1518, 2940, 3973, 1138, 1613, 2773, 2127, 2983, 1671, 769, 2161, 3800, 2730, 3127, 1179, 533, 3259, 2284, 4014, 1651, 2820, + 3566, 653, 1839, 3455, 2399, 789, 3149, 2244, 1863, 1099, 474, 2307, 158, 3541, 1312, 1711, 0, 3902, 360, 1629, 1091, 395, 1781, 1191, 2374, 3353, 1419, 3225, 206, 2931, 3553, 1046, 54, 1646, 2470, 910, 1860, 3137, 3770, 2635, 1562, 2809, 1215, 3788, 222, 2199, 3335, 67, 3606, 524, 1001, 3309, 2410, 3473, 591, 1619, 291, 2502, 3629, 2891, 335, 741, 3378, 168, + 2384, 3129, 4051, 22, 1444, 3613, 543, 3893, 186, 2665, 4062, 933, 3058, 2142, 449, 2711, 3224, 849, 1330, 3349, 2195, 2670, 3484, 2993, 32, 3774, 2722, 1859, 2548, 1268, 583, 2027, 3165, 2807, 4029, 227, 2897, 1434, 721, 1816, 195, 905, 2066, 3258, 1754, 970, 2674, 1880, 2338, 3915, 1485, 2660, 14, 1313, 2914, 2046, 4074, 791, 1917, 1301, 1725, 2687, 2019, 1443, + 418, 1186, 1664, 2859, 1049, 2056, 2741, 1226, 1589, 3186, 2042, 1377, 3449, 1574, 3941, 1063, 1930, 2501, 3751, 2930, 671, 4031, 888, 2081, 1544, 684, 1117, 351, 4052, 1698, 2393, 3881, 1439, 785, 1277, 2013, 3488, 441, 2459, 3980, 3061, 3481, 2543, 419, 3020, 609, 3515, 1350, 799, 2878, 348, 2034, 3966, 1824, 950, 3281, 1394, 2239, 3452, 55, 3922, 3119, 892, 3785, + 3023, 2140, 782, 2492, 3817, 241, 3355, 2424, 856, 3639, 612, 2556, 245, 2858, 705, 2316, 3562, 495, 1748, 128, 1912, 1454, 280, 2552, 3905, 3130, 2274, 3472, 834, 3055, 240, 2692, 471, 2272, 3301, 2632, 1080, 3693, 2136, 1029, 1364, 590, 1611, 4067, 1190, 2360, 3827, 261, 3180, 1768, 3471, 1103, 3003, 520, 3674, 
151, 2571, 555, 3033, 982, 2353, 504, 1259, 2555, + 149, 3889, 3380, 493, 3178, 1681, 663, 1924, 2990, 49, 1792, 3861, 1192, 1987, 3273, 297, 1457, 3043, 1177, 2292, 3249, 2829, 3682, 1154, 1758, 428, 2872, 1993, 1500, 3703, 1129, 3421, 1840, 3754, 163, 659, 1733, 3182, 38, 2875, 1957, 3614, 2237, 78, 1873, 2801, 1513, 2121, 1074, 2516, 667, 3710, 1429, 2430, 2088, 2830, 1072, 3557, 1531, 2733, 1955, 3286, 3590, 1826, + 2778, 1068, 1932, 1452, 2279, 1185, 3564, 3952, 1391, 2726, 3313, 2331, 870, 3709, 1674, 2772, 4085, 808, 2596, 3848, 927, 538, 2335, 3334, 773, 3597, 1347, 109, 2663, 608, 2108, 2994, 936, 1524, 2922, 3968, 2422, 1467, 845, 3870, 321, 2704, 1073, 3308, 3680, 823, 430, 3375, 4030, 112, 2171, 2695, 267, 3374, 731, 1627, 3919, 1871, 352, 3839, 1370, 234, 794, 1532, + 3245, 647, 3575, 74, 3045, 2766, 285, 2174, 498, 1059, 1551, 385, 3125, 2598, 143, 1128, 2095, 3395, 318, 1590, 3524, 1345, 1969, 242, 2759, 2092, 947, 3926, 3244, 2356, 1658, 6, 3593, 2554, 1172, 1995, 371, 2755, 3417, 2294, 1570, 3164, 748, 2517, 1401, 3111, 2420, 1662, 2910, 1276, 3276, 854, 1804, 4000, 1253, 2987, 229, 2344, 3184, 649, 2196, 2921, 4095, 2389, + 1289, 2193, 2579, 4023, 757, 1858, 986, 3199, 2514, 3475, 4021, 2154, 651, 1432, 3468, 2404, 574, 1799, 3105, 2145, 86, 2614, 3218, 1565, 4088, 2481, 3079, 1815, 323, 1212, 3837, 759, 2159, 435, 3223, 784, 3659, 1114, 1888, 550, 1221, 3786, 1803, 499, 2117, 185, 3763, 942, 589, 2001, 3838, 1483, 3154, 2256, 468, 2544, 3403, 898, 1208, 2610, 3622, 967, 1929, 378, + 3781, 220, 1656, 1115, 3347, 2428, 3822, 1577, 712, 1959, 110, 2765, 1762, 3854, 979, 2928, 3714, 1371, 746, 3969, 2884, 975, 3779, 641, 1142, 159, 1460, 702, 3485, 2866, 2495, 3330, 1305, 3937, 1635, 2229, 2962, 146, 4055, 3091, 2417, 100, 3508, 2933, 4006, 1167, 1920, 2760, 3552, 2545, 433, 2845, 142, 1056, 1886, 3616, 1435, 2099, 3803, 1749, 27, 1446, 3350, 2843, + 884, 3310, 2948, 2103, 447, 1351, 187, 2895, 3655, 1256, 3036, 932, 3325, 2257, 451, 1915, 40, 2780, 
2438, 1112, 1814, 423, 2290, 1905, 2898, 3419, 2306, 3760, 1938, 486, 1019, 1791, 3010, 2628, 203, 3408, 1269, 2507, 1606, 862, 2779, 2078, 952, 1529, 2638, 708, 3332, 1413, 2, 1726, 1156, 3500, 2392, 3791, 3076, 812, 107, 2861, 501, 3050, 3487, 2455, 594, 1731, + 2685, 1498, 680, 3908, 2621, 3529, 1786, 2236, 342, 2569, 1526, 3722, 230, 1290, 3203, 3947, 1609, 3516, 467, 3267, 3685, 1461, 3140, 3569, 367, 1759, 928, 2754, 1332, 2219, 4034, 260, 655, 1984, 978, 3814, 617, 2086, 3525, 279, 3841, 1373, 3361, 319, 2251, 3066, 407, 2382, 3918, 3133, 2168, 762, 1523, 507, 2641, 1677, 4025, 2413, 1584, 793, 2049, 1109, 3962, 2218, + 1194, 3692, 266, 1687, 981, 3103, 740, 3983, 1005, 3434, 570, 2383, 1942, 2718, 676, 2462, 1007, 2089, 1308, 2222, 233, 2568, 829, 1241, 2669, 3987, 514, 3303, 69, 3142, 1603, 3560, 2295, 3288, 1497, 2696, 1764, 2865, 1058, 3271, 1914, 477, 2529, 3927, 1736, 1273, 3752, 2029, 1012, 565, 2798, 4078, 1949, 3305, 1175, 2179, 380, 3366, 1195, 3849, 2637, 416, 2959, 125, + 3396, 2467, 2036, 3234, 2340, 68, 2819, 1436, 2011, 3139, 1704, 4073, 860, 3582, 1468, 2969, 211, 3157, 4056, 866, 2935, 2000, 3923, 31, 2157, 1477, 2429, 1147, 3792, 2557, 774, 2802, 1153, 3747, 464, 3192, 42, 3904, 539, 1474, 2283, 803, 2876, 1061, 75, 3477, 747, 2893, 1538, 3626, 251, 1322, 2506, 189, 2791, 3667, 939, 2991, 1971, 175, 3195, 1416, 3648, 1857, + 3052, 454, 851, 3789, 1271, 1906, 3694, 2484, 406, 2757, 26, 1189, 2909, 296, 2215, 3784, 1864, 637, 2715, 1673, 3445, 581, 1572, 3059, 3469, 761, 2984, 1737, 2058, 440, 1414, 1921, 121, 2527, 894, 2223, 1302, 2377, 3077, 2666, 3759, 3198, 1811, 3661, 2166, 2731, 1883, 359, 3285, 2458, 1805, 3459, 926, 3834, 675, 1893, 1496, 2612, 657, 3523, 1763, 2354, 564, 961, + 1367, 3977, 1588, 2714, 322, 3446, 1088, 625, 3887, 1354, 3535, 2090, 3316, 1760, 1127, 483, 3491, 1421, 2301, 94, 1202, 3740, 2311, 1014, 1878, 3836, 180, 3412, 991, 2868, 3953, 3450, 3081, 1632, 4071, 1882, 3543, 726, 1719, 179, 1171, 364, 1420, 622, 3090, 
1490, 946, 4007, 2212, 1102, 619, 2739, 2189, 1669, 2937, 3426, 39, 3940, 2191, 1264, 887, 4091, 2792, 2135, + 4, 2883, 2281, 631, 3044, 1641, 2232, 3243, 1773, 2319, 827, 2591, 629, 3938, 2426, 3222, 2629, 1044, 3879, 3293, 1952, 2749, 275, 2590, 472, 1372, 2496, 660, 3669, 2264, 208, 915, 2167, 561, 2828, 307, 3265, 1104, 3964, 2155, 3425, 1951, 4077, 2391, 283, 3387, 2581, 115, 1415, 3069, 3896, 141, 3158, 1214, 442, 2405, 1349, 3085, 425, 2528, 3002, 312, 1602, 3588, + 1137, 3323, 1963, 1002, 3578, 2521, 127, 925, 2970, 273, 3737, 1573, 167, 2863, 1509, 800, 147, 2059, 2942, 409, 921, 3151, 1451, 3909, 3333, 2844, 2096, 1512, 3136, 1210, 1798, 2709, 1331, 3586, 1034, 1521, 2441, 2926, 488, 2585, 775, 3031, 2693, 879, 3602, 1173, 2028, 3654, 2781, 841, 1975, 1507, 3646, 768, 3991, 2012, 996, 3544, 1666, 3810, 1990, 3360, 753, 2597, + 3736, 304, 1473, 3828, 485, 1334, 4008, 2072, 3495, 1136, 2806, 2004, 3236, 1010, 2130, 3819, 1750, 3567, 644, 2515, 1794, 3636, 698, 2137, 1162, 832, 3761, 326, 2613, 513, 3302, 3820, 357, 3163, 2259, 3733, 101, 1922, 1386, 3587, 1640, 28, 1286, 2141, 1761, 2918, 693, 1639, 457, 3250, 2434, 365, 2599, 1729, 3284, 2643, 306, 2793, 689, 1090, 104, 1309, 2305, 1831, + 2776, 859, 2446, 2915, 1778, 3337, 2677, 614, 1508, 2409, 469, 4033, 1321, 3563, 402, 3131, 2720, 1093, 1569, 4042, 1229, 2277, 216, 3046, 1817, 57, 3006, 1684, 4059, 2016, 795, 2440, 1652, 1960, 610, 2763, 920, 3864, 3110, 1026, 2326, 3762, 3233, 521, 3856, 173, 2457, 3939, 2138, 1262, 3572, 989, 3021, 2238, 119, 1445, 3832, 1809, 2297, 3467, 2700, 3684, 3102, 394, + 4036, 2050, 3256, 89, 2198, 1079, 248, 1845, 3805, 3104, 880, 1779, 2688, 717, 2373, 1375, 262, 2249, 3071, 13, 2813, 3429, 1600, 3984, 2416, 3603, 1299, 2298, 998, 3492, 1393, 2951, 10, 4009, 1247, 3462, 1679, 2204, 414, 2736, 316, 1894, 2816, 1050, 3373, 1462, 3107, 817, 3464, 21, 1835, 4070, 568, 1178, 3718, 875, 3168, 466, 2974, 1458, 2084, 616, 1564, 1018, + 1693, 546, 1244, 3899, 716, 3160, 3608, 
2877, 1220, 334, 3443, 2270, 44, 3000, 1843, 3928, 3405, 766, 3686, 2040, 587, 993, 2647, 387, 930, 2753, 630, 3274, 150, 2808, 453, 3638, 1092, 2352, 3030, 239, 2562, 700, 3240, 1257, 4016, 730, 1515, 2203, 2551, 417, 1866, 1123, 2348, 2902, 1550, 2678, 2075, 3238, 1630, 2531, 2115, 1255, 4054, 840, 290, 3874, 2477, 3399, + 2250, 3577, 2817, 1626, 2576, 1356, 2315, 792, 2087, 2618, 1612, 3855, 1263, 3637, 1036, 494, 1535, 2553, 1198, 1715, 3867, 3170, 1359, 1954, 3483, 1539, 2069, 3886, 1772, 2487, 1534, 2045, 3242, 806, 1578, 2018, 3948, 1423, 3596, 2076, 2466, 3424, 139, 3688, 871, 4049, 2852, 3342, 547, 3719, 327, 852, 3505, 207, 2794, 542, 3600, 45, 2411, 3324, 1788, 3012, 1235, 61, + 2655, 917, 253, 1986, 3738, 313, 1706, 4072, 120, 3229, 957, 597, 2024, 3262, 2453, 2857, 2002, 3190, 210, 2784, 2206, 300, 2400, 3766, 553, 3152, 218, 1150, 2988, 883, 3753, 627, 2664, 3831, 437, 3385, 1008, 2957, 60, 1636, 891, 2899, 1776, 3062, 1315, 2026, 194, 1643, 2079, 1296, 3201, 2465, 1379, 1927, 3898, 1125, 1847, 2846, 1552, 1028, 2725, 2169, 787, 3202, + 1441, 3982, 3032, 1052, 3251, 605, 2639, 3073, 1431, 3642, 2329, 2949, 341, 1634, 833, 129, 4020, 916, 3571, 669, 1506, 3411, 821, 2856, 1207, 2337, 2683, 3448, 340, 2214, 3128, 235, 1738, 1288, 2833, 2419, 606, 1884, 2668, 552, 3765, 1176, 399, 2302, 596, 3591, 2634, 767, 3845, 2767, 995, 3967, 491, 3057, 814, 2300, 3422, 691, 3797, 254, 3645, 509, 3478, 1836, + 2119, 475, 2445, 1525, 2175, 3539, 914, 1926, 473, 1157, 1800, 3971, 2701, 3739, 2129, 3486, 1333, 1784, 2366, 2982, 1070, 4089, 1802, 73, 1642, 3958, 835, 1837, 1480, 4043, 1217, 2469, 3416, 2113, 88, 3668, 1240, 3255, 3920, 2355, 3167, 2003, 2645, 3936, 3228, 1592, 1144, 3474, 2394, 79, 1820, 2241, 1594, 3656, 2584, 153, 1448, 3034, 2005, 2511, 1692, 1335, 3913, 217, + 2822, 3391, 745, 3813, 192, 1274, 2941, 3847, 2489, 3440, 744, 161, 1422, 1086, 572, 3004, 2617, 338, 3807, 2031, 236, 2472, 3065, 2098, 3358, 362, 2163, 3574, 497, 2788, 1970, 948, 3885, 685, 
3100, 1712, 2228, 292, 1408, 1016, 164, 3537, 1417, 941, 34, 2172, 3001, 358, 1491, 3147, 699, 3356, 258, 1149, 2946, 1787, 3931, 382, 1146, 3291, 818, 2890, 2379, 1096, + 3679, 1328, 1901, 3162, 2747, 1730, 2253, 5, 1556, 2818, 2093, 3166, 2522, 3410, 2287, 1701, 956, 3237, 620, 1596, 3300, 1307, 511, 3701, 1020, 2939, 1362, 2532, 3208, 749, 3641, 160, 1522, 2624, 1095, 4086, 826, 2841, 3583, 2173, 1727, 723, 2925, 1911, 2482, 3726, 863, 1962, 4028, 1111, 2835, 3773, 2449, 2022, 582, 3278, 923, 2619, 2152, 4039, 92, 1934, 3145, 677, + 2530, 53, 2303, 1003, 458, 3989, 739, 3321, 1064, 369, 3556, 877, 1900, 426, 3876, 1, 3617, 2106, 1197, 2805, 3634, 857, 2706, 1504, 2418, 682, 3868, 20, 1139, 1688, 2333, 3311, 2907, 1945, 265, 2385, 3433, 1601, 636, 2620, 3095, 4044, 386, 3382, 1184, 527, 2814, 3414, 2342, 465, 1889, 1343, 874, 3479, 1502, 2233, 3689, 1385, 559, 2745, 1463, 3465, 376, 1718, + 3217, 4045, 1580, 3612, 2525, 1228, 3018, 1958, 3725, 2358, 1361, 3996, 1581, 3063, 1224, 2737, 1475, 2442, 3946, 191, 1796, 2128, 3975, 134, 1916, 3318, 1597, 2071, 3749, 2672, 403, 1278, 602, 3745, 3220, 1374, 445, 2064, 3830, 243, 1252, 2390, 1563, 2724, 3875, 1818, 1346, 165, 1650, 3264, 2680, 117, 2998, 4081, 343, 2799, 9, 3122, 1743, 3724, 1040, 2231, 3842, 1209, + 900, 398, 2851, 697, 1797, 3482, 293, 2679, 1649, 566, 2954, 91, 2697, 714, 2060, 3211, 781, 480, 3040, 1038, 2611, 666, 2989, 3458, 1201, 2796, 548, 2975, 839, 3121, 1850, 4001, 2208, 1631, 790, 2558, 2972, 1148, 3213, 1849, 3624, 971, 2102, 108, 772, 3101, 2589, 3777, 1042, 656, 3907, 2097, 1615, 2540, 805, 1935, 1231, 3494, 2451, 268, 2995, 750, 2682, 2020, + 3024, 1392, 2124, 3279, 106, 2217, 1387, 822, 3214, 3825, 2160, 1000, 2395, 3691, 228, 4038, 1872, 3413, 1608, 2225, 3536, 303, 1653, 886, 2541, 224, 4037, 2252, 1428, 172, 3504, 958, 2848, 113, 3628, 1834, 3979, 19, 2317, 779, 2797, 518, 3174, 3549, 1482, 2266, 444, 2014, 3555, 2439, 1213, 3113, 535, 1135, 3204, 3858, 2309, 931, 623, 2009, 3359, 1566, 
140, 3550, + 1808, 3872, 2488, 1152, 3764, 2892, 3960, 2412, 353, 1223, 1825, 3444, 3116, 1717, 1082, 2313, 1280, 2661, 82, 3852, 1389, 3200, 2330, 3812, 2038, 3581, 1728, 1039, 3339, 2427, 586, 2580, 1238, 3328, 2280, 1047, 595, 2662, 1363, 3338, 1620, 3934, 2497, 1881, 1054, 3954, 3215, 864, 2887, 1801, 320, 3519, 2378, 3704, 1753, 424, 2958, 1660, 4005, 2601, 1116, 3912, 2381, 573, + 2740, 200, 828, 1667, 432, 1931, 1035, 1616, 3598, 2640, 728, 264, 1437, 557, 3501, 2966, 372, 3734, 974, 1978, 758, 2719, 1145, 452, 1433, 725, 2681, 408, 3843, 1918, 1547, 3906, 1996, 503, 1456, 3019, 3493, 1700, 3742, 355, 2134, 176, 1311, 615, 2867, 315, 1680, 1314, 8, 3297, 1494, 783, 1950, 83, 2656, 1382, 3561, 138, 2834, 1404, 330, 1904, 3156, 1027, + 1357, 3381, 3041, 3666, 2729, 734, 3415, 177, 3051, 2021, 4079, 2823, 3775, 2186, 2616, 869, 1668, 3148, 2367, 3315, 393, 4075, 1870, 2920, 3343, 2362, 3188, 1303, 2782, 825, 3171, 259, 2905, 3717, 2538, 184, 2074, 838, 2860, 2407, 1024, 3496, 3008, 3706, 1985, 2349, 3623, 2582, 4058, 2184, 2694, 3873, 2964, 990, 3346, 690, 2033, 1066, 2201, 3490, 2971, 718, 3700, 2188, + 4061, 391, 1989, 2325, 1430, 3150, 2125, 2526, 592, 1403, 976, 2351, 1165, 1851, 114, 3921, 2063, 613, 1358, 2785, 1623, 2254, 25, 3542, 1045, 246, 1852, 3554, 87, 2243, 3615, 1169, 727, 1705, 968, 3957, 3185, 1251, 500, 4063, 1751, 2622, 842, 1519, 90, 3393, 819, 490, 1874, 999, 571, 1275, 2271, 1586, 4040, 2448, 3126, 3731, 436, 885, 1708, 2421, 24, 1599, + 889, 2563, 1199, 645, 70, 4013, 1237, 3723, 1694, 3499, 3, 3266, 484, 2997, 3390, 1233, 2842, 3687, 152, 3480, 1084, 3698, 881, 2490, 1542, 3992, 2209, 692, 1690, 3022, 1470, 2625, 2114, 3512, 2359, 381, 2684, 1897, 3368, 1395, 3080, 289, 2065, 3981, 2758, 1141, 3097, 1472, 2870, 3352, 3707, 225, 3159, 505, 1895, 214, 1222, 1774, 2686, 3978, 3275, 1196, 3518, 2825, + 3270, 1720, 3796, 3466, 2650, 1841, 298, 899, 2862, 2091, 2671, 1744, 3735, 801, 1560, 349, 2262, 903, 1833, 2524, 512, 3117, 1793, 2827, 
476, 3038, 1216, 2550, 3826, 980, 431, 4048, 35, 2992, 1265, 1595, 765, 3675, 76, 2247, 696, 3456, 1254, 2452, 664, 1757, 2133, 3750, 145, 2332, 1554, 1981, 3580, 2712, 868, 3640, 2919, 638, 2275, 1427, 309, 2595, 2006, 492, + 2226, 178, 2911, 836, 1528, 3028, 2240, 3327, 404, 3970, 707, 1294, 2464, 2131, 4032, 2600, 3319, 1406, 2913, 3974, 2156, 1425, 221, 3877, 2017, 811, 3662, 272, 3287, 1988, 2408, 3357, 1746, 598, 3239, 3823, 2182, 2934, 1078, 2604, 3840, 1697, 2906, 413, 3210, 3880, 331, 2644, 1260, 848, 3042, 2535, 1077, 1438, 3261, 2365, 1561, 3799, 85, 3082, 1876, 674, 3932, 1101, + 3644, 1344, 1943, 2401, 390, 3835, 1048, 2572, 1541, 1133, 3075, 3584, 308, 2889, 1065, 1869, 601, 3783, 282, 1181, 736, 3312, 2368, 1126, 3383, 1675, 2734, 1426, 628, 2873, 1317, 843, 2717, 2048, 1004, 2536, 333, 1782, 3295, 1517, 219, 2153, 815, 3502, 1579, 2268, 987, 3409, 1780, 4018, 354, 665, 3914, 47, 1956, 456, 1006, 2010, 3406, 1130, 3621, 2894, 1549, 3092, + 2485, 640, 3993, 3179, 1270, 3436, 585, 1925, 3757, 2304, 136, 1976, 1486, 646, 3520, 50, 3155, 1637, 2435, 3522, 1937, 2756, 3748, 661, 2224, 58, 3230, 2357, 1830, 3892, 170, 3607, 1447, 3949, 190, 3392, 1336, 584, 4010, 918, 3016, 3670, 1155, 2406, 52, 1304, 3009, 607, 2085, 2699, 3205, 1848, 2291, 3402, 2764, 3865, 3048, 2508, 735, 2710, 443, 2341, 897, 263, + 1785, 2769, 983, 56, 2197, 1685, 2703, 202, 2944, 810, 3377, 2626, 3787, 3047, 2055, 1236, 2752, 2122, 945, 3093, 96, 1624, 439, 3014, 1388, 4015, 977, 448, 3506, 1098, 2242, 3026, 506, 2361, 2952, 1862, 3619, 2790, 1992, 2483, 525, 1868, 2652, 4093, 1998, 3595, 2478, 3816, 122, 1412, 929, 3716, 1166, 1648, 813, 1300, 199, 1489, 3998, 1771, 1310, 3808, 2052, 3423, + 434, 3712, 1625, 3558, 2955, 853, 4019, 1348, 3511, 1732, 1246, 487, 934, 1672, 2510, 3965, 788, 3711, 396, 1369, 4090, 1055, 2603, 1879, 3528, 2518, 2067, 3005, 1516, 2588, 751, 1740, 3418, 1131, 1576, 686, 2296, 1118, 18, 3263, 1365, 3401, 294, 737, 3177, 410, 867, 1633, 2963, 3579, 2375, 
252, 2881, 479, 2471, 3576, 2180, 3306, 332, 2255, 3035, 41, 2648, 1396, + 2929, 2230, 1219, 2512, 446, 2008, 3189, 2388, 626, 2164, 2831, 4047, 2376, 174, 3272, 368, 1469, 3226, 2578, 1991, 2874, 2263, 3681, 876, 188, 1239, 683, 3776, 226, 3183, 4083, 2148, 63, 2649, 3859, 299, 3086, 3933, 1585, 2185, 3767, 988, 1707, 2908, 1407, 1844, 2771, 2245, 1161, 560, 1755, 3376, 2051, 4064, 3135, 1832, 652, 2853, 1051, 3649, 760, 3290, 1105, 3945, + 872, 154, 3207, 713, 3780, 1453, 281, 1087, 3695, 30, 3299, 1919, 1400, 3551, 1119, 1890, 2314, 618, 1703, 3428, 724, 295, 3146, 1557, 3341, 2896, 1683, 2723, 1974, 1017, 541, 1380, 3720, 804, 3280, 2082, 997, 2567, 777, 2961, 213, 2707, 2328, 3632, 1025, 3891, 3304, 255, 4003, 3108, 2587, 1323, 743, 1479, 105, 1013, 3901, 1618, 2044, 2627, 1465, 1846, 576, 1994, + 2560, 3521, 1742, 2118, 2800, 3404, 1783, 2609, 2968, 1582, 1022, 412, 2713, 687, 2976, 3857, 2761, 3620, 62, 1108, 3844, 1340, 2100, 540, 2345, 3925, 405, 3457, 1319, 2468, 3362, 2815, 1867, 2372, 1281, 1714, 3690, 482, 3498, 1842, 1285, 3994, 558, 2039, 81, 2499, 678, 1481, 1923, 964, 12, 3824, 2980, 2205, 2762, 3432, 2398, 181, 3247, 462, 4094, 2350, 3589, 3089, + 1555, 1094, 4041, 247, 1267, 908, 3959, 2041, 732, 3860, 2343, 3132, 3769, 2144, 1621, 237, 912, 1329, 3025, 2146, 2642, 1775, 3721, 2746, 1121, 1953, 902, 2285, 130, 3671, 1659, 278, 3153, 522, 2721, 123, 2996, 1466, 2380, 377, 3231, 873, 1510, 3476, 3123, 1250, 2147, 3650, 2839, 3451, 2323, 1122, 3545, 379, 1765, 1218, 603, 3768, 1360, 938, 2885, 133, 1245, 363, + 2364, 554, 2743, 3344, 2474, 530, 3112, 169, 1297, 3430, 536, 1741, 98, 1043, 2574, 3253, 2246, 1854, 4022, 510, 3283, 204, 858, 3398, 36, 3118, 1478, 3794, 2986, 706, 2176, 922, 3559, 1097, 3976, 3322, 2149, 1160, 2810, 3883, 2007, 2513, 2953, 328, 1721, 3793, 422, 2566, 807, 329, 1638, 1967, 648, 2520, 3727, 3109, 2116, 2927, 2491, 1939, 3365, 1709, 2728, 3815, + 2037, 3120, 831, 1405, 1896, 3592, 1622, 2369, 2864, 2151, 1107, 2542, 3532, 
1410, 3917, 427, 3568, 709, 2509, 1503, 1037, 2973, 2436, 1604, 4035, 2594, 563, 1819, 2659, 1234, 4004, 2565, 1511, 2273, 1823, 336, 882, 3772, 575, 1628, 171, 3570, 1120, 2260, 2716, 935, 3064, 1806, 1342, 3144, 3900, 2744, 3296, 985, 1546, 238, 896, 1663, 305, 3660, 695, 2213, 960, 3407, + 144, 1795, 3894, 2267, 51, 2708, 1023, 3818, 366, 1821, 4087, 2985, 755, 2057, 2912, 949, 1583, 2774, 231, 3447, 2258, 3866, 1982, 672, 1225, 2077, 3320, 1062, 370, 3241, 1968, 7, 3068, 681, 3631, 2573, 1567, 3175, 2321, 1067, 3070, 722, 1856, 3744, 642, 1471, 4084, 131, 3514, 2443, 531, 1227, 155, 2265, 4024, 2658, 3326, 3910, 1168, 3078, 1530, 3956, 489, 1424, + 3647, 1203, 420, 2924, 3755, 719, 3248, 1376, 3067, 890, 196, 1559, 3269, 270, 2432, 1885, 3212, 1164, 3778, 1752, 579, 1338, 344, 3585, 3017, 288, 3658, 2371, 3882, 1691, 611, 2789, 3809, 1339, 389, 2950, 2015, 59, 3548, 2751, 2158, 4011, 1352, 29, 3388, 2370, 2812, 1946, 954, 2110, 1558, 2947, 3573, 1909, 1326, 679, 1853, 2312, 551, 2702, 33, 2414, 3209, 2824, + 2547, 2143, 3379, 966, 1492, 1979, 2479, 463, 2194, 3657, 2738, 2318, 1261, 3713, 604, 4002, 11, 2192, 2967, 919, 2607, 3369, 2837, 1676, 2539, 984, 1568, 93, 2901, 1318, 3538, 1041, 2216, 1756, 3454, 1030, 4050, 1402, 798, 1723, 311, 3277, 2546, 2886, 2043, 461, 1206, 3677, 361, 3260, 3988, 809, 2605, 470, 3007, 3517, 102, 3221, 1398, 2062, 3611, 1134, 1928, 865, + 4060, 621, 1710, 2606, 3510, 317, 4017, 1682, 3329, 1159, 1940, 654, 3461, 1789, 1015, 2691, 1455, 3599, 374, 1947, 4069, 71, 2126, 763, 3961, 2278, 3161, 1997, 824, 2623, 2080, 244, 3257, 780, 2732, 2308, 545, 3351, 2476, 3806, 1204, 588, 1591, 963, 3610, 1699, 754, 3049, 2651, 1106, 65, 2221, 1644, 3821, 1100, 2463, 1614, 3801, 965, 2965, 715, 3394, 1593, 212, +}; + +#endif /* BLUE_NOISE_64X64_H */ diff --git a/gfx/cairo/libpixman/src/dither/make-blue-noise.c b/gfx/cairo/libpixman/src/dither/make-blue-noise.c new file mode 100644 index 0000000000..f9974b4d44 --- /dev/null +++ 
b/gfx/cairo/libpixman/src/dither/make-blue-noise.c @@ -0,0 +1,679 @@ +/* Blue noise generation using the void-and-cluster method as described in + * + * The void-and-cluster method for dither array generation + * Ulichney, Robert A (1993) + * + * http://cv.ulichney.com/papers/1993-void-cluster.pdf + * + * Note that running with openmp (-DUSE_OPENMP) will trigger additional + * randomness due to computing reductions in parallel, and is not recommended + * unless generating very large dither arrays. + */ + +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#include <math.h> +#include <stdio.h> + +/* Booleans and utility functions */ + +#ifndef TRUE +# define TRUE 1 +#endif + +#ifndef FALSE +# define FALSE 0 +#endif + +typedef int bool_t; + +int +imin (int x, int y) +{ + return x < y ? x : y; +} + +/* Memory allocation */ +void * +malloc_abc (unsigned int a, unsigned int b, unsigned int c) +{ + if (a >= INT32_MAX / b) + return NULL; + else if (a * b >= INT32_MAX / c) + return NULL; + else + return malloc (a * b * c); +} + +/* Random number generation */ +typedef uint32_t xorwow_state_t[5]; + +uint32_t +xorwow_next (xorwow_state_t *state) +{ + uint32_t s = (*state)[0], + t = (*state)[3]; + (*state)[3] = (*state)[2]; + (*state)[2] = (*state)[1]; + (*state)[1] = s; + + t ^= t >> 2; + t ^= t << 1; + t ^= s ^ (s << 4); + + (*state)[0] = t; + (*state)[4] += 362437; + + return t + (*state)[4]; +} + +float +xorwow_float (xorwow_state_t *s) +{ + return (xorwow_next (s) >> 9) / (float)((1 << 23) - 1); +} + +/* Floating point matrices + * + * Used to cache the cluster sizes. 
+ */ +typedef struct matrix_t { + int width; + int height; + float *buffer; +} matrix_t; + +bool_t +matrix_init (matrix_t *matrix, int width, int height) +{ + float *buffer; + + if (!matrix) + return FALSE; + + buffer = malloc_abc (width, height, sizeof (float)); + + if (!buffer) + return FALSE; + + matrix->buffer = buffer; + matrix->width = width; + matrix->height = height; + + return TRUE; +} + +bool_t +matrix_copy (matrix_t *dst, matrix_t const *src) +{ + float *srcbuf = src->buffer, + *srcend = src->buffer + src->width * src->height, + *dstbuf = dst->buffer; + + if (dst->width != src->width || dst->height != src->height) + return FALSE; + + while (srcbuf < srcend) + *dstbuf++ = *srcbuf++; + + return TRUE; +} + +float * +matrix_get (matrix_t *matrix, int x, int y) +{ + return &matrix->buffer[y * matrix->width + x]; +} + +void +matrix_destroy (matrix_t *matrix) +{ + free (matrix->buffer); +} + +/* Binary patterns */ +typedef struct pattern_t { + int width; + int height; + bool_t *buffer; +} pattern_t; + +bool_t +pattern_init (pattern_t *pattern, int width, int height) +{ + bool_t *buffer; + + if (!pattern) + return FALSE; + + buffer = malloc_abc (width, height, sizeof (bool_t)); + + if (!buffer) + return FALSE; + + pattern->buffer = buffer; + pattern->width = width; + pattern->height = height; + + return TRUE; +} + +bool_t +pattern_copy (pattern_t *dst, pattern_t const *src) +{ + bool_t *srcbuf = src->buffer, + *srcend = src->buffer + src->width * src->height, + *dstbuf = dst->buffer; + + if (dst->width != src->width || dst->height != src->height) + return FALSE; + + while (srcbuf < srcend) + *dstbuf++ = *srcbuf++; + + return TRUE; +} + +bool_t * +pattern_get (pattern_t *pattern, int x, int y) +{ + return &pattern->buffer[y * pattern->width + x]; +} + +void +pattern_fill_white_noise (pattern_t *pattern, float fraction, + xorwow_state_t *s) +{ + bool_t *buffer = pattern->buffer; + bool_t *end = buffer + (pattern->width * pattern->height); + + while (buffer < end) 
+ *buffer++ = xorwow_float (s) < fraction; +} + +void +pattern_destroy (pattern_t *pattern) +{ + free (pattern->buffer); +} + +/* Dither arrays */ +typedef struct array_t { + int width; + int height; + uint32_t *buffer; +} array_t; + +bool_t +array_init (array_t *array, int width, int height) +{ + uint32_t *buffer; + + if (!array) + return FALSE; + + buffer = malloc_abc (width, height, sizeof (uint32_t)); + + if (!buffer) + return FALSE; + + array->buffer = buffer; + array->width = width; + array->height = height; + + return TRUE; +} + +uint32_t * +array_get (array_t *array, int x, int y) +{ + return &array->buffer[y * array->width + x]; +} + +bool_t +array_save_ppm (array_t *array, const char *filename) +{ + FILE *f = fopen(filename, "wb"); + + int i = 0; + int bpp = 2; + uint8_t buffer[1024]; + + if (!f) + return FALSE; + + if (array->width * array->height - 1 < 256) + bpp = 1; + + fprintf(f, "P5 %d %d %d\n", array->width, array->height, + array->width * array->height - 1); + while (i < array->width * array->height) + { + int j = 0; + for (; j < 1024 / bpp && j < array->width * array->height; ++j) + { + uint32_t v = array->buffer[i + j]; + if (bpp == 2) + { + buffer[2 * j] = v & 0xff; + buffer[2 * j + 1] = (v & 0xff00) >> 8; + } else { + buffer[j] = v; + } + } + + fwrite((void *)buffer, bpp, j, f); + i += j; + } + + if (fclose(f) != 0) + return FALSE; + + return TRUE; +} + +bool_t +array_save (array_t *array, const char *filename) +{ + int x, y; + FILE *f = fopen(filename, "wb"); + + if (!f) + return FALSE; + + fprintf (f, +"/* WARNING: This file is generated by make-blue-noise.c\n" +" * Please edit that file instead of this one.\n" +" */\n" +"\n" +"#ifndef BLUE_NOISE_%dX%d_H\n" +"#define BLUE_NOISE_%dX%d_H\n" +"\n" +"#include <stdint.h>\n" +"\n", array->width, array->height, array->width, array->height); + + fprintf (f, "static const uint16_t dither_blue_noise_%dx%d[%d] = {\n", + array->width, array->height, array->width * array->height); + + for (y = 0; y < 
array->height; ++y) + { + fprintf (f, " "); + for (x = 0; x < array->width; ++x) + { + if (x != 0) + fprintf (f, ", "); + + fprintf (f, "%d", *array_get (array, x, y)); + } + + fprintf (f, ",\n"); + } + fprintf (f, "};\n"); + + fprintf (f, "\n#endif /* BLUE_NOISE_%dX%d_H */\n", + array->width, array->height); + + if (fclose(f) != 0) + return FALSE; + + return TRUE; +} + +void +array_destroy (array_t *array) +{ + free (array->buffer); +} + +/* Dither array generation */ +bool_t +compute_cluster_sizes (pattern_t *pattern, matrix_t *matrix) +{ + int width = pattern->width, + height = pattern->height; + + if (matrix->width != width || matrix->height != height) + return FALSE; + + int px, py, qx, qy, dx, dy; + float tsqsi = 2.f * 1.5f * 1.5f; + +#ifdef USE_OPENMP +#pragma omp parallel for default (none) \ + private (py, px, qy, qx, dx, dy) \ + shared (height, width, pattern, matrix, tsqsi) +#endif + for (py = 0; py < height; ++py) + { + for (px = 0; px < width; ++px) + { + bool_t pixel = *pattern_get (pattern, px, py); + float dist = 0.f; + + for (qx = 0; qx < width; ++qx) + { + dx = imin (abs (qx - px), width - abs (qx - px)); + dx = dx * dx; + + for (qy = 0; qy < height; ++qy) + { + dy = imin (abs (qy - py), height - abs (qy - py)); + dy = dy * dy; + + dist += (pixel == *pattern_get (pattern, qx, qy)) + * expf (- (dx + dy) / tsqsi); + } + } + + *matrix_get (matrix, px, py) = dist; + } + } + + return TRUE; +} + +bool_t +swap_pixel (pattern_t *pattern, matrix_t *matrix, int x, int y) +{ + int width = pattern->width, + height = pattern->height; + + bool_t new; + + float f, + dist = 0.f, + tsqsi = 2.f * 1.5f * 1.5f; + + int px, py, dx, dy; + bool_t b; + + new = !*pattern_get (pattern, x, y); + *pattern_get (pattern, x, y) = new; + + if (matrix->width != width || matrix->height != height) + return FALSE; + + +#ifdef USE_OPENMP +#pragma omp parallel for reduction (+:dist) default (none) \ + private (px, py, dx, dy, b, f) \ + shared (x, y, width, height, pattern, matrix, 
new, tsqsi) +#endif + for (py = 0; py < height; ++py) + { + dy = imin (abs (py - y), height - abs (py - y)); + dy = dy * dy; + + for (px = 0; px < width; ++px) + { + dx = imin (abs (px - x), width - abs (px - x)); + dx = dx * dx; + + b = (*pattern_get (pattern, px, py) == new); + f = expf (- (dx + dy) / tsqsi); + *matrix_get (matrix, px, py) += (2 * b - 1) * f; + + dist += b * f; + } + } + + *matrix_get (matrix, x, y) = dist; + return TRUE; +} + +void +largest_cluster (pattern_t *pattern, matrix_t *matrix, + bool_t pixel, int *xmax, int *ymax) +{ + int width = pattern->width, + height = pattern->height; + + int x, y; + + float vmax = -INFINITY; + +#ifdef USE_OPENMP +#pragma omp parallel default (none) \ + private (x, y) \ + shared (height, width, pattern, matrix, pixel, xmax, ymax, vmax) +#endif + { + int xbest = -1, + ybest = -1; + +#ifdef USE_OPENMP + float vbest = -INFINITY; + +#pragma omp for reduction (max: vmax) collapse (2) +#endif + for (y = 0; y < height; ++y) + { + for (x = 0; x < width; ++x) + { + if (*pattern_get (pattern, x, y) != pixel) + continue; + + if (*matrix_get (matrix, x, y) > vmax) + { + vmax = *matrix_get (matrix, x, y); +#ifdef USE_OPENMP + vbest = vmax; +#endif + xbest = x; + ybest = y; + } + } + } + +#ifdef USE_OPENMP +#pragma omp barrier +#pragma omp critical + { + if (vmax == vbest) + { + *xmax = xbest; + *ymax = ybest; + } + } +#else + *xmax = xbest; + *ymax = ybest; +#endif + } + + assert (vmax > -INFINITY); +} + +void +generate_initial_binary_pattern (pattern_t *pattern, matrix_t *matrix) +{ + int xcluster = 0, + ycluster = 0, + xvoid = 0, + yvoid = 0; + + for (;;) + { + largest_cluster (pattern, matrix, TRUE, &xcluster, &ycluster); + assert (*pattern_get (pattern, xcluster, ycluster) == TRUE); + swap_pixel (pattern, matrix, xcluster, ycluster); + + largest_cluster (pattern, matrix, FALSE, &xvoid, &yvoid); + assert (*pattern_get (pattern, xvoid, yvoid) == FALSE); + swap_pixel (pattern, matrix, xvoid, yvoid); + + if (xcluster == xvoid 
&& ycluster == yvoid) + return; + } +} + +bool_t +generate_dither_array (array_t *array, + pattern_t const *prototype, matrix_t const *matrix, + pattern_t *temp_pattern, matrix_t *temp_matrix) +{ + int width = prototype->width, + height = prototype->height; + + int x, y, rank; + + int initial_rank = 0; + + if (array->width != width || array->height != height) + return FALSE; + + // Make copies of the prototype and associated sizes matrix since we will + // trash them + if (!pattern_copy (temp_pattern, prototype)) + return FALSE; + + if (!matrix_copy (temp_matrix, matrix)) + return FALSE; + + // Compute initial rank + for (y = 0; y < height; ++y) + { + for (x = 0; x < width; ++x) + { + if (*pattern_get (temp_pattern, x, y)) + initial_rank += 1; + + *array_get (array, x, y) = 0; + } + } + + // Phase 1 + for (rank = initial_rank; rank > 0; --rank) + { + largest_cluster (temp_pattern, temp_matrix, TRUE, &x, &y); + swap_pixel (temp_pattern, temp_matrix, x, y); + *array_get (array, x, y) = rank - 1; + } + + // Make copies again for phases 2 & 3 + if (!pattern_copy (temp_pattern, prototype)) + return FALSE; + + if (!matrix_copy (temp_matrix, matrix)) + return FALSE; + + // Phase 2 & 3 + for (rank = initial_rank; rank < width * height; ++rank) + { + largest_cluster (temp_pattern, temp_matrix, FALSE, &x, &y); + swap_pixel (temp_pattern, temp_matrix, x, y); + *array_get (array, x, y) = rank; + } + + return TRUE; +} + +bool_t +generate (int size, xorwow_state_t *s, + char const *c_filename, char const *ppm_filename) +{ + bool_t ok = TRUE; + + pattern_t prototype, temp_pattern; + array_t array; + matrix_t matrix, temp_matrix; + + printf ("Generating %dx%d blue noise...\n", size, size); + + if (!pattern_init (&prototype, size, size)) + return FALSE; + + if (!pattern_init (&temp_pattern, size, size)) + { + pattern_destroy (&prototype); + return FALSE; + } + + if (!matrix_init (&matrix, size, size)) + { + pattern_destroy (&temp_pattern); + pattern_destroy (&prototype); + return 
FALSE; + } + + if (!matrix_init (&temp_matrix, size, size)) + { + matrix_destroy (&matrix); + pattern_destroy (&temp_pattern); + pattern_destroy (&prototype); + return FALSE; + } + + if (!array_init (&array, size, size)) + { + matrix_destroy (&temp_matrix); + matrix_destroy (&matrix); + pattern_destroy (&temp_pattern); + pattern_destroy (&prototype); + return FALSE; + } + + printf("Filling initial binary pattern with white noise...\n"); + pattern_fill_white_noise (&prototype, .1, s); + + printf("Initializing cluster sizes...\n"); + if (!compute_cluster_sizes (&prototype, &matrix)) + { + fprintf (stderr, "Error while computing cluster sizes\n"); + ok = FALSE; + goto out; + } + + printf("Generating initial binary pattern...\n"); + generate_initial_binary_pattern (&prototype, &matrix); + + printf("Generating dither array...\n"); + if (!generate_dither_array (&array, &prototype, &matrix, + &temp_pattern, &temp_matrix)) + { + fprintf (stderr, "Error while generating dither array\n"); + ok = FALSE; + goto out; + } + + printf("Saving dither array...\n"); + if (!array_save (&array, c_filename)) + { + fprintf (stderr, "Error saving dither array\n"); + ok = FALSE; + goto out; + } + +#if SAVE_PPM + if (!array_save_ppm (&array, ppm_filename)) + { + fprintf (stderr, "Error saving dither array PPM\n"); + ok = FALSE; + goto out; + } +#else + (void)ppm_filename; +#endif + + printf("All done!\n"); + +out: + array_destroy (&array); + matrix_destroy (&temp_matrix); + matrix_destroy (&matrix); + pattern_destroy (&temp_pattern); + pattern_destroy (&prototype); + return ok; +} + +int +main (void) +{ + xorwow_state_t s = {1185956906, 12385940, 983948, 349208051, 901842}; + + if (!generate (64, &s, "blue-noise-64x64.h", "blue-noise-64x64.ppm")) + return -1; + + return 0; +} diff --git a/gfx/cairo/libpixman/src/loongson-mmintrin.h b/gfx/cairo/libpixman/src/loongson-mmintrin.h new file mode 100644 index 0000000000..0e79e86484 --- /dev/null +++ b/gfx/cairo/libpixman/src/loongson-mmintrin.h 
@@ -0,0 +1,412 @@ +/* The gcc-provided loongson intrinsic functions are way too fucking broken + * to be of any use, otherwise I'd use them. + * + * - The hardware instructions are very similar to MMX or iwMMXt. Certainly + * close enough that they could have implemented the _mm_*-style intrinsic + * interface and had a ton of optimized code available to them. Instead they + * implemented something much, much worse. + * + * - pshuf takes a dead first argument, causing extra instructions to be + * generated. + * + * - There are no 64-bit shift or logical intrinsics, which means you have + * to implement them with inline assembly, but this is a nightmare because + * gcc doesn't understand that the integer vector datatypes are actually in + * floating-point registers, so you end up with braindead code like + * + * punpcklwd $f9,$f9,$f5 + * dmtc1 v0,$f8 + * punpcklwd $f19,$f19,$f5 + * dmfc1 t9,$f9 + * dmtc1 v0,$f9 + * dmtc1 t9,$f20 + * dmfc1 s0,$f19 + * punpcklbh $f20,$f20,$f2 + * + * where crap just gets copied back and forth between integer and floating- + * point registers ad nauseum. + * + * Instead of trying to workaround the problems from these crap intrinsics, I + * just implement the _mm_* intrinsics needed for pixman-mmx.c using inline + * assembly. 
+ */ + +#include <stdint.h> + +/* vectors are stored in 64-bit floating-point registers */ +typedef double __m64; +/* having a 32-bit datatype allows us to use 32-bit loads in places like load8888 */ +typedef float __m32; + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_si64 (void) +{ + return 0.0; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddush %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddusb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("and %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pcmpeqw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ + +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmaddhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmulhuh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("pmullh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("or %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("packushb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("packsswh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0) +{ + if (__builtin_constant_p (__w3) && + __builtin_constant_p (__w2) && + __builtin_constant_p (__w1) && + __builtin_constant_p (__w0)) + { + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; + } + else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0) + { + /* 
TODO: handle other cases */ + uint64_t val = __w3; + uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm) + ); + return ret; + } else { + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; + } +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi32 (unsigned __i1, unsigned __i0) +{ + if (__builtin_constant_p (__i1) && + __builtin_constant_p (__i0)) + { + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; + } + else if (__i1 == __i0) + { + uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm) + ); + return ret; + } else { + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; + } +} +#undef _MM_SHUFFLE + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi16 (__m64 __m, int64_t __n) +{ + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__n) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_pi16 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psllh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_si64 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("dsll %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_pi16 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psrlh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" 
(*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_pi32 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psrlw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_si64 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("dsrl %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("psubh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpckhbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpckhhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32 datatype which + * allows load8888 to use 32-bit loads */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pi8_f (__m32 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("punpcklhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("xor %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +loongson_extract_pi16 (__m64 __m, int64_t __pos) +{ + __m64 ret; + asm("pextrh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__pos) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos) +{ + __m64 ret; + asm("pinsrh_%3 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + return ret; +} diff --git a/gfx/cairo/libpixman/src/make-combine.pl b/gfx/cairo/libpixman/src/make-combine.pl deleted file mode 100644 index 210a5da12b..0000000000 --- a/gfx/cairo/libpixman/src/make-combine.pl +++ /dev/null @@ -1,86 +0,0 @@ -$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template"; - -$#ARGV == 0 or die $usage; - -# Get the component size. -$size = int($ARGV[0]); -$size == 8 or $size == 16 or die $usage; - -$pixel_size = $size * 4; -$half_pixel_size = $size * 2; - -sub mask { - my $str = shift; - my $suffix; - $suffix = "ULL" if $size > 8; - - return "0x" . $str . $suffix; -} - -# Generate mask strings. -$nibbles = $size / 4; -$mask = "f" x $nibbles; -$zero_mask = "0" x $nibbles; -$one_half = "8" . "0" x ($nibbles - 1); - -print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; -print " Please edit one of those files rather than this one. */\n"; -print "\n"; - -print "#line 1 \"pixman-combine.c.template\"\n"; - -$mask_ = mask($mask); -$one_half_ = mask($one_half); -$g_mask = mask($mask . 
$zero_mask); -$b_mask = mask($mask . $zero_mask x 2); -$a_mask = mask($mask . $zero_mask x 3); -$rb_mask = mask($mask . $zero_mask . $mask); -$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask); -$rb_one_half = mask($one_half . $zero_mask . $one_half); -$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask); - -while (<STDIN>) { - # Mask and 1/2 value for a single component. - s/#define COMPONENT_SIZE\b/$& $size/; - s/#define MASK\b/$& $mask_/; - s/#define ONE_HALF\b/$& $one_half_/; - - # Shifts and masks for green, blue, and alpha. - s/#define G_SHIFT\b/$& $size/; - s/#define R_SHIFT\b/$& $size * 2/; - s/#define A_SHIFT\b/$& $size * 3/; - s/#define G_MASK\b/$& $g_mask/; - s/#define R_MASK\b/$& $b_mask/; - s/#define A_MASK\b/$& $a_mask/; - - # Special values for dealing with red + blue at the same time. - s/#define RB_MASK\b/$& $rb_mask/; - s/#define AG_MASK\b/$& $ag_mask/; - s/#define RB_ONE_HALF\b/$& $rb_one_half/; - s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/; - - # Add 32/64 suffix to combining function types. - s/\bCombineFunc\b/CombineFunc$pixel_size/; - s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/; - s/combine_width/combine_$pixel_size/; - s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/; - s/UNc/UN$size/g; - s/ALPHA_c/ALPHA_$size/g; - s/RED_c/RED_$size/g; - s/GREEN_c/GREEN_$size/g; - s/BLUE_c/BLUE_$size/g; - - # Convert comp*_t values into the appropriate real types. - s/comp1_t/uint${size}_t/g; - s/comp2_t/uint${half_pixel_size}_t/g; - s/comp4_t/uint${pixel_size}_t/g; - - # Change the function table name for the 64-bit version. 
- s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16; - - # Change the header for the 64-bit version - s/pixman-combine.h/pixman-combine64.h/ if $size == 16; - s/pixman-combine.h/pixman-combine32.h/ if $size == 8; - - print; -} diff --git a/gfx/cairo/libpixman/src/meson.build b/gfx/cairo/libpixman/src/meson.build new file mode 100644 index 0000000000..f48357f261 --- /dev/null +++ b/gfx/cairo/libpixman/src/meson.build @@ -0,0 +1,129 @@ +# Copyright © 2018 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +config_h = configure_file( + configuration : config, + output : 'config.h' +) + +version_h = configure_file( + configuration : version_conf, + input : 'pixman-version.h.in', + output : 'pixman-version.h', + install_dir : join_paths(get_option('prefix'), get_option('includedir'), 'pixman-1') +) + +libpixman_extra_cargs = [] +if cc.has_function_attribute('dllexport') + libpixman_extra_cargs = ['-DPIXMAN_API=__declspec(dllexport)'] +endif + +pixman_simd_libs = [] +simds = [ + # the mmx library can be compiled with mmx on x86/x86_64, iwmmxt on + # some arm cores, or loongson mmi on loongson mips systems. The + # libraries will all have the same name, "pixman-mmx", but there is + # no chance of more than one version being built in the same build + # because no system could have mmx, iwmmxt, and mmi, and it + # simplifies the build logic to give them the same name. + ['mmx', have_mmx, mmx_flags, []], + ['mmx', have_loongson_mmi, loongson_mmi_flags, []], + ['mmx', have_iwmmxt, iwmmxt_flags, []], + + ['sse2', have_sse2, sse2_flags, []], + ['ssse3', have_ssse3, ssse3_flags, []], + ['vmx', have_vmx, vmx_flags, []], + ['arm-simd', have_armv6_simd, [], + ['pixman-arm-simd-asm.S', 'pixman-arm-simd-asm-scaled.S']], + ['arm-neon', have_neon, [], + ['pixman-arm-neon-asm.S', 'pixman-arm-neon-asm-bilinear.S']], + ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags, + ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']], +] + +foreach simd : simds + if simd[1] + name = 'pixman-' + simd[0] + pixman_simd_libs += static_library( + name, + [name + '.c', config_h, version_h, simd[3]], + c_args : simd[2] + ) + endif +endforeach + +pixman_files = files( + 'pixman.c', + 'pixman-access.c', + 'pixman-access-accessors.c', + 'pixman-bits-image.c', + 'pixman-combine32.c', + 'pixman-combine-float.c', + 'pixman-conical-gradient.c', + 'pixman-filter.c', + 'pixman-x86.c', + 'pixman-mips.c', + 'pixman-arm.c', + 'pixman-ppc.c', + 'pixman-edge.c', + 'pixman-edge-accessors.c', + 'pixman-fast-path.c', 
+ 'pixman-glyph.c', + 'pixman-general.c', + 'pixman-gradient-walker.c', + 'pixman-image.c', + 'pixman-implementation.c', + 'pixman-linear-gradient.c', + 'pixman-matrix.c', + 'pixman-noop.c', + 'pixman-radial-gradient.c', + 'pixman-region16.c', + 'pixman-region32.c', + 'pixman-solid-fill.c', + 'pixman-timer.c', + 'pixman-trap.c', + 'pixman-utils.c', +) + +# We cannot use 'link_with' or 'link_whole' because meson wont do the right +# thing for static archives. +_obs = [] +foreach l : pixman_simd_libs + _obs += l.extract_all_objects() +endforeach + +libpixman = library( + 'pixman-1', + [pixman_files, config_h, version_h], + objects : _obs, + c_args : libpixman_extra_cargs, + dependencies : [dep_m, dep_threads], + version : meson.project_version(), + install : true, +) + +inc_pixman = include_directories('.') + +idep_pixman = declare_dependency( + link_with: libpixman, + include_directories : inc_pixman, +) + +install_headers('pixman.h', subdir : 'pixman-1') diff --git a/gfx/cairo/libpixman/src/moz.build b/gfx/cairo/libpixman/src/moz.build index 7c3e900899..02e2582452 100644 --- a/gfx/cairo/libpixman/src/moz.build +++ b/gfx/cairo/libpixman/src/moz.build @@ -30,7 +30,6 @@ SOURCES += [ 'pixman-arm.c', 'pixman-bits-image.c', 'pixman-combine-float.c', - 'pixman-combine16.c', 'pixman-combine32.c', 'pixman-conical-gradient.c', 'pixman-edge-accessors.c', @@ -66,10 +65,7 @@ LOCAL_INCLUDES += [ ] if CONFIG['MOZ_USE_PTHREADS']: - DEFINES['HAVE_PTHREAD_SETSPECIFIC'] = True - -if CONFIG['_MSC_VER']: - DEFINES['PIXMAN_USE_XP_DLL_TLS_WORKAROUND'] = True + DEFINES['HAVE_PTHREADS'] = True DEFINES['PACKAGE'] = 'mozpixman' @@ -103,8 +99,8 @@ if use_mmx: if CONFIG['GNU_CC']: SOURCES['pixman-mmx.c'].flags += [ '-Winline', - '--param inline-unit-growth=10000', - '--param large-function-growth=10000', + '--param', 'inline-unit-growth=10000', + '--param', 'large-function-growth=10000', ] if use_sse2: @@ -130,30 +126,20 @@ if use_arm_neon_gcc: SOURCES['pixman-arm-neon.c'].flags += 
CONFIG['NEON_FLAGS'] # Suppress warnings in third-party code. -if CONFIG['_MSC_VER']: - CFLAGS += [ - '-wd4047', # different levels of indirection - '-wd4101', # unreferenced local variable - '-wd4133', # 'function' : incompatible types - '-wd4146', # unary minus operator applied to unsigned type - '-wd4311', # 'variable' : pointer truncation from 'type' to 'type' - ] - if CONFIG['GNU_CC'] or CONFIG['CLANG_CL']: CFLAGS += [ '-Wno-address', '-Wno-missing-field-initializers', '-Wno-sign-compare', + '-Wno-incompatible-pointer-types', '-Wno-unused', # too many unused warnings; ignore ] - if CONFIG['CLANG_CXX'] or CONFIG['CLANG_CL']: CFLAGS += [ '-Wno-incompatible-pointer-types', '-Wno-tautological-compare', '-Wno-tautological-constant-out-of-range-compare', ] - if CONFIG['CLANG_CL']: CFLAGS += [ '-Wno-unused-variable', diff --git a/gfx/cairo/libpixman/src/pixman-access.c b/gfx/cairo/libpixman/src/pixman-access.c index 00a02140ae..7c5ce783f9 100644 --- a/gfx/cairo/libpixman/src/pixman-access.c +++ b/gfx/cairo/libpixman/src/pixman-access.c @@ -68,14 +68,14 @@ #ifdef WORDS_BIGENDIAN #define FETCH_24(img,l,o) \ - ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ - (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ - (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) + ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ + (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) #else #define FETCH_24(img,l,o) \ - ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ - (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ - (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) + ((uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ + (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (uint32_t)(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) #endif /* Store macros */ @@ -87,7 +87,7 @@ uint32_t *__d = 
((uint32_t *)(l)) + ((o) >> 5); \ uint32_t __m, __v; \ \ - __m = 1 << (0x1f - ((o) & 0x1f)); \ + __m = 1U << (0x1f - ((o) & 0x1f)); \ __v = (v)? __m : 0; \ \ WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ @@ -100,7 +100,7 @@ uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ uint32_t __m, __v; \ \ - __m = 1 << ((o) & 0x1f); \ + __m = 1U << ((o) & 0x1f); \ __v = (v)? __m : 0; \ \ WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ @@ -294,14 +294,14 @@ convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixe } static force_inline uint32_t -convert_pixel_to_a8r8g8b8 (pixman_image_t *image, +convert_pixel_to_a8r8g8b8 (bits_image_t *image, pixman_format_code_t format, uint32_t pixel) { if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) { - return image->bits.indexed->rgba[pixel]; + return image->indexed->rgba[pixel]; } else { @@ -332,7 +332,7 @@ convert_pixel_from_a8r8g8b8 (pixman_image_t *image, } static force_inline uint32_t -fetch_and_convert_pixel (pixman_image_t * image, +fetch_and_convert_pixel (bits_image_t * image, const uint8_t * bits, int offset, pixman_format_code_t format) @@ -417,7 +417,7 @@ convert_and_store_pixel (bits_image_t * image, #define MAKE_ACCESSORS(format) \ static void \ - fetch_scanline_ ## format (pixman_image_t *image, \ + fetch_scanline_ ## format (bits_image_t *image, \ int x, \ int y, \ int width, \ @@ -425,7 +425,7 @@ convert_and_store_pixel (bits_image_t * image, const uint32_t *mask) \ { \ uint8_t *bits = \ - (uint8_t *)(image->bits.bits + y * image->bits.rowstride); \ + (uint8_t *)(image->bits + y * image->rowstride); \ int i; \ \ for (i = 0; i < width; ++i) \ @@ -461,8 +461,8 @@ convert_and_store_pixel (bits_image_t * image, uint8_t *bits = \ (uint8_t *)(image->bits + line * image->rowstride); \ \ - return fetch_and_convert_pixel ((pixman_image_t *)image, \ - bits, offset, PIXMAN_ ## format); \ + return fetch_and_convert_pixel ( \ + image, bits, 
offset, PIXMAN_ ## format); \ } \ \ static const void *const __dummy__ ## format @@ -583,14 +583,14 @@ to_srgb (float f) } static void -fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image, +fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -612,14 +612,14 @@ fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_a2r10g10b10_float (pixman_image_t *image, +fetch_scanline_a2r10g10b10_float (bits_image_t * image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -642,15 +642,57 @@ fetch_scanline_a2r10g10b10_float (pixman_image_t *image, } /* Expects a float buffer */ +#ifndef PIXMAN_FB_ACCESSORS +static void +fetch_scanline_rgbf_float (bits_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const float *bits = (float *)image->bits + y * image->rowstride; + const float *pixel = bits + x * 3; + argb_t *buffer = (argb_t *)b; + + for (; width--; buffer++) { + buffer->r = *pixel++; + buffer->g = *pixel++; + buffer->b = *pixel++; + buffer->a = 1.f; + } +} + +static void +fetch_scanline_rgbaf_float (bits_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const float *bits = (float *)image->bits + y * image->rowstride; + const float *pixel = bits + x * 4; + argb_t *buffer = (argb_t *)b; + + for (; width--; buffer++) { + buffer->r = *pixel++; + buffer->g = *pixel++; + buffer->b = 
*pixel++; + buffer->a = *pixel++; + } +} +#endif + static void -fetch_scanline_x2r10g10b10_float (pixman_image_t *image, +fetch_scanline_x2r10g10b10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -673,14 +715,14 @@ fetch_scanline_x2r10g10b10_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_a2b10g10r10_float (pixman_image_t *image, +fetch_scanline_a2b10g10r10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -704,14 +746,14 @@ fetch_scanline_a2b10g10r10_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_x2b10g10r10_float (pixman_image_t *image, +fetch_scanline_x2b10g10r10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -733,14 +775,14 @@ fetch_scanline_x2b10g10r10_float (pixman_image_t *image, } static void -fetch_scanline_yuy2 (pixman_image_t *image, +fetch_scanline_yuy2 (bits_image_t *image, int x, int line, int width, uint32_t * buffer, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + image->bits.rowstride * line; + const uint32_t *bits = image->bits + image->rowstride * line; int i; for (i = 0; i < width; i++) @@ -767,7 +809,7 
@@ fetch_scanline_yuy2 (pixman_image_t *image, } static void -fetch_scanline_yv12 (pixman_image_t *image, +fetch_scanline_yv12 (bits_image_t *image, int x, int line, int width, @@ -805,6 +847,40 @@ fetch_scanline_yv12 (pixman_image_t *image, /**************************** Pixel wise fetching *****************************/ +#ifndef PIXMAN_FB_ACCESSORS +static argb_t +fetch_pixel_rgbf_float (bits_image_t *image, + int offset, + int line) +{ + float *bits = (float *)image->bits + line * image->rowstride; + argb_t argb; + + argb.r = bits[offset * 3]; + argb.g = bits[offset * 3 + 1]; + argb.b = bits[offset * 3 + 2]; + argb.a = 1.f; + + return argb; +} + +static argb_t +fetch_pixel_rgbaf_float (bits_image_t *image, + int offset, + int line) +{ + float *bits = (float *)image->bits + line * image->rowstride; + argb_t argb; + + argb.r = bits[offset * 4]; + argb.g = bits[offset * 4 + 1]; + argb.b = bits[offset * 4 + 2]; + argb.a = bits[offset * 4 + 3]; + + return argb; +} +#endif + static argb_t fetch_pixel_x2r10g10b10_float (bits_image_t *image, int offset, @@ -962,6 +1038,45 @@ fetch_pixel_yv12 (bits_image_t *image, /*********************************** Store ************************************/ +#ifndef PIXMAN_FB_ACCESSORS +static void +store_scanline_rgbaf_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + float *bits = (float *)image->bits + image->rowstride * y + 4 * x; + const argb_t *values = (argb_t *)v; + + for (; width; width--, values++) + { + *bits++ = values->r; + *bits++ = values->g; + *bits++ = values->b; + *bits++ = values->a; + } +} + +static void +store_scanline_rgbf_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + float *bits = (float *)image->bits + image->rowstride * y + 3 * x; + const argb_t *values = (argb_t *)v; + + for (; width; width--, values++) + { + *bits++ = values->r; + *bits++ = values->g; + *bits++ = values->b; + } +} +#endif + static void store_scanline_a2r10g10b10_float 
(bits_image_t * image, int x, @@ -976,7 +1091,7 @@ store_scanline_a2r10g10b10_float (bits_image_t * image, for (i = 0; i < width; ++i) { - uint16_t a, r, g, b; + uint32_t a, r, g, b; a = pixman_float_to_unorm (values[i].a, 2); r = pixman_float_to_unorm (values[i].r, 10); @@ -1002,7 +1117,7 @@ store_scanline_x2r10g10b10_float (bits_image_t * image, for (i = 0; i < width; ++i) { - uint16_t r, g, b; + uint32_t r, g, b; r = pixman_float_to_unorm (values[i].r, 10); g = pixman_float_to_unorm (values[i].g, 10); @@ -1027,7 +1142,7 @@ store_scanline_a2b10g10r10_float (bits_image_t * image, for (i = 0; i < width; ++i) { - uint16_t a, r, g, b; + uint32_t a, r, g, b; a = pixman_float_to_unorm (values[i].a, 2); r = pixman_float_to_unorm (values[i].r, 10); @@ -1053,7 +1168,7 @@ store_scanline_x2b10g10r10_float (bits_image_t * image, for (i = 0; i < width; ++i) { - uint16_t r, g, b; + uint32_t r, g, b; r = pixman_float_to_unorm (values[i].r, 10); g = pixman_float_to_unorm (values[i].g, 10); @@ -1078,7 +1193,7 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, for (i = 0; i < width; ++i) { - uint8_t a, r, g, b; + uint32_t a, r, g, b; a = pixman_float_to_unorm (values[i].a, 8); r = to_srgb (values[i].r); @@ -1090,44 +1205,6 @@ store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, } } -static void -store_scanline_16 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint16_t *bits = (uint16_t*)(image->bits + image->rowstride * y); - uint16_t *values = (uint16_t *)v; - uint16_t *pixel = bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, values[i]); - } -} - -static void -fetch_scanline_16 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint16_t *bits = (uint16_t*)(image->bits.bits + y * image->bits.rowstride); - const uint16_t *pixel = bits + x; - int i; - uint16_t *buffer = (uint16_t *)b; - - for (i = 0; i < width; ++i) - { - *buffer++ = READ (image, 
pixel++); - } -} - - /* * Contracts a floating point image to 32bpp and then stores it using a * regular 32-bit store proc. Despite the type, this function expects an @@ -1159,37 +1236,37 @@ store_scanline_generic_float (bits_image_t * image, } static void -fetch_scanline_generic_float (pixman_image_t *image, +fetch_scanline_generic_float (bits_image_t * image, int x, int y, int width, uint32_t * buffer, const uint32_t *mask) { - image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL); + image->fetch_scanline_32 (image, x, y, width, buffer, NULL); - pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width); + pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width); } /* The 32_sRGB paths should be deleted after narrow processing * is no longer invoked for formats that are considered wide. * (Also see fetch_pixel_generic_lossy_32) */ static void -fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image, +fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, int x, int y, int width, uint32_t *buffer, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; uint32_t tmp; while (pixel < end) { - uint8_t a, r, g, b; + uint32_t a, r, g, b; tmp = READ (image, pixel++); @@ -1213,7 +1290,7 @@ fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image, { uint32_t *bits = image->bits + line * image->rowstride; uint32_t tmp = READ (image, bits + offset); - uint8_t a, r, g, b; + uint32_t a, r, g, b; a = (tmp >> 24) & 0xff; r = (tmp >> 16) & 0xff; @@ -1242,7 +1319,7 @@ store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, for (i = 0; i < width; ++i) { - uint8_t a, r, g, b; + uint32_t a, r, g, b; tmp = values[i]; @@ -1294,12 +1371,10 @@ fetch_pixel_generic_lossy_32 (bits_image_t *image, typedef struct { pixman_format_code_t format; - fetch_scanline_t fetch_scanline_16; 
fetch_scanline_t fetch_scanline_32; fetch_scanline_t fetch_scanline_float; fetch_pixel_32_t fetch_pixel_32; fetch_pixel_float_t fetch_pixel_float; - store_scanline_t store_scanline_16; store_scanline_t store_scanline_32; store_scanline_t store_scanline_float; } format_info_t; @@ -1307,29 +1382,14 @@ typedef struct #define FORMAT_INFO(format) \ { \ PIXMAN_ ## format, \ - NULL, \ fetch_scanline_ ## format, \ fetch_scanline_generic_float, \ fetch_pixel_ ## format, \ fetch_pixel_generic_float, \ - NULL, \ - store_scanline_ ## format, \ - store_scanline_generic_float \ - } -#define FORMAT_INFO16(format) \ - { \ - PIXMAN_ ## format, \ - fetch_scanline_16, \ - fetch_scanline_ ## format, \ - fetch_scanline_generic_float, \ - fetch_pixel_ ## format, \ - fetch_pixel_generic_float, \ - store_scanline_16, \ store_scanline_ ## format, \ store_scanline_generic_float \ } - static const format_info_t accessors[] = { /* 32 bpp formats */ @@ -1345,10 +1405,8 @@ static const format_info_t accessors[] = /* sRGB formats */ { PIXMAN_a8r8g8b8_sRGB, - NULL, fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float, fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float, - NULL, store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float, }, @@ -1357,8 +1415,8 @@ static const format_info_t accessors[] = FORMAT_INFO (b8g8r8), /* 16bpp formats */ - FORMAT_INFO16 (r5g6b5), - FORMAT_INFO16 (b5g6r5), + FORMAT_INFO (r5g6b5), + FORMAT_INFO (b5g6r5), FORMAT_INFO (a1r5g5b5), FORMAT_INFO (x1r5g5b5), @@ -1408,37 +1466,48 @@ static const format_info_t accessors[] = FORMAT_INFO (g1), /* Wide formats */ - +#ifndef PIXMAN_FB_ACCESSORS + { PIXMAN_rgba_float, + NULL, fetch_scanline_rgbaf_float, + fetch_pixel_generic_lossy_32, fetch_pixel_rgbaf_float, + NULL, store_scanline_rgbaf_float }, + + { PIXMAN_rgb_float, + NULL, fetch_scanline_rgbf_float, + fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float, + NULL, store_scanline_rgbf_float }, +#endif + { PIXMAN_a2r10g10b10, - NULL, NULL, 
fetch_scanline_a2r10g10b10_float, + NULL, fetch_scanline_a2r10g10b10_float, fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float, - NULL, NULL, store_scanline_a2r10g10b10_float }, + NULL, store_scanline_a2r10g10b10_float }, { PIXMAN_x2r10g10b10, - NULL, NULL, fetch_scanline_x2r10g10b10_float, + NULL, fetch_scanline_x2r10g10b10_float, fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float, - NULL, NULL, store_scanline_x2r10g10b10_float }, + NULL, store_scanline_x2r10g10b10_float }, { PIXMAN_a2b10g10r10, - NULL, NULL, fetch_scanline_a2b10g10r10_float, + NULL, fetch_scanline_a2b10g10r10_float, fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float, - NULL, NULL, store_scanline_a2b10g10r10_float }, + NULL, store_scanline_a2b10g10r10_float }, { PIXMAN_x2b10g10r10, - NULL, NULL, fetch_scanline_x2b10g10r10_float, + NULL, fetch_scanline_x2b10g10r10_float, fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float, - NULL, NULL, store_scanline_x2b10g10r10_float }, + NULL, store_scanline_x2b10g10r10_float }, /* YUV formats */ { PIXMAN_yuy2, - NULL, fetch_scanline_yuy2, fetch_scanline_generic_float, + fetch_scanline_yuy2, fetch_scanline_generic_float, fetch_pixel_yuy2, fetch_pixel_generic_float, - NULL, NULL, NULL }, + NULL, NULL }, { PIXMAN_yv12, - NULL, fetch_scanline_yv12, fetch_scanline_generic_float, + fetch_scanline_yv12, fetch_scanline_generic_float, fetch_pixel_yv12, fetch_pixel_generic_float, - NULL, NULL, NULL }, + NULL, NULL }, { PIXMAN_null }, }; @@ -1452,12 +1521,10 @@ setup_accessors (bits_image_t *image) { if (info->format == image->format) { - image->fetch_scanline_16 = info->fetch_scanline_16; image->fetch_scanline_32 = info->fetch_scanline_32; image->fetch_scanline_float = info->fetch_scanline_float; image->fetch_pixel_32 = info->fetch_pixel_32; image->fetch_pixel_float = info->fetch_pixel_float; - image->store_scanline_16 = info->store_scanline_16; image->store_scanline_32 = info->store_scanline_32; image->store_scanline_float = 
info->store_scanline_float; diff --git a/gfx/cairo/libpixman/src/pixman-arm-asm.h b/gfx/cairo/libpixman/src/pixman-arm-asm.h new file mode 100644 index 0000000000..ee78541087 --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-arm-asm.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2008 Mozilla Corporation + * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm diff --git a/gfx/cairo/libpixman/src/pixman-arm-common.h b/gfx/cairo/libpixman/src/pixman-arm-common.h index b598502bba..9537688306 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-common.h +++ b/gfx/cairo/libpixman/src/pixman-arm-common.h @@ -266,13 +266,6 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_normal_##op, \ scaled_nearest_scanline_##cputype##_##name##_##op, \ src_type, dst_type, NORMAL) -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - #define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ src_type, dst_type) \ void \ @@ -318,9 +311,7 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ /* Provide entries for the fast path table */ #define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH (op,s,d,func), \ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) /*****************************************************************************/ @@ -360,16 +351,16 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \ \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ 
scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint32_t, dst_type, NORMAL, \ + src_type, uint32_t, dst_type, NORMAL, \ FLAG_NONE) @@ -409,19 +400,19 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \ \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint8_t, dst_type, COVER, \ + src_type, uint8_t, dst_type, COVER, \ FLAG_HAVE_NON_SOLID_MASK) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint8_t, dst_type, NONE, \ + src_type, uint8_t, dst_type, NONE, \ FLAG_HAVE_NON_SOLID_MASK) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint8_t, dst_type, PAD, \ + src_type, uint8_t, dst_type, PAD, \ FLAG_HAVE_NON_SOLID_MASK) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - NULL, src_type, uint8_t, dst_type, NORMAL, \ + src_type, uint8_t, dst_type, NORMAL, \ FLAG_HAVE_NON_SOLID_MASK) diff --git a/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm b/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm deleted file mode 100644 index 8f5d5eb2a9..0000000000 --- a/gfx/cairo/libpixman/src/pixman-arm-detect-win32.asm +++ /dev/null @@ -1,21 +0,0 @@ - area pixman_msvc, code, readonly
-
- export pixman_msvc_try_arm_simd_op
-
-pixman_msvc_try_arm_simd_op
- ;; I don't think the msvc arm asm knows how to do SIMD insns
- ;; uqadd8 r3,r3,r3
- dcd 0xe6633f93
- mov pc,lr
- endp
-
- export pixman_msvc_try_arm_neon_op
-
-pixman_msvc_try_arm_neon_op
- ;; I don't think the msvc arm asm knows how to do NEON insns
- ;; veor d0,d0,d0
- dcd 0xf3000110
- mov pc,lr
- endp
-
- end
diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S index e37b5c298e..0fd92d61c5 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S +++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S @@ -65,23 +65,13 @@ .p2align 2 #include "pixman-private.h" +#include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" /* * Bilinear macros from pixman-arm-neon-asm.S */ -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - /* * Bilinear scaling support code which tries to provide pixel fetching, color * format conversion, and interpolation as separate macros which can be used diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S index d0e943d712..7e949a38fd 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S +++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S @@ -50,6 +50,7 @@ .p2align 2 #include "pixman-private.h" +#include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" /* Global configuration options and preferences */ @@ -954,7 +955,6 @@ generate_composite_function \ */ .macro pixman_composite_over_n_8_0565_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d8, d11[0] @@ -982,7 +982,6 @@ generate_composite_function \ .macro pixman_composite_over_8888_n_0565_init add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d24[0]}, [DUMMY] vdup.8 d24, d24[3] @@ -1449,7 +1448,6 @@ generate_composite_function \ .macro pixman_composite_over_n_8_8888_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d8, d11[0] @@ -1521,7 +1519,6 @@ generate_composite_function \ .macro pixman_composite_over_n_8_8_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave 
{d8-d15} vpush {d8-d15} vld1.32 {d8[0]}, [DUMMY] vdup.8 d8, d8[3] @@ -1623,7 +1620,6 @@ generate_composite_function \ .macro pixman_composite_over_n_8888_8888_ca_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d8, d11[0] @@ -1793,7 +1789,6 @@ generate_composite_function \ .macro pixman_composite_over_n_8888_0565_ca_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d8, d11[0] @@ -1907,7 +1902,6 @@ generate_composite_function \ .macro pixman_composite_add_n_8_8_init add DUMMY, sp, #ARGS_STACK_OFFSET - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d11, d11[3] @@ -2214,7 +2208,6 @@ generate_composite_function_single_scanline \ .macro pixman_composite_over_8888_n_8888_init add DUMMY, sp, #48 - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d15[0]}, [DUMMY] vdup.8 d15, d15[3] @@ -2587,7 +2580,6 @@ generate_composite_function \ .macro pixman_composite_over_0565_n_0565_init add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - .vsave {d8-d15} vpush {d8-d15} vld1.32 {d15[0]}, [DUMMY] vdup.8 d15, d15[3] @@ -2839,17 +2831,6 @@ generate_composite_function_nearest_scanline \ /******************************************************************************/ -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - /* * Bilinear scaling support code which tries to provide pixel fetching, color * format conversion, and interpolation as separate macros which can be used @@ -3141,16 +3122,13 @@ pixman_asm_function fname TMP4 .req r9 STRIDE .req r2 - .fnstart mov ip, sp - .save {r4, r5, r6, r7, r8, r9} push {r4, r5, r6, r7, r8, r9} mov PF_OFFS, #prefetch_distance ldmia ip, {WB, X, UX, WIDTH} mul PF_OFFS, PF_OFFS, UX .if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 - .vsave {d8-d15} vpush {d8-d15} .endif @@ -3244,7 +3222,6 @@ 
pixman_asm_function fname .endif pop {r4, r5, r6, r7, r8, r9} bx lr - .fnend .unreq OUT .unreq TOP diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h index f50ade3ef1..bdcf6a9d47 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h +++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.h @@ -385,7 +385,7 @@ * execute simultaneously with NEON and be completely shadowed by it. Thus * we get no performance overhead at all (*). This looks like a very nice * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in sofware + * but still can implement some rather advanced prefetch logic in software * for almost zero cost! * * (*) The overhead of the prefetcher is visible when running some trivial @@ -631,16 +631,8 @@ local skip1 src_basereg_ = 0, \ mask_basereg_ = 24 - .func fname - .global fname - /* For ELF format also set function visibility to hidden */ -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: - .fnstart - .save {r4-r12, lr} + pixman_asm_function fname + push {r4-r12, lr} /* save all registers */ /* @@ -818,7 +810,6 @@ fname: init .if regs_shortage - .save {r0, r1} push {r0, r1} .endif subs H, H, #1 @@ -904,7 +895,6 @@ fname: .endif cleanup pop {r4-r12, pc} /* exit */ - .fnend .purgem fetch_src_pixblock .purgem pixld_src @@ -949,15 +939,8 @@ fname: src_basereg_ = 0, \ mask_basereg_ = 24 - .func fname - .global fname - /* For ELF format also set function visibility to hidden */ -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: - .fnstart + pixman_asm_function fname + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE /* * Make some macro arguments globally visible and accessible @@ -992,7 +975,6 @@ fname: .endm ldr UNIT_X, [sp] - .save {r4-r8, lr} push {r4-r8, lr} ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)] .if mask_bpp != 0 @@ -1108,7 +1090,6 @@ fname: .purgem fetch_src_pixblock 
.purgem pixld_src - .fnend .endfunc .endm @@ -1135,7 +1116,6 @@ fname: */ .macro default_init_need_all_regs - .vsave {d8-d15} vpush {d8-d15} .endm diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon.c b/gfx/cairo/libpixman/src/pixman-arm-neon.c index d902193cfa..be761c9652 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-neon.c +++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c @@ -145,23 +145,6 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, uint16_t, uint16_t) PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, uint32_t, uint32_t) -static force_inline void -pixman_scaled_bilinear_scanline_8888_8888_SRC ( - uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w); -} - PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, uint32_t, uint32_t) @@ -285,28 +268,6 @@ arm_neon_blt (pixman_implementation_t *imp, } } -static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width) -{ - pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0); -} - -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER, - pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER, - pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER, - pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER, - 
pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - NORMAL, FLAG_NONE) - static const pixman_fast_path_t arm_neon_fast_paths[] = { PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), @@ -401,21 +362,21 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888), PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + SIMPLE_NEAREST_FAST_PATH (SRC, 
x8b8g8r8, b5g6r5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), /* Note: NONE repeat is not supported yet */ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), @@ -460,8 +421,6 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), - { PIXMAN_OP_NONE }, }; diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S b/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S index 7110995488..e050292e05 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S +++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S @@ -37,16 +37,7 @@ .altmacro .p2align 2 -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm +#include "pixman-arm-asm.h" /* * Note: This code is only using armv5te instructions (not even armv6), diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S index c209688790..a74a0a8f34 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S +++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.S @@ -37,6 +37,7 @@ .altmacro .p2align 2 +#include "pixman-arm-asm.h" #include "pixman-arm-simd-asm.h" /* A head macro should do all processing which results in an output of up to @@ -303,6 +304,83 @@ generate_composite_function \ 
/******************************************************************************/ +.macro src_x888_0565_init + /* Hold loop invariant in MASK */ + ldr MASK, =0x001F001F + line_saved_regs STRIDE_S, ORIG_W +.endm + +.macro src_x888_0565_1pixel s, d + and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb + and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000 + orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb + orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb + /* Top 16 bits are discarded during the following STRH */ +.endm + +.macro src_x888_0565_2pixels slo, shi, d, tmp + and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000 + and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB + and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb + orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB + orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB + and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000 + orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb + orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb + pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb +.endm + +.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_S + WK5 .req STRIDE_M + WK6 .req WK3 + WK7 .req ORIG_W + .if numbytes == 16 + pixld , 16, 4, SRC, 0 + src_x888_0565_2pixels 4, 5, 0, 0 + pixld , 8, 4, SRC, 0 + src_x888_0565_2pixels 6, 7, 1, 1 + pixld , 8, 6, SRC, 0 + .else + pixld , numbytes*2, 4, SRC, 0 + .endif +.endm + +.macro src_x888_0565_process_tail cond, numbytes, firstreg + .if numbytes == 16 + src_x888_0565_2pixels 4, 5, 2, 2 + src_x888_0565_2pixels 6, 7, 3, 4 + .elseif numbytes == 8 + src_x888_0565_2pixels 4, 5, 1, 1 + src_x888_0565_2pixels 6, 7, 2, 2 + .elseif numbytes == 4 + src_x888_0565_2pixels 4, 5, 1, 1 + .else + src_x888_0565_1pixel 
4, 1 + .endif + .if numbytes == 16 + pixst , numbytes, 0, DST + .else + pixst , numbytes, 1, DST + .endif + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ + 3, /* prefetch distance */ \ + src_x888_0565_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + src_x888_0565_process_head, \ + src_x888_0565_process_tail + +/******************************************************************************/ + .macro add_8_8_8pixels cond, dst1, dst2 uqadd8&cond WK&dst1, WK&dst1, MASK uqadd8&cond WK&dst2, WK&dst2, STRIDE_M @@ -611,3 +689,491 @@ generate_composite_function \ /******************************************************************************/ +.macro over_reverse_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + ldr MASK, =0x00800080 + /* Split source pixel into RB/AG parts */ + uxtb16 STRIDE_S, SRC + uxtb16 STRIDE_M, SRC, ror #8 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK + line_saved_regs STRIDE_D, ORIG_W +.endm + +.macro over_reverse_n_8888_newline + mov STRIDE_D, #0xFF +.endm + +.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld , numbytes, firstreg, DST, 0 +.endm + +.macro over_reverse_n_8888_1pixel d, is_only + teq WK&d, #0 + beq 8f /* replace with source */ + bics ORIG_W, STRIDE_D, WK&d, lsr #24 + .if is_only == 1 + beq 49f /* skip store */ + .else + beq 9f /* write same value back */ + .endif + mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */ + mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */ + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8 + mov SCRATCH, SCRATCH, ror #8 + sel ORIG_W, SCRATCH, ORIG_W + uqadd8 WK&d, WK&d, ORIG_W + b 9f +8: mov WK&d, SRC +9: +.endm + +.macro 
over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4 + .if numbytes == 4 + over_reverse_n_8888_1pixel reg1, 1 + .else + and SCRATCH, WK&reg1, WK&reg2 + .if numbytes == 16 + and SCRATCH, SCRATCH, WK&reg3 + and SCRATCH, SCRATCH, WK&reg4 + .endif + mvns SCRATCH, SCRATCH, asr #24 + beq 49f /* skip store if all opaque */ + over_reverse_n_8888_1pixel reg1, 0 + over_reverse_n_8888_1pixel reg2, 0 + .if numbytes == 16 + over_reverse_n_8888_1pixel reg3, 0 + over_reverse_n_8888_1pixel reg4, 0 + .endif + .endif + pixst , numbytes, reg1, DST +49: +.endm + +.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg + over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ + 3, /* prefetch distance */ \ + over_reverse_n_8888_init, \ + over_reverse_n_8888_newline, \ + nop_macro, /* cleanup */ \ + over_reverse_n_8888_process_head, \ + over_reverse_n_8888_process_tail + +/******************************************************************************/ + +.macro over_white_8888_8888_ca_init + HALF .req SRC + TMP0 .req STRIDE_D + TMP1 .req STRIDE_S + TMP2 .req STRIDE_M + TMP3 .req ORIG_W + WK4 .req SCRATCH + line_saved_regs STRIDE_D, STRIDE_M, ORIG_W + ldr SCRATCH, =0x800080 + mov HALF, #0x80 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, SCRATCH, SCRATCH + .set DST_PRELOAD_BIAS, 8 +.endm + +.macro over_white_8888_8888_ca_cleanup + .set DST_PRELOAD_BIAS, 0 + .unreq HALF + .unreq TMP0 + .unreq TMP1 + .unreq TMP2 + .unreq TMP3 + .unreq WK4 +.endm + +.macro over_white_8888_8888_ca_combine m, d + uxtb16 TMP1, TMP0 /* rb_notmask */ + uxtb16 TMP2, d /* rb_dest; 1 stall follows */ + smlatt TMP3, TMP2, TMP1, HALF /* red */ + smlabb TMP2, TMP2, TMP1, HALF /* blue */ + uxtb16 TMP0, 
TMP0, ror #8 /* ag_notmask */ + uxtb16 TMP1, d, ror #8 /* ag_dest; 1 stall follows */ + smlatt d, TMP1, TMP0, HALF /* alpha */ + smlabb TMP1, TMP1, TMP0, HALF /* green */ + pkhbt TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */ + pkhbt TMP1, TMP1, d, lsl #16 /* ag */ + uxtab16 TMP0, TMP0, TMP0, ror #8 + uxtab16 TMP1, TMP1, TMP1, ror #8 + mov TMP0, TMP0, ror #8 + sel d, TMP0, TMP1 + uqadd8 d, d, m /* d is a late result */ +.endm + +.macro over_white_8888_8888_ca_1pixel_head + pixld , 4, 1, MASK, 0 + pixld , 4, 3, DST, 0 +.endm + +.macro over_white_8888_8888_ca_1pixel_tail + mvn TMP0, WK1 + teq WK1, WK1, asr #32 + bne 01f + bcc 03f + mov WK3, WK1 + b 02f +01: over_white_8888_8888_ca_combine WK1, WK3 +02: pixst , 4, 3, DST +03: +.endm + +.macro over_white_8888_8888_ca_2pixels_head + pixld , 8, 1, MASK, 0 +.endm + +.macro over_white_8888_8888_ca_2pixels_tail + pixld , 8, 3, DST + mvn TMP0, WK1 + teq WK1, WK1, asr #32 + bne 01f + movcs WK3, WK1 + bcs 02f + teq WK2, #0 + beq 05f + b 02f +01: over_white_8888_8888_ca_combine WK1, WK3 +02: mvn TMP0, WK2 + teq WK2, WK2, asr #32 + bne 03f + movcs WK4, WK2 + b 04f +03: over_white_8888_8888_ca_combine WK2, WK4 +04: pixst , 8, 3, DST +05: +.endm + +.macro over_white_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + .if numbytes == 4 + over_white_8888_8888_ca_1pixel_head + .else + .if numbytes == 16 + over_white_8888_8888_ca_2pixels_head + over_white_8888_8888_ca_2pixels_tail + .endif + over_white_8888_8888_ca_2pixels_head + .endif +.endm + +.macro over_white_8888_8888_ca_process_tail cond, numbytes, firstreg + .if numbytes == 4 + over_white_8888_8888_ca_1pixel_tail + .else + over_white_8888_8888_ca_2pixels_tail + .endif +.endm + +generate_composite_function \ + pixman_composite_over_white_8888_8888_ca_asm_armv6, 0, 32, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH \ + 2, /* 
prefetch distance */ \ + over_white_8888_8888_ca_init, \ + nop_macro, /* newline */ \ + over_white_8888_8888_ca_cleanup, \ + over_white_8888_8888_ca_process_head, \ + over_white_8888_8888_ca_process_tail + + +.macro over_n_8888_8888_ca_init + /* Set up constants. RB_SRC and AG_SRC are in registers; + * RB_FLDS, A_SRC, and the two HALF values need to go on the + * stack (and the full SRC value is already there) */ + ldr SCRATCH, [sp, #ARGS_STACK_OFFSET] + mov WK0, #0x00FF0000 + orr WK0, WK0, #0xFF /* RB_FLDS (0x00FF00FF) */ + mov WK1, #0x80 /* HALF default value */ + mov WK2, SCRATCH, lsr #24 /* A_SRC */ + orr WK3, WK1, WK1, lsl #16 /* HALF alternate value (0x00800080) */ + push {WK0-WK3} + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+16 + uxtb16 SRC, SCRATCH + uxtb16 STRIDE_S, SCRATCH, ror #8 + + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, WK3, WK3 + + .unreq WK0 + .unreq WK1 + .unreq WK2 + .unreq WK3 + WK0 .req Y + WK1 .req STRIDE_D + RB_SRC .req SRC + AG_SRC .req STRIDE_S + WK2 .req STRIDE_M + RB_FLDS .req r8 /* the reloaded constants have to be at consecutive registers starting at an even one */ + A_SRC .req r8 + HALF .req r9 + WK3 .req r10 + WK4 .req r11 + WK5 .req SCRATCH + WK6 .req ORIG_W + + line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W +.endm + +.macro over_n_8888_8888_ca_cleanup + add sp, sp, #16 + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-16 + + .unreq WK0 + .unreq WK1 + .unreq RB_SRC + .unreq AG_SRC + .unreq WK2 + .unreq RB_FLDS + .unreq A_SRC + .unreq HALF + .unreq WK3 + .unreq WK4 + .unreq WK5 + .unreq WK6 + WK0 .req r8 + WK1 .req r9 + WK2 .req r10 + WK3 .req r11 +.endm + +.macro over_n_8888_8888_ca_1pixel_head + pixld , 4, 6, MASK, 0 + pixld , 4, 0, DST, 0 +.endm + +.macro over_n_8888_8888_ca_1pixel_tail + ldrd A_SRC, HALF, [sp, #LOCALS_STACK_OFFSET+8] + uxtb16 WK1, WK6 /* rb_mask (first step of hard case placed in what would otherwise be a stall) */ + teq WK6, WK6, asr #32 /* Zc if transparent, ZC if opaque */ + bne 20f 
+ bcc 40f + /* Mask is fully opaque (all channels) */ + ldr WK6, [sp, #ARGS_STACK_OFFSET] /* get SRC back */ + eors A_SRC, A_SRC, #0xFF + bne 10f + /* Source is also opaque - same as src_8888_8888 */ + mov WK0, WK6 + b 30f +10: /* Same as over_8888_8888 */ + mul_8888_8 WK0, A_SRC, WK5, HALF + uqadd8 WK0, WK0, WK6 + b 30f +20: /* No simplifications possible - do it the hard way */ + uxtb16 WK2, WK6, ror #8 /* ag_mask */ + mla WK3, WK1, A_SRC, HALF /* rb_mul; 2 cycles */ + mla WK4, WK2, A_SRC, HALF /* ag_mul; 2 cycles */ + ldrd RB_FLDS, HALF, [sp, #LOCALS_STACK_OFFSET] + uxtb16 WK5, WK0 /* rb_dest */ + uxtab16 WK3, WK3, WK3, ror #8 + uxtb16 WK6, WK0, ror #8 /* ag_dest */ + uxtab16 WK4, WK4, WK4, ror #8 + smlatt WK0, RB_SRC, WK1, HALF /* red1 */ + smlabb WK1, RB_SRC, WK1, HALF /* blue1 */ + bic WK3, RB_FLDS, WK3, lsr #8 + bic WK4, RB_FLDS, WK4, lsr #8 + pkhbt WK1, WK1, WK0, lsl #16 /* rb1 */ + smlatt WK0, WK5, WK3, HALF /* red2 */ + smlabb WK3, WK5, WK3, HALF /* blue2 */ + uxtab16 WK1, WK1, WK1, ror #8 + smlatt WK5, AG_SRC, WK2, HALF /* alpha1 */ + pkhbt WK3, WK3, WK0, lsl #16 /* rb2 */ + smlabb WK0, AG_SRC, WK2, HALF /* green1 */ + smlatt WK2, WK6, WK4, HALF /* alpha2 */ + smlabb WK4, WK6, WK4, HALF /* green2 */ + pkhbt WK0, WK0, WK5, lsl #16 /* ag1 */ + uxtab16 WK3, WK3, WK3, ror #8 + pkhbt WK4, WK4, WK2, lsl #16 /* ag2 */ + uxtab16 WK0, WK0, WK0, ror #8 + uxtab16 WK4, WK4, WK4, ror #8 + mov WK1, WK1, ror #8 + mov WK3, WK3, ror #8 + sel WK2, WK1, WK0 /* recombine source*mask */ + sel WK1, WK3, WK4 /* recombine dest*(1-source_alpha*mask) */ + uqadd8 WK0, WK1, WK2 /* followed by 1 stall */ +30: /* The destination buffer is already in the L1 cache, so + * there's little point in amalgamating writes */ + pixst , 4, 0, DST +40: +.endm + +.macro over_n_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + .rept (numbytes / 4) - 1 + over_n_8888_8888_ca_1pixel_head + over_n_8888_8888_ca_1pixel_tail + .endr + 
over_n_8888_8888_ca_1pixel_head +.endm + +.macro over_n_8888_8888_ca_process_tail cond, numbytes, firstreg + over_n_8888_8888_ca_1pixel_tail +.endm + +pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6 + ldr ip, [sp] + cmp ip, #-1 + beq pixman_composite_over_white_8888_8888_ca_asm_armv6 + /* else drop through... */ + .endfunc +generate_composite_function \ + pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \ + 2, /* prefetch distance */ \ + over_n_8888_8888_ca_init, \ + nop_macro, /* newline */ \ + over_n_8888_8888_ca_cleanup, \ + over_n_8888_8888_ca_process_head, \ + over_n_8888_8888_ca_process_tail + +/******************************************************************************/ + +.macro in_reverse_8888_8888_init + /* Hold loop invariant in MASK */ + ldr MASK, =0x00800080 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK + /* Offset the source pointer: we only need the alpha bytes */ + add SRC, SRC, #3 + line_saved_regs ORIG_W +.endm + +.macro in_reverse_8888_8888_head numbytes, reg1, reg2, reg3 + ldrb ORIG_W, [SRC], #4 + .if numbytes >= 8 + ldrb WK&reg1, [SRC], #4 + .if numbytes == 16 + ldrb WK&reg2, [SRC], #4 + ldrb WK&reg3, [SRC], #4 + .endif + .endif + add DST, DST, #numbytes +.endm + +.macro in_reverse_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + in_reverse_8888_8888_head numbytes, firstreg, %(firstreg+1), %(firstreg+2) +.endm + +.macro in_reverse_8888_8888_1pixel s, d, offset, is_only + .if is_only != 1 + movs s, ORIG_W + .if offset != 0 + ldrb ORIG_W, [SRC, #offset] + .endif + beq 01f + teq STRIDE_M, #0xFF + beq 02f + .endif + uxtb16 SCRATCH, d /* rb_dest */ + uxtb16 d, d, ror #8 /* ag_dest */ + mla SCRATCH, SCRATCH, s, MASK + mla d, d, s, MASK + uxtab16 SCRATCH, 
SCRATCH, SCRATCH, ror #8 + uxtab16 d, d, d, ror #8 + mov SCRATCH, SCRATCH, ror #8 + sel d, SCRATCH, d + b 02f + .if offset == 0 +48: /* Last mov d,#0 of the set - used as part of shortcut for + * source values all 0 */ + .endif +01: mov d, #0 +02: +.endm + +.macro in_reverse_8888_8888_tail numbytes, reg1, reg2, reg3, reg4 + .if numbytes == 4 + teq ORIG_W, ORIG_W, asr #32 + ldrne WK&reg1, [DST, #-4] + .elseif numbytes == 8 + teq ORIG_W, WK&reg1 + teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ + ldmnedb DST, {WK&reg1-WK&reg2} + .else + teq ORIG_W, WK&reg1 + teqeq ORIG_W, WK&reg2 + teqeq ORIG_W, WK&reg3 + teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ + ldmnedb DST, {WK&reg1-WK&reg4} + .endif + cmnne DST, #0 /* clear C if NE */ + bcs 49f /* no writes to dest if source all -1 */ + beq 48f /* set dest to all 0 if source all 0 */ + .if numbytes == 4 + in_reverse_8888_8888_1pixel ORIG_W, WK&reg1, 0, 1 + str WK&reg1, [DST, #-4] + .elseif numbytes == 8 + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -4, 0 + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, 0, 0 + stmdb DST, {WK&reg1-WK&reg2} + .else + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -12, 0 + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, -8, 0 + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg3, -4, 0 + in_reverse_8888_8888_1pixel STRIDE_M, WK&reg4, 0, 0 + stmdb DST, {WK&reg1-WK&reg4} + .endif +49: +.endm + +.macro in_reverse_8888_8888_process_tail cond, numbytes, firstreg + in_reverse_8888_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) +.endm + +generate_composite_function \ + pixman_composite_in_reverse_8888_8888_asm_armv6, 32, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_NO_PRELOAD_DST \ + 2, /* prefetch distance */ \ + in_reverse_8888_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + in_reverse_8888_8888_process_head, \ + in_reverse_8888_8888_process_tail + 
+/******************************************************************************/ + +.macro over_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + /* Hold loop invariant in MASK */ + ldr MASK, =0x00800080 + /* Hold multiplier for destination in STRIDE_M */ + mov STRIDE_M, #255 + sub STRIDE_M, STRIDE_M, SRC, lsr #24 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK +.endm + +.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld , numbytes, firstreg, DST, 0 +.endm + +.macro over_n_8888_1pixel dst + mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK + uqadd8 WK&dst, WK&dst, SRC +.endm + +.macro over_n_8888_process_tail cond, numbytes, firstreg + .set PROCESS_REG, firstreg + .rept numbytes / 4 + over_n_8888_1pixel %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \ + 2, /* prefetch distance */ \ + over_n_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_n_8888_process_head, \ + over_n_8888_process_tail + +/******************************************************************************/ diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h index 496e37e309..da153c3f58 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h +++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.h @@ -76,6 +76,16 @@ .set FLAG_SPILL_LINE_VARS, 48 .set FLAG_PROCESS_CORRUPTS_SCRATCH, 0 .set FLAG_PROCESS_PRESERVES_SCRATCH, 64 +.set FLAG_PROCESS_PRESERVES_WK0, 0 +.set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */ +.set FLAG_PRELOAD_DST, 0 +.set FLAG_NO_PRELOAD_DST, 256 + +/* + * Number of bytes by which to adjust preload offset of destination + * buffer 
(allows preload instruction to be moved before the load(s)) + */ +.set DST_PRELOAD_BIAS, 0 /* * Offset into stack where mask and source pointer/stride can be accessed. @@ -87,6 +97,11 @@ #endif /* + * Offset into stack where space allocated during init macro can be accessed. + */ +.set LOCALS_STACK_OFFSET, 0 + +/* * Constants for selecting preferable prefetch type. */ .set PREFETCH_TYPE_NONE, 0 @@ -196,8 +211,8 @@ PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift PF and, SCRATCH, SCRATCH, #31 PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift - PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ - PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */ + PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ + PF movs, SCRATCH, SCRATCH, lsl #32-6 /* so this sets NC / nc / Nc */ PF bcs, 61f PF bpl, 60f PF pld, [ptr, #32*(prefetch_distance+2)] @@ -359,23 +374,41 @@ .macro test_bits_1_0_ptr + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + movs SCRATCH, X, lsl #32-1 /* C,N = bits 1,0 of DST */ + .else movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */ + .endif .endm .macro test_bits_3_2_ptr + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + movs SCRATCH, X, lsl #32-3 /* C,N = bits 3, 2 of DST */ + .else movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */ + .endif .endm .macro leading_15bytes process_head, process_tail /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */ + .set DECREMENT_X, 1 + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + .set DECREMENT_X, 0 + sub X, X, WK0, lsr #dst_bpp_shift + str X, [sp, #LINE_SAVED_REG_COUNT*4] + mov X, WK0 + .endif /* Use unaligned loads in all cases for simplicity */ .if dst_w_bpp == 8 - conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1 + conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp == 16 test_bits_1_0_ptr - 
conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, 1 + conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X + .endif + conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + ldr X, [sp, #LINE_SAVED_REG_COUNT*4] .endif - conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1 .endm .macro test_bits_3_2_pix @@ -414,7 +447,7 @@ preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif - .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) + .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0) /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that * destination prefetches are 32-byte aligned. It's also the easiest channel to offset * preloads for, to achieve staggered prefetches for multiple channels, because there are @@ -437,11 +470,11 @@ .if dst_r_bpp > 0 tst DST, #16 bne 111f - process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + DST_PRELOAD_BIAS b 112f 111: .endif - process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 + DST_PRELOAD_BIAS 112: /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */ .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256) @@ -449,7 +482,9 @@ .endif preload_trailing src_bpp, src_bpp_shift, SRC preload_trailing mask_bpp, mask_bpp_shift, MASK + .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 preload_trailing dst_r_bpp, dst_bpp_shift, DST + .endif add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp /* The remainder of the line is handled identically to the medium case */ 
medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask @@ -561,13 +596,7 @@ process_tail, \ process_inner_loop - .func fname - .global fname - /* For ELF format also set function visibility to hidden */ -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif + pixman_asm_function fname /* * Make some macro arguments globally visible and accessible @@ -679,16 +708,12 @@ SCRATCH .req r12 ORIG_W .req r14 /* width (pixels) */ -fname: - .fnstart - .save {r4-r11, lr} push {r4-r11, lr} /* save all registers */ subs Y, Y, #1 blo 199f #ifdef DEBUG_PARAMS - .pad #9*4 sub sp, sp, #9*4 #endif @@ -708,6 +733,13 @@ fname: #endif init + + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + /* Reserve a word in which to store X during leading pixels */ + sub sp, sp, #4 + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET+4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET+4 + .endif lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */ sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift @@ -737,42 +769,49 @@ fname: .if (flags) & FLAG_SPILL_LINE_VARS_WIDE /* This is stmdb sp!,{} */ .word 0xE92D0000 | LINE_SAVED_REGS + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 .endif 151: /* New line */ newline preload_leading_step1 src_bpp, WK1, SRC preload_leading_step1 mask_bpp, WK2, MASK + .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 preload_leading_step1 dst_r_bpp, WK3, DST + .endif - tst DST, #15 + ands WK0, DST, #15 beq 154f - rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ - .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp) - PF and, WK0, WK0, #15 - .endif + rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */ preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK + .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 
preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST + .endif leading_15bytes process_head, process_tail 154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */ - .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) and SCRATCH, SRC, #31 rsb SCRATCH, SCRATCH, #32*prefetch_distance - .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) and SCRATCH, MASK, #31 rsb SCRATCH, SCRATCH, #32*prefetch_distance - .endif - .ifc "process_inner_loop","" + .endif + .ifc "process_inner_loop","" switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f - .else + .else switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f - .endif + .endif 157: /* Check for another line */ end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b + .if (flags) & FLAG_SPILL_LINE_VARS_WIDE + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 + .endif .endif .ltorg @@ -782,17 +821,21 @@ fname: .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE /* This is stmdb sp!,{} */ .word 0xE92D0000 | LINE_SAVED_REGS + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET + LINE_SAVED_REG_COUNT*4 .endif 161: /* New line */ newline preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ preload_line 0, mask_bpp, mask_bpp_shift, MASK + .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 preload_line 0, dst_r_bpp, dst_bpp_shift, DST + .endif sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */ - tst DST, #15 + ands WK0, 
DST, #15 beq 164f - rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ + rsb WK0, WK0, #16 /* number of leading bytes until destination aligned */ leading_15bytes process_head, process_tail @@ -816,7 +859,9 @@ fname: newline preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ preload_line 1, mask_bpp, mask_bpp_shift, MASK + .if ((flags) & FLAG_NO_PRELOAD_DST) == 0 preload_line 1, dst_r_bpp, dst_bpp_shift, DST + .endif .if dst_w_bpp == 8 tst DST, #3 @@ -847,12 +892,22 @@ fname: 177: /* Check for another line */ end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one + .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET - LINE_SAVED_REG_COUNT*4 + .endif 197: .if (flags) & FLAG_SPILL_LINE_VARS add sp, sp, #LINE_SAVED_REG_COUNT*4 .endif 198: + .if (flags) & FLAG_PROCESS_CORRUPTS_WK0 + .set ARGS_STACK_OFFSET, ARGS_STACK_OFFSET-4 + .set LOCALS_STACK_OFFSET, LOCALS_STACK_OFFSET-4 + add sp, sp, #4 + .endif + cleanup #ifdef DEBUG_PARAMS @@ -860,7 +915,6 @@ fname: #endif 199: pop {r4-r11, pc} /* exit */ - .fnend .ltorg diff --git a/gfx/cairo/libpixman/src/pixman-arm-simd.c b/gfx/cairo/libpixman/src/pixman-arm-simd.c index af062e19dc..f0d14540bc 100644 --- a/gfx/cairo/libpixman/src/pixman-arm-simd.c +++ b/gfx/cairo/libpixman/src/pixman-arm-simd.c @@ -41,11 +41,20 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888, uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565, + uint32_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888, + uint32_t, 1, uint32_t, 1) + 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888, + uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, uint32_t, 1, uint32_t, 1) @@ -53,6 +62,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, uint16_t, uint16_t) PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, @@ -216,6 +228,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888), PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), @@ -225,6 +242,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888), + 
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), @@ -232,15 +256,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, a8r8g8b8, armv6_composite_in_reverse_8888_8888), + PIXMAN_STD_FAST_PATH (IN_REVERSE, a8r8g8b8, null, x8r8g8b8, armv6_composite_in_reverse_8888_8888), + PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, a8b8g8r8, armv6_composite_in_reverse_8888_8888), + PIXMAN_STD_FAST_PATH (IN_REVERSE, a8b8g8r8, null, x8b8g8r8, armv6_composite_in_reverse_8888_8888), + + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, armv6_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, armv6_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, armv6_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, armv6_composite_over_n_8888_8888_ca), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 
armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), { PIXMAN_OP_NONE }, }; diff --git a/gfx/cairo/libpixman/src/pixman-arm.c b/gfx/cairo/libpixman/src/pixman-arm.c index 358372e50a..4a2ae85393 100644 --- a/gfx/cairo/libpixman/src/pixman-arm.c +++ b/gfx/cairo/libpixman/src/pixman-arm.c @@ -1,20 +1,19 @@ /* * Copyright © 2000 SuSE, Inc. * Copyright © 2007 Red Hat, Inc. - * Copyright © 2021 Moonchild Productions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting - * documentation, and that the names of the authors not be used in advertising or + * documentation, and that the name of SuSE not be used in advertising or * publicity pertaining to distribution of the software without specific, - * written prior permission. The authors make no representations about the + * written prior permission. SuSE makes no representations about the * suitability of this software for any purpose. It is provided "as is" * without express or implied warranty. 
* - * THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHORS + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN @@ -95,6 +94,35 @@ detect_cpu_features (void) return features; } +#elif defined(__ANDROID__) || defined(ANDROID) /* Android */ + +#include <cpu-features.h> + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + AndroidCpuFamily cpu_family; + uint64_t cpu_features; + + cpu_family = android_getCpuFamily(); + cpu_features = android_getCpuFeatures(); + + if (cpu_family == ANDROID_CPU_FAMILY_ARM) + { + if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7) + features |= ARM_V7; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3) + features |= ARM_VFP; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) + features |= ARM_NEON; + } + + return features; +} + #elif defined (__linux__) /* linux ELF */ #include <unistd.h> @@ -148,6 +176,31 @@ detect_cpu_features (void) return features; } +#elif defined (_3DS) /* 3DS homebrew (devkitARM) */ + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + features |= ARM_V6; + + return features; +} + +#elif defined (PSP2) || defined (__SWITCH__) +/* Vita (VitaSDK) or Switch (devkitA64) homebrew */ + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + features |= ARM_NEON; + + return features; +} + #else /* Unknown */ static arm_cpu_features_t diff --git a/gfx/cairo/libpixman/src/pixman-bits-image.c b/gfx/cairo/libpixman/src/pixman-bits-image.c index 
e9d2fb69c6..4cfabe318a 100644 --- a/gfx/cairo/libpixman/src/pixman-bits-image.c +++ b/gfx/cairo/libpixman/src/pixman-bits-image.c @@ -35,44 +35,47 @@ #include "pixman-private.h" #include "pixman-combine32.h" #include "pixman-inlines.h" +#include "dither/blue-noise-64x64.h" -static uint32_t * -_pixman_image_get_scanline_generic_float (pixman_iter_t * iter, - const uint32_t *mask) -{ - pixman_iter_get_scanline_t fetch_32 = iter->data; - uint32_t *buffer = iter->buffer; - - fetch_32 (iter, NULL); +/* Fetch functions */ - pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); +static force_inline void +fetch_pixel_no_alpha_32 (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds, + void *out) +{ + uint32_t *ret = out; - return iter->buffer; + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + *ret = 0; + else + *ret = image->fetch_pixel_32 (image, x, y); } -/* Fetch functions */ - -static force_inline uint32_t -fetch_pixel_no_alpha (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds) +static force_inline void +fetch_pixel_no_alpha_float (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds, + void *out) { + argb_t *ret = out; + if (check_bounds && (x < 0 || x >= image->width || y < 0 || y >= image->height)) - { - return 0; - } - - return image->fetch_pixel_32 (image, x, y); + ret->a = ret->r = ret->g = ret->b = 0.f; + else + *ret = image->fetch_pixel_float (image, x, y); } -typedef uint32_t (* get_pixel_t) (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds); +typedef void (* get_pixel_t) (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds, void *out); -static force_inline uint32_t +static force_inline void bits_image_fetch_pixel_nearest (bits_image_t *image, pixman_fixed_t x, pixman_fixed_t y, - get_pixel_t get_pixel) + get_pixel_t get_pixel, + void *out) { int x0 = pixman_fixed_to_int (x - pixman_fixed_e); int y0 = pixman_fixed_to_int (y - 
pixman_fixed_e); @@ -82,19 +85,20 @@ bits_image_fetch_pixel_nearest (bits_image_t *image, repeat (image->common.repeat, &x0, image->width); repeat (image->common.repeat, &y0, image->height); - return get_pixel (image, x0, y0, FALSE); + get_pixel (image, x0, y0, FALSE, out); } else { - return get_pixel (image, x0, y0, TRUE); + get_pixel (image, x0, y0, TRUE, out); } } -static force_inline uint32_t -bits_image_fetch_pixel_bilinear (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) +static force_inline void +bits_image_fetch_pixel_bilinear_32 (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel, + void *out) { pixman_repeat_t repeat_mode = image->common.repeat; int width = image->width; @@ -102,6 +106,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, int x1, y1, x2, y2; uint32_t tl, tr, bl, br; int32_t distx, disty; + uint32_t *ret = out; x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; @@ -121,242 +126,142 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, repeat (repeat_mode, &x2, width); repeat (repeat_mode, &y2, height); - tl = get_pixel (image, x1, y1, FALSE); - bl = get_pixel (image, x1, y2, FALSE); - tr = get_pixel (image, x2, y1, FALSE); - br = get_pixel (image, x2, y2, FALSE); + get_pixel (image, x1, y1, FALSE, &tl); + get_pixel (image, x2, y1, FALSE, &tr); + get_pixel (image, x1, y2, FALSE, &bl); + get_pixel (image, x2, y2, FALSE, &br); } else { - tl = get_pixel (image, x1, y1, TRUE); - tr = get_pixel (image, x2, y1, TRUE); - bl = get_pixel (image, x1, y2, TRUE); - br = get_pixel (image, x2, y2, TRUE); + get_pixel (image, x1, y1, TRUE, &tl); + get_pixel (image, x2, y1, TRUE, &tr); + get_pixel (image, x1, y2, TRUE, &bl); + get_pixel (image, x2, y2, TRUE, &br); } - return bilinear_interpolation (tl, tr, bl, br, distx, disty); + *ret = bilinear_interpolation (tl, tr, bl, br, distx, disty); } -static uint32_t * -bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t 
*iter, - const uint32_t *mask) +static force_inline void +bits_image_fetch_pixel_bilinear_float (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel, + void *out) { + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int x1, y1, x2, y2; + argb_t tl, tr, bl, br; + float distx, disty; + argb_t *ret = out; - pixman_image_t * ima = iter->image; - int offset = iter->x; - int line = iter->y++; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - bits_image_t *bits = &ima->bits; - pixman_fixed_t x_top, x_bottom, x; - pixman_fixed_t ux_top, ux_bottom, ux; - pixman_vector_t v; - uint32_t top_mask, bottom_mask; - uint32_t *top_row; - uint32_t *bottom_row; - uint32_t *end; - uint32_t zero[2] = { 0, 0 }; - uint32_t one = 1; - int y, y1, y2; - int disty; - int mask_inc; - int w; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (bits->common.transform, &v)) - return iter->buffer; - - ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; - x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; - y = v.vector[1] - pixman_fixed_1/2; - disty = pixman_fixed_to_bilinear_weight (y); + distx = ((float)pixman_fixed_fraction(x1)) / 65536.f; + disty = ((float)pixman_fixed_fraction(y1)) / 65536.f; - /* Load the pointers to the first and second lines from the source - * image that bilinear code must read. - * - * The main trick in this code is about the check if any line are - * outside of the image; - * - * When I realize that a line (any one) is outside, I change - * the pointer to a dummy area with zeros. 
Once I change this, I - * must be sure the pointer will not change, so I set the - * variables to each pointer increments inside the loop. - */ - y1 = pixman_fixed_to_int (y); + x1 = pixman_fixed_to_int (x1); + y1 = pixman_fixed_to_int (y1); + x2 = x1 + 1; y2 = y1 + 1; - if (y1 < 0 || y1 >= bits->height) - { - top_row = zero; - x_top = 0; - ux_top = 0; - } - else - { - top_row = bits->bits + y1 * bits->rowstride; - x_top = x; - ux_top = ux; - } - - if (y2 < 0 || y2 >= bits->height) - { - bottom_row = zero; - x_bottom = 0; - ux_bottom = 0; - } - else - { - bottom_row = bits->bits + y2 * bits->rowstride; - x_bottom = x; - ux_bottom = ux; - } - - /* Instead of checking whether the operation uses the mast in - * each loop iteration, verify this only once and prepare the - * variables to make the code smaller inside the loop. - */ - if (!mask) - { - mask_inc = 0; - mask = &one; - } - else + if (repeat_mode != PIXMAN_REPEAT_NONE) { - /* If have a mask, prepare the variables to check it */ - mask_inc = 1; - } + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); - /* If both are zero, then the whole thing is zero */ - if (top_row == zero && bottom_row == zero) - { - memset (buffer, 0, width * sizeof (uint32_t)); - return iter->buffer; - } - else if (bits->format == PIXMAN_x8r8g8b8) - { - if (top_row == zero) - { - top_mask = 0; - bottom_mask = 0xff000000; - } - else if (bottom_row == zero) - { - top_mask = 0xff000000; - bottom_mask = 0; - } - else - { - top_mask = 0xff000000; - bottom_mask = 0xff000000; - } + get_pixel (image, x1, y1, FALSE, &tl); + get_pixel (image, x2, y1, FALSE, &tr); + get_pixel (image, x1, y2, FALSE, &bl); + get_pixel (image, x2, y2, FALSE, &br); } else { - top_mask = 0; - bottom_mask = 0; - } - - end = buffer + width; - - /* Zero fill to the left of the image */ - while (buffer < end && x < pixman_fixed_minus_1) - { - *buffer++ = 0; - x += ux; - x_top += 
ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Left edge - */ - while (buffer < end && x < 0) - { - uint32_t tr, br; - int32_t distx; - - tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; - br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); - - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; + get_pixel (image, x1, y1, TRUE, &tl); + get_pixel (image, x2, y1, TRUE, &tr); + get_pixel (image, x1, y2, TRUE, &bl); + get_pixel (image, x2, y2, TRUE, &br); } - /* Main part */ - w = pixman_int_to_fixed (bits->width - 1); + *ret = bilinear_interpolation_float (tl, tr, bl, br, distx, disty); +} - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, tr, bl, br; - int32_t distx; +static force_inline void accum_32(unsigned int *satot, unsigned int *srtot, + unsigned int *sgtot, unsigned int *sbtot, + const void *p, pixman_fixed_t f) +{ + uint32_t pixel = *(uint32_t *)p; - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + *srtot += (int)RED_8 (pixel) * f; + *sgtot += (int)GREEN_8 (pixel) * f; + *sbtot += (int)BLUE_8 (pixel) * f; + *satot += (int)ALPHA_8 (pixel) * f; +} - distx = pixman_fixed_to_bilinear_weight (x); +static force_inline void reduce_32(unsigned int satot, unsigned int srtot, + unsigned int sgtot, unsigned int sbtot, + void *p) +{ + uint32_t *ret = p; - *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); - } + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } + satot = CLIP (satot, 0, 
0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); - /* Right Edge */ - w = pixman_int_to_fixed (bits->width); - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, bl; - int32_t distx; + *ret = ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; +static force_inline void accum_float(unsigned int *satot, unsigned int *srtot, + unsigned int *sgtot, unsigned int *sbtot, + const void *p, pixman_fixed_t f) +{ + const argb_t *pixel = p; - distx = pixman_fixed_to_bilinear_weight (x); + *satot += pixel->a * f; + *srtot += pixel->r * f; + *sgtot += pixel->g * f; + *sbtot += pixel->b * f; +} - *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); - } +static force_inline void reduce_float(unsigned int satot, unsigned int srtot, + unsigned int sgtot, unsigned int sbtot, + void *p) +{ + argb_t *ret = p; - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } + ret->a = CLIP (satot / 65536.f, 0.f, 1.f); + ret->r = CLIP (srtot / 65536.f, 0.f, 1.f); + ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f); + ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f); +} - /* Zero fill to the left of the image */ - while (buffer < end) - *buffer++ = 0; +typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot, + unsigned int *sgtot, unsigned int *sbtot, + const void *pixel, pixman_fixed_t f); - return iter->buffer; -} +typedef void (* reduce_pixel_t) (unsigned int satot, unsigned int srtot, + unsigned int sgtot, unsigned int sbtot, + void *out); -static force_inline uint32_t +static force_inline void bits_image_fetch_pixel_convolution (bits_image_t *image, pixman_fixed_t x, pixman_fixed_t y, - get_pixel_t get_pixel) + get_pixel_t get_pixel, + void *out, + accumulate_pixel_t accum, + reduce_pixel_t reduce) { pixman_fixed_t *params = 
image->common.filter_params; int x_off = (params[0] - pixman_fixed_1) >> 1; @@ -367,7 +272,7 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, pixman_repeat_t repeat_mode = image->common.repeat; int width = image->width; int height = image->height; - int srtot, sgtot, sbtot, satot; + unsigned int srtot, sgtot, sbtot, satot; params += 2; @@ -389,48 +294,39 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, if (f) { - uint32_t pixel; + /* Must be big enough to hold a argb_t */ + argb_t pixel; if (repeat_mode != PIXMAN_REPEAT_NONE) { repeat (repeat_mode, &rx, width); repeat (repeat_mode, &ry, height); - pixel = get_pixel (image, rx, ry, FALSE); + get_pixel (image, rx, ry, FALSE, &pixel); } else { - pixel = get_pixel (image, rx, ry, TRUE); + get_pixel (image, rx, ry, TRUE, &pixel); } - srtot += (int)RED_8 (pixel) * f; - sgtot += (int)GREEN_8 (pixel) * f; - sbtot += (int)BLUE_8 (pixel) * f; - satot += (int)ALPHA_8 (pixel) * f; + accum (&satot, &srtot, &sgtot, &sbtot, &pixel, f); } params++; } } - satot = (satot + 0x8000) >> 16; - srtot = (srtot + 0x8000) >> 16; - sgtot = (sgtot + 0x8000) >> 16; - sbtot = (sbtot + 0x8000) >> 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - - return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); + reduce (satot, srtot, sgtot, sbtot, out); } -static uint32_t -bits_image_fetch_pixel_separable_convolution (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) +static void +bits_image_fetch_pixel_separable_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel, + void *out, + accumulate_pixel_t accum, + reduce_pixel_t reduce) { pixman_fixed_t *params = image->common.filter_params; pixman_repeat_t repeat_mode = image->common.repeat; @@ -445,7 +341,7 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image, int x_off = ((cwidth << 16) - 
pixman_fixed_1) >> 1; int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; pixman_fixed_t *y_params; - int srtot, sgtot, sbtot, satot; + unsigned int srtot, sgtot, sbtot, satot; int32_t x1, x2, y1, y2; int32_t px, py; int i, j; @@ -485,82 +381,100 @@ bits_image_fetch_pixel_separable_convolution (bits_image_t *image, if (fx) { + /* Must be big enough to hold a argb_t */ + argb_t pixel; pixman_fixed_t f; - uint32_t pixel; if (repeat_mode != PIXMAN_REPEAT_NONE) { repeat (repeat_mode, &rx, width); repeat (repeat_mode, &ry, height); - pixel = get_pixel (image, rx, ry, FALSE); + get_pixel (image, rx, ry, FALSE, &pixel); } else { - pixel = get_pixel (image, rx, ry, TRUE); + get_pixel (image, rx, ry, TRUE, &pixel); } f = (fy * fx + 0x8000) >> 16; - srtot += (int)RED_8 (pixel) * f; - sgtot += (int)GREEN_8 (pixel) * f; - sbtot += (int)BLUE_8 (pixel) * f; - satot += (int)ALPHA_8 (pixel) * f; + accum(&satot, &srtot, &sgtot, &sbtot, &pixel, f); } } } } - satot = (satot + 0x8000) >> 16; - srtot = (srtot + 0x8000) >> 16; - sgtot = (sgtot + 0x8000) >> 16; - sbtot = (sbtot + 0x8000) >> 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); + reduce(satot, srtot, sgtot, sbtot, out); } -static force_inline uint32_t -bits_image_fetch_pixel_filtered (bits_image_t *image, +static force_inline void +bits_image_fetch_pixel_filtered (bits_image_t *image, + pixman_bool_t wide, pixman_fixed_t x, pixman_fixed_t y, - get_pixel_t get_pixel) + get_pixel_t get_pixel, + void *out) { switch (image->common.filter) { case PIXMAN_FILTER_NEAREST: case PIXMAN_FILTER_FAST: - return bits_image_fetch_pixel_nearest (image, x, y, get_pixel); + bits_image_fetch_pixel_nearest (image, x, y, get_pixel, out); break; case PIXMAN_FILTER_BILINEAR: case PIXMAN_FILTER_GOOD: case PIXMAN_FILTER_BEST: - return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel); + if 
(wide) + bits_image_fetch_pixel_bilinear_float (image, x, y, get_pixel, out); + else + bits_image_fetch_pixel_bilinear_32 (image, x, y, get_pixel, out); break; case PIXMAN_FILTER_CONVOLUTION: - return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); + if (wide) + { + bits_image_fetch_pixel_convolution (image, x, y, + get_pixel, out, + accum_float, + reduce_float); + } + else + { + bits_image_fetch_pixel_convolution (image, x, y, + get_pixel, out, + accum_32, reduce_32); + } break; case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: - return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel); + if (wide) + { + bits_image_fetch_pixel_separable_convolution (image, x, y, + get_pixel, out, + accum_float, + reduce_float); + } + else + { + bits_image_fetch_pixel_separable_convolution (image, x, y, + get_pixel, out, + accum_32, reduce_32); + } break; default: + assert (0); break; } - - return 0; } static uint32_t * -bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, - const uint32_t * mask) +__bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, + pixman_bool_t wide, + const uint32_t * mask) { pixman_image_t *image = iter->image; int offset = iter->x; @@ -572,6 +486,8 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, pixman_fixed_t ux, uy; pixman_vector_t v; int i; + get_pixel_t get_pixel = + wide ? fetch_pixel_no_alpha_float : fetch_pixel_no_alpha_32; /* reference point is the center of the pixel */ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; @@ -599,27 +515,45 @@ bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, { if (!mask || mask[i]) { - buffer[i] = bits_image_fetch_pixel_filtered ( - &image->bits, x, y, fetch_pixel_no_alpha); + bits_image_fetch_pixel_filtered ( + &image->bits, wide, x, y, get_pixel, buffer); } x += ux; y += uy; + buffer += wide ? 
4 : 1; } - return buffer; + return iter->buffer; +} + +static uint32_t * +bits_image_fetch_affine_no_alpha_32 (pixman_iter_t *iter, + const uint32_t *mask) +{ + return __bits_image_fetch_affine_no_alpha(iter, FALSE, mask); +} + +static uint32_t * +bits_image_fetch_affine_no_alpha_float (pixman_iter_t *iter, + const uint32_t *mask) +{ + return __bits_image_fetch_affine_no_alpha(iter, TRUE, mask); } /* General fetcher */ -static force_inline uint32_t -fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds) +static force_inline void +fetch_pixel_general_32 (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds, + void *out) { - uint32_t pixel; + uint32_t pixel, *ret = out; if (check_bounds && (x < 0 || x >= image->width || y < 0 || y >= image->height)) { - return 0; + *ret = 0; + return; } pixel = image->fetch_pixel_32 (image, x, y); @@ -648,18 +582,59 @@ fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun pixel |= (pixel_a << 24); } - return pixel; + *ret = pixel; +} + +static force_inline void +fetch_pixel_general_float (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds, + void *out) +{ + argb_t *ret = out; + + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + ret->a = ret->r = ret->g = ret->b = 0; + return; + } + + *ret = image->fetch_pixel_float (image, x, y); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + if (x < 0 || x >= image->common.alpha_map->width || + y < 0 || y >= image->common.alpha_map->height) + { + ret->a = 0.f; + } + else + { + argb_t alpha; + + alpha = image->common.alpha_map->fetch_pixel_float ( + image->common.alpha_map, x, y); + + ret->a = alpha.a; + } + } } static uint32_t * -bits_image_fetch_general (pixman_iter_t *iter, - const uint32_t *mask) +__bits_image_fetch_general (pixman_iter_t *iter, + pixman_bool_t wide, + const uint32_t *mask) { pixman_image_t 
*image = iter->image; int offset = iter->x; int line = iter->y++; int width = iter->width; uint32_t * buffer = iter->buffer; + get_pixel_t get_pixel = + wide ? fetch_pixel_general_float : fetch_pixel_general_32; pixman_fixed_t x, y, w; pixman_fixed_t ux, uy, uw; @@ -699,8 +674,8 @@ bits_image_fetch_general (pixman_iter_t *iter, { if (w != 0) { - x0 = ((pixman_fixed_48_16_t)x << 16) / w; - y0 = ((pixman_fixed_48_16_t)y << 16) / w; + x0 = ((uint64_t)x << 16) / w; + y0 = ((uint64_t)y << 16) / w; } else { @@ -708,484 +683,33 @@ bits_image_fetch_general (pixman_iter_t *iter, y0 = 0; } - buffer[i] = bits_image_fetch_pixel_filtered ( - &image->bits, x0, y0, fetch_pixel_general); + bits_image_fetch_pixel_filtered ( + &image->bits, wide, x0, y0, get_pixel, buffer); } x += ux; y += uy; w += uw; + buffer += wide ? 4 : 1; } - return buffer; -} - -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - -static force_inline void -bits_image_fetch_separable_convolution_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - bits_image_t *bits = &image->bits; - pixman_fixed_t *params = image->common.filter_params; - int cwidth = pixman_fixed_to_int (params[0]); - int cheight = pixman_fixed_to_int (params[1]); - int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; - int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; - int x_phase_bits = pixman_fixed_to_int (params[2]); - int y_phase_bits = pixman_fixed_to_int (params[3]); - int x_phase_shift = 16 - x_phase_bits; - int y_phase_shift = 16 - y_phase_bits; - pixman_fixed_t vx, vy; - pixman_fixed_t ux, uy; - pixman_vector_t v; - int k; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if 
(!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - vx = v.vector[0]; - vy = v.vector[1]; - - for (k = 0; k < width; ++k) - { - pixman_fixed_t *y_params; - int satot, srtot, sgtot, sbtot; - pixman_fixed_t x, y; - int32_t x1, x2, y1, y2; - int32_t px, py; - int i, j; - - if (mask && !mask[k]) - goto next; - - /* Round x and y to the middle of the closest phase before continuing. This - * ensures that the convolution matrix is aligned right, since it was - * positioned relative to a particular phase (and not relative to whatever - * exact fraction we happen to get here). - */ - x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); - y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); - - px = (x & 0xffff) >> x_phase_shift; - py = (y & 0xffff) >> y_phase_shift; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - satot = srtot = sgtot = sbtot = 0; - - y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; - - for (i = y1; i < y2; ++i) - { - pixman_fixed_t fy = *y_params++; - - if (fy) - { - pixman_fixed_t *x_params = params + 4 + px * cwidth; - - for (j = x1; j < x2; ++j) - { - pixman_fixed_t fx = *x_params++; - int rx = j; - int ry = i; - - if (fx) - { - pixman_fixed_t f; - uint32_t pixel, mask; - uint8_t *row; - - mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &rx, bits->width); - repeat (repeat_mode, &ry, bits->height); - - row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; - pixel = convert_pixel (row, rx) | mask; - } - else - { - if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) - { - pixel = 0; - } - else - { - row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; - pixel = convert_pixel (row, rx) | mask; - } - } - - f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; - srtot += (int)RED_8 (pixel) * f; - sgtot += (int)GREEN_8 (pixel) * f; - sbtot += (int)BLUE_8 (pixel) * f; - satot += (int)ALPHA_8 (pixel) * f; - } - } - } - } - - satot = (satot + 0x8000) >> 16; - srtot = (srtot + 0x8000) >> 16; - sgtot = (sgtot + 0x8000) >> 16; - sbtot = (sbtot + 0x8000) >> 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - - buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); - - next: - vx += ux; - vy += uy; - } -} - -static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -static force_inline void -bits_image_fetch_bilinear_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int 
x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - int width = image->bits.width; - int height = image->bits.height; - const uint8_t *row1; - const uint8_t *row2; - - if (mask && !mask[i]) - goto next; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = pixman_fixed_to_bilinear_weight (x1); - disty = pixman_fixed_to_bilinear_weight (y1); - - y1 = pixman_fixed_to_int (y1); - y2 = y1 + 1; - x1 = pixman_fixed_to_int (x1); - x2 = x1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - uint32_t mask; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - repeat (repeat_mode, &x1, width); - repeat (repeat_mode, &y1, height); - repeat (repeat_mode, &x2, width); - repeat (repeat_mode, &y2, height); - - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - - tl = convert_pixel (row1, x1) | mask; - tr = convert_pixel (row1, x2) | mask; - bl = convert_pixel (row2, x1) | mask; - br = convert_pixel (row2, x2) | mask; - } - else - { - uint32_t mask1, mask2; - int bpp; - - /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, - * which means if you use it in expressions, those - * expressions become unsigned themselves. Since - * the variables below can be negative in some cases, - * that will lead to crashes on 64 bit architectures. - * - * So this line makes sure bpp is signed - */ - bpp = PIXMAN_FORMAT_BPP (format); - - if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) - { - buffer[i] = 0; - goto next; - } - - if (y2 == 0) - { - row1 = zero; - mask1 = 0; - } - else - { - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row1 += bpp / 8 * x1; - - mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (y1 == height - 1) - { - row2 = zero; - mask2 = 0; - } - else - { - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - row2 += bpp / 8 * x1; - - mask2 = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - } - - if (x2 == 0) - { - tl = 0; - bl = 0; - } - else - { - tl = convert_pixel (row1, 0) | mask1; - bl = convert_pixel (row2, 0) | mask2; - } - - if (x1 == width - 1) - { - tr = 0; - br = 0; - } - else - { - tr = convert_pixel (row1, 1) | mask1; - br = convert_pixel (row2, 1) | mask2; - } - } - - buffer[i] = bilinear_interpolation ( - tl, tr, bl, br, distx, disty); - - next: - x += ux; - y += uy; - } -} - -static force_inline void -bits_image_fetch_nearest_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int width, height, x0, y0; - const uint8_t *row; - - if (mask && !mask[i]) - goto next; - - width = image->bits.width; - height = image->bits.height; - x0 = pixman_fixed_to_int (x - pixman_fixed_e); - y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (repeat_mode == PIXMAN_REPEAT_NONE && - (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) - { - buffer[i] = 0; - } - else - { - uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &x0, width); - repeat (repeat_mode, &y0, height); - } - - row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; - - buffer[i] = convert_pixel (row, x0) | mask; - } - - next: - x += ux; - y += uy; - } -} - -static force_inline uint32_t -convert_a8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_x8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); + return iter->buffer; } -static force_inline uint32_t -convert_a8 (const uint8_t *row, int x) +static uint32_t * +bits_image_fetch_general_32 (pixman_iter_t *iter, + const uint32_t *mask) { - return *(row + x) << 24; + return __bits_image_fetch_general(iter, FALSE, mask); } -static force_inline uint32_t -convert_r5g6b5 (const uint8_t *row, int x) +static uint32_t * +bits_image_fetch_general_float (pixman_iter_t *iter, + const uint32_t *mask) { - return convert_0565_to_0888 (*((uint16_t *)row + x)); + return __bits_image_fetch_general(iter, TRUE, mask); } -#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_separable_convolution_affine ( \ - iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - \ - return iter->buffer; \ - } - -#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_bilinear_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - } - -#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ - static 
uint32_t * \ - bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_nearest_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - } - -#define MAKE_FETCHERS(name, format, repeat_mode) \ - MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ - MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ - MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) - -MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) - static void replicate_pixel_32 (bits_image_t * bits, int x, @@ -1253,9 +777,9 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float (image, x, y, w, buffer, NULL); else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); width -= w; buffer += w * 
(wide? 4 : 1); @@ -1301,9 +825,9 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float (image, x, y, w, buffer, NULL); else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); buffer += w * (wide? 4 : 1); x += w; @@ -1381,104 +905,18 @@ static const fetcher_info_t fetcher_info[] = bits_image_fetch_untransformed_float }, -#define FAST_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_X_UNIT_POSITIVE | \ - FAST_PATH_Y_UNIT_ZERO | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_BILINEAR_FILTER) - - { PIXMAN_a8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_float - }, - - { PIXMAN_x8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_float - }, - -#define GENERAL_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_BILINEAR_FILTER) - -#define GENERAL_NEAREST_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_NEAREST_FILTER) - -#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) - -#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_separable_convolution_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - 
-#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_bilinear_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - -#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_nearest_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - -#define AFFINE_FAST_PATHS(name, format, repeat) \ - SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ - BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - NEAREST_AFFINE_FAST_PATH(name, format, repeat) - - AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_a8, a8, PAD) - AFFINE_FAST_PATHS (none_a8, a8, NONE) - AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) - AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) - AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) - AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) - AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) - AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) - /* Affine, no alpha */ { PIXMAN_any, (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), - bits_image_fetch_affine_no_alpha, - _pixman_image_get_scanline_generic_float + bits_image_fetch_affine_no_alpha_32, + bits_image_fetch_affine_no_alpha_float, }, /* General */ { PIXMAN_any, 0, - bits_image_fetch_general, - _pixman_image_get_scanline_generic_float + bits_image_fetch_general_32, + bits_image_fetch_general_float, }, { PIXMAN_null }, @@ -1508,7 +946,6 @@ 
_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) } else { - iter->data = info->get_scanline_32; iter->get_scanline = info->get_scanline_float; } return; @@ -1520,20 +957,6 @@ _pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) } static uint32_t * -dest_get_scanline_16 (pixman_iter_t *iter, const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - image->bits.fetch_scanline_16 (image, x, y, width, buffer, mask); - - return iter->buffer; -} - -static uint32_t * dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { pixman_image_t *image = iter->image; @@ -1542,7 +965,7 @@ dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) int width = iter->width; uint32_t * buffer = iter->buffer; - image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); + image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask); if (image->common.alpha_map) { uint32_t *alpha; @@ -1555,8 +978,7 @@ dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) y -= image->common.alpha_origin_y; image->common.alpha_map->fetch_scanline_32 ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, alpha, mask); + image->common.alpha_map, x, y, width, alpha, mask); for (i = 0; i < width; ++i) { @@ -1581,7 +1003,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) argb_t * buffer = (argb_t *)iter->buffer; image->fetch_scanline_float ( - (pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask); + image, x, y, width, (uint32_t *)buffer, mask); if (image->common.alpha_map) { argb_t *alpha; @@ -1594,8 +1016,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) y -= image->common.alpha_origin_y; image->common.alpha_map->fetch_scanline_float ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, (uint32_t *)alpha, mask); + 
image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask); for (i = 0; i < width; ++i) buffer[i].a = alpha[i].a; @@ -1608,20 +1029,6 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) } static void -dest_write_back_16 (pixman_iter_t *iter) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - const uint32_t *buffer = iter->buffer; - - image->store_scanline_16 (image, x, y, width, buffer); - - iter->y++; -} - -static void dest_write_back_narrow (pixman_iter_t *iter) { bits_image_t * image = &iter->image->bits; @@ -1644,6 +1051,119 @@ dest_write_back_narrow (pixman_iter_t *iter) iter->y++; } +static const float +dither_factor_blue_noise_64 (int x, int y) +{ + float m = dither_blue_noise_64x64[((y & 0x3f) << 6) | (x & 0x3f)]; + return m * (1. / 4096.f) + (1. / 8192.f); +} + +static const float +dither_factor_bayer_8 (int x, int y) +{ + uint32_t m; + + y ^= x; + + /* Compute reverse(interleave(xor(x mod n, y mod n), x mod n)) + * Here n = 8 and `mod n` is the bottom 3 bits. + */ + m = ((y & 0x1) << 5) | ((x & 0x1) << 4) | + ((y & 0x2) << 2) | ((x & 0x2) << 1) | + ((y & 0x4) >> 1) | ((x & 0x4) >> 2); + + /* m is in range [0, 63]. We scale it to [0, 63.0f/64.0f], then + * shift it to to [1.0f/128.0f, 127.0f/128.0f] so that 0 < d < 1. + * This ensures exact values are not changed by dithering. + */ + return (float)(m) * (1 / 64.0f) + (1.0f / 128.0f); +} + +typedef float (* dither_factor_t)(int x, int y); + +static force_inline float +dither_apply_channel (float f, float d, float s) +{ + /* float_to_unorm splits the [0, 1] segment in (1 << n_bits) + * subsections of equal length; however unorm_to_float does not + * map to the center of those sections. 
In fact, pixel value u is + * mapped to: + * + * u u u 1 + * -------------- = ---------- + -------------- * ---------- + * 2^n_bits - 1 2^n_bits 2^n_bits - 1 2^n_bits + * + * Hence if f = u / (2^n_bits - 1) is exactly representable on a + * n_bits palette, all the numbers between + * + * u + * ---------- = f - f * 2^n_bits = f + (0 - f) * 2^n_bits + * 2^n_bits + * + * and + * + * u + 1 + * ---------- = f - (f - 1) * 2^n_bits = f + (1 - f) * 2^n_bits + * 2^n_bits + * + * are also mapped back to u. + * + * Hence the following calculation ensures that we add as much + * noise as possible without perturbing values which are exactly + * representable in the target colorspace. Note that this corresponds to + * mixing the original color with noise with a ratio of `1 / 2^n_bits`. + */ + return f + (d - f) * s; +} + +static force_inline float +dither_compute_scale (int n_bits) +{ + // No dithering for wide formats + if (n_bits == 0 || n_bits >= 32) + return 0.f; + + return 1.f / (float)(1 << n_bits); +} + +static const uint32_t * +dither_apply_ordered (pixman_iter_t *iter, dither_factor_t factor) +{ + bits_image_t *image = &iter->image->bits; + int x = iter->x + image->dither_offset_x; + int y = iter->y + image->dither_offset_y; + int width = iter->width; + argb_t *buffer = (argb_t *)iter->buffer; + + pixman_format_code_t format = image->format; + int a_size = PIXMAN_FORMAT_A (format); + int r_size = PIXMAN_FORMAT_R (format); + int g_size = PIXMAN_FORMAT_G (format); + int b_size = PIXMAN_FORMAT_B (format); + + float a_scale = dither_compute_scale (a_size); + float r_scale = dither_compute_scale (r_size); + float g_scale = dither_compute_scale (g_size); + float b_scale = dither_compute_scale (b_size); + + int i; + float d; + + for (i = 0; i < width; ++i) + { + d = factor (x + i, y); + + buffer->a = dither_apply_channel (buffer->a, d, a_scale); + buffer->r = dither_apply_channel (buffer->r, d, r_scale); + buffer->g = dither_apply_channel (buffer->g, d, g_scale); + buffer->b = 
dither_apply_channel (buffer->b, d, b_scale); + + buffer++; + } + + return iter->buffer; +} + static void dest_write_back_wide (pixman_iter_t *iter) { @@ -1653,6 +1173,23 @@ dest_write_back_wide (pixman_iter_t *iter) int width = iter->width; const uint32_t *buffer = iter->buffer; + switch (image->dither) + { + case PIXMAN_DITHER_NONE: + break; + + case PIXMAN_DITHER_GOOD: + case PIXMAN_DITHER_BEST: + case PIXMAN_DITHER_ORDERED_BLUE_NOISE_64: + buffer = dither_apply_ordered (iter, dither_factor_blue_noise_64); + break; + + case PIXMAN_DITHER_FAST: + case PIXMAN_DITHER_ORDERED_BAYER_8: + buffer = dither_apply_ordered (iter, dither_factor_bayer_8); + break; + } + image->store_scanline_float (image, x, y, width, buffer); if (image->common.alpha_map) @@ -1670,20 +1207,7 @@ dest_write_back_wide (pixman_iter_t *iter) void _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) { - if (iter->iter_flags & ITER_16) - { - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - iter->get_scanline = dest_get_scanline_16; - } - iter->write_back = dest_write_back_16; - } - else if (iter->iter_flags & ITER_NARROW) + if (iter->iter_flags & ITER_NARROW) { if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) @@ -1694,7 +1218,7 @@ _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) { iter->get_scanline = dest_get_scanline_narrow; } - + iter->write_back = dest_write_back_narrow; } else @@ -1758,6 +1282,9 @@ _pixman_bits_image_init (pixman_image_t * image, { uint32_t *free_me = NULL; + if (PIXMAN_FORMAT_BPP (format) == 128) + return_val_if_fail(!(rowstride % 4), FALSE); + if (!bits && width && height) { int rowstride_bytes; @@ -1778,6 +1305,9 @@ _pixman_bits_image_init (pixman_image_t * image, image->bits.height = height; image->bits.bits = bits; 
image->bits.free_me = free_me; + image->bits.dither = PIXMAN_DITHER_NONE; + image->bits.dither_offset_x = 0; + image->bits.dither_offset_y = 0; image->bits.read_func = NULL; image->bits.write_func = NULL; image->bits.rowstride = rowstride; diff --git a/gfx/cairo/libpixman/src/pixman-combine-float.c b/gfx/cairo/libpixman/src/pixman-combine-float.c index 06ce2037ee..f5145bc9d7 100644 --- a/gfx/cairo/libpixman/src/pixman-combine-float.c +++ b/gfx/cairo/libpixman/src/pixman-combine-float.c @@ -42,8 +42,6 @@ #define force_inline __inline__ #endif -#define IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) - typedef float (* combine_channel_t) (float sa, float s, float da, float d); static force_inline void @@ -203,56 +201,56 @@ get_factor (combine_factor_t factor, float sa, float da) break; case SA_OVER_DA: - if (IS_ZERO (da)) + if (FLOAT_IS_ZERO (da)) f = 1.0f; else f = CLAMP (sa / da); break; case DA_OVER_SA: - if (IS_ZERO (sa)) + if (FLOAT_IS_ZERO (sa)) f = 1.0f; else f = CLAMP (da / sa); break; case INV_SA_OVER_DA: - if (IS_ZERO (da)) + if (FLOAT_IS_ZERO (da)) f = 1.0f; else f = CLAMP ((1.0f - sa) / da); break; case INV_DA_OVER_SA: - if (IS_ZERO (sa)) + if (FLOAT_IS_ZERO (sa)) f = 1.0f; else f = CLAMP ((1.0f - da) / sa); break; case ONE_MINUS_SA_OVER_DA: - if (IS_ZERO (da)) + if (FLOAT_IS_ZERO (da)) f = 0.0f; else f = CLAMP (1.0f - sa / da); break; case ONE_MINUS_DA_OVER_SA: - if (IS_ZERO (sa)) + if (FLOAT_IS_ZERO (sa)) f = 0.0f; else f = CLAMP (1.0f - da / sa); break; case ONE_MINUS_INV_DA_OVER_SA: - if (IS_ZERO (sa)) + if (FLOAT_IS_ZERO (sa)) f = 0.0f; else f = CLAMP (1.0f - (1.0f - da) / sa); break; case ONE_MINUS_INV_SA_OVER_DA: - if (IS_ZERO (da)) + if (FLOAT_IS_ZERO (da)) f = 0.0f; else f = CLAMP (1.0f - (1.0f - sa) / da); @@ -321,23 +319,44 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) * * The following blend modes have been taken from the PDF ISO 32000 * specification, which at this point in time is available from - * 
http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. + * + * http://www.adobe.com/devnet/pdf/pdf_reference.html + * + * The specific documents of interest are the PDF spec itself: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf + * + * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat + * 9.1 and Reader 9.1: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf + * + * that clarifies the specifications for blend modes ColorDodge and + * ColorBurn. + * * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * + * with B() is the blend function. When B(Cb, Cs) = Cs, this formula + * reduces to the regular OVER operator. 
+ * + * Cs and Cb are not premultiplied, so in our implementation we instead + * use: + * + * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) + * + * where cr, cs, and cb are premultiplied colors, and where the + * + * αb × αs × B(cb/αb, cs/αs) + * + * part is first arithmetically simplified under the assumption that αb + * and αs are not 0, and then updated to produce a meaningful result when + * they are. + * + * For all the blend mode operators, the alpha channel is given by + * + * αr = αs + αb + αb × αs */ #define MAKE_SEPARABLE_PDF_COMBINERS(name) \ @@ -357,18 +376,55 @@ MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) \ MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c) +/* + * Multiply + * + * ad * as * B(d / ad, s / as) + * = ad * as * d/ad * s/as + * = d * s + * + */ static force_inline float blend_multiply (float sa, float s, float da, float d) { return d * s; } +/* + * Screen + * + * ad * as * B(d/ad, s/as) + * = ad * as * (d/ad + s/as - s/as * d/ad) + * = ad * s + as * d - s * d + */ static force_inline float blend_screen (float sa, float s, float da, float d) { return d * sa + s * da - s * d; } +/* + * Overlay + * + * ad * as * B(d/ad, s/as) + * = ad * as * Hardlight (s, d) + * = if (d / ad < 0.5) + * as * ad * Multiply (s/as, 2 * d/ad) + * else + * as * ad * Screen (s/as, 2 * d / ad - 1) + * = if (d < 0.5 * ad) + * as * ad * s/as * 2 * d /ad + * else + * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) + * = if (2 * d < ad) + * 2 * s * d + * else + * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) + * = if (2 * d < ad) + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) + */ static force_inline float blend_overlay (float sa, float s, float da, float d) { @@ -378,6 +434,13 @@ blend_overlay (float sa, float s, float da, float d) return sa * da - 2 * (da - d) * (sa - s); } +/* + * Darken + * + * ad * as * B(d/ad, s/as) + * = ad * as * MIN(d/ad, s/as) + * = MIN 
(as * d, ad * s) + */ static force_inline float blend_darken (float sa, float s, float da, float d) { @@ -390,6 +453,13 @@ blend_darken (float sa, float s, float da, float d) return s; } +/* + * Lighten + * + * ad * as * B(d/ad, s/as) + * = ad * as * MAX(d/ad, s/as) + * = MAX (as * d, ad * s) + */ static force_inline float blend_lighten (float sa, float s, float da, float d) { @@ -402,19 +472,57 @@ blend_lighten (float sa, float s, float da, float d) return d; } +/* + * Color dodge + * + * ad * as * B(d/ad, s/as) + * = if d/ad = 0 + * ad * as * 0 + * else if (d/ad >= (1 - s/as) + * ad * as * 1 + * else + * ad * as * ((d/ad) / (1 - s/as)) + * = if d = 0 + * 0 + * elif as * d >= ad * (as - s) + * ad * as + * else + * as * (as * d / (as - s)) + * + */ static force_inline float blend_color_dodge (float sa, float s, float da, float d) { - if (IS_ZERO (d)) + if (FLOAT_IS_ZERO (d)) return 0.0f; else if (d * sa >= sa * da - s * da) return sa * da; - else if (IS_ZERO (sa - s)) + else if (FLOAT_IS_ZERO (sa - s)) return sa * da; else return sa * sa * d / (sa - s); } +/* + * Color burn + * + * We modify the first clause "if d = 1" to "if d >= 1" since with + * premultiplied colors d > 1 can actually happen. 
+ * + * ad * as * B(d/ad, s/as) + * = if d/ad >= 1 + * ad * as * 1 + * elif (1 - d/ad) >= s/as + * ad * as * 0 + * else + * ad * as * (1 - ((1 - d/ad) / (s/as))) + * = if d >= ad + * ad * as + * elif as * ad - as * d >= ad * s + * 0 + * else + * ad * as - as * as * (ad - d) / s + */ static force_inline float blend_color_burn (float sa, float s, float da, float d) { @@ -422,12 +530,29 @@ blend_color_burn (float sa, float s, float da, float d) return sa * da; else if (sa * (da - d) >= s * da) return 0.0f; - else if (IS_ZERO (s)) + else if (FLOAT_IS_ZERO (s)) return 0.0f; else return sa * (da - sa * (da - d) / s); } +/* + * Hard light + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * Multiply (d/ad, 2 * s/as) + * else + * ad * as * Screen (d/ad, 2 * s/as - 1) + * = if 2 * s <= as + * ad * as * d/ad * 2 * s / as + * else + * ad * as * (d/ad + (2 * s/as - 1) + d/ad * (2 * s/as - 1)) + * = if 2 * s <= as + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) + */ static force_inline float blend_hard_light (float sa, float s, float da, float d) { @@ -437,21 +562,38 @@ blend_hard_light (float sa, float s, float da, float d) return sa * da - 2 * (da - d) * (sa - s); } +/* + * Soft light + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad)) + * else if (d/ad <= 0.25) + * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad)) + * else + * ad * as * (d/ad + (2 * s/as - 1) * sqrt (d/ad)) + * = if (2 * s <= as) + * d * as - d * (ad - d) * (as - 2 * s) / ad; + * else if (4 * d <= ad) + * (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); + * else + * d * as + (sqrt (d * ad) - d) * (2 * s - as); + */ static force_inline float blend_soft_light (float sa, float s, float da, float d) { - if (2 * s < sa) + if (2 * s <= sa) { - if (IS_ZERO (da)) + if (FLOAT_IS_ZERO (da)) return d * sa; else return d * sa - d * (da - d) * (sa - 2 * s) / da; } else { - if (IS_ZERO (da)) + 
if (FLOAT_IS_ZERO (da)) { - return 0.0f; + return d * sa; } else { @@ -463,6 +605,20 @@ blend_soft_light (float sa, float s, float da, float d) } } +/* + * Difference + * + * ad * as * B(s/as, d/ad) + * = ad * as * abs (s/as - d/ad) + * = if (s/as <= d/ad) + * ad * as * (d/ad - s/as) + * else + * ad * as * (s/as - d/ad) + * = if (ad * s <= as * d) + * as * d - ad * s + * else + * ad * s - as * d + */ static force_inline float blend_difference (float sa, float s, float da, float d) { @@ -475,6 +631,13 @@ blend_difference (float sa, float s, float da, float d) return sda - dsa; } +/* + * Exclusion + * + * ad * as * B(s/as, d/ad) + * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) + * = as * d + ad * s - 2 * s * d + */ static force_inline float blend_exclusion (float sa, float s, float da, float d) { @@ -494,116 +657,79 @@ MAKE_SEPARABLE_PDF_COMBINERS (difference) MAKE_SEPARABLE_PDF_COMBINERS (exclusion) /* - * PDF nonseperable blend modes. - * - * These are implemented using the following functions to operate in Hsl - * space, with Cmax, Cmid, Cmin referring to the max, mid and min value - * of the red, green and blue components. + * PDF nonseperable blend modes are implemented using the following functions + * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid + * and min value of the red, green and blue components. 
* * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue * * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + (((C – l) × l) ⁄ (l – min)) - * if x > 1.0 - * C = l + (((C – l) × (1 – l)) (max – l)) - * return C + * l = LUM (C) + * min = Cmin + * max = Cmax + * if n < 0.0 + * C = l + (((C – l) × l) ⁄ (l – min)) + * if x > 1.0 + * C = l + (((C – l) × (1 – l) ) ⁄ (max – l)) + * return C * * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) + * d = l – LUM (C) + * C += d + * return clip_color (C) * * SAT (C) = CH_MAX (C) - CH_MIN (C) * * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C + * if Cmax > Cmin + * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) + * Cmax = s + * else + * Cmid = Cmax = 0.0 + * Cmin = 0.0 + * return C */ /* For premultiplied colors, we need to know what happens when C is * multiplied by a real number. LUM and SAT are linear: * - * LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C) + * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) * * If we extend clip_color with an extra argument a and change * - * if x >= 1.0 + * if x >= 1.0 * * into * - * if x >= a + * if x >= a * * then clip_color is also linear: * - * r * clip_color (C, a) = clip_color (r_c, ra); + * r * clip_color (C, a) = clip_color (r * C, r * a); * * for positive r. 
* * Similarly, we can extend set_lum with an extra argument that is just passed * on to clip_color: * - * r × set_lum ( C, l, a) + * r * set_lum (C, l, a) * - * = r × clip_color ( C + l - LUM (C), a) + * = r × clip_color (C + l - LUM (C), a) * - * = clip_color ( r * C + r × l - LUM (r × C), r * a) + * = clip_color (r * C + r × l - r * LUM (C), r * a) * - * = set_lum ( r * C, r * l, r * a) + * = set_lum (r * C, r * l, r * a) * * Finally, set_sat: * - * r * set_sat (C, s) = set_sat (x * C, r * s) + * r * set_sat (C, s) = set_sat (x * C, r * s) * - * The above holds for all non-zero x because they x'es in the fraction for + * The above holds for all non-zero x, because the x'es in the fraction for * C_mid cancel out. Specifically, it holds for x = r: * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - * - * - * - * So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) + * r * set_sat (C, s) = set_sat (r * C, r * s) * */ - typedef struct { float r; @@ -658,7 +784,7 @@ clip_color (rgb_t *color, float a) if (n < 0.0f) { t = l - n; - if (IS_ZERO (t)) + if (FLOAT_IS_ZERO (t)) { color->r = 0.0f; color->g = 0.0f; @@ -674,7 +800,7 @@ clip_color (rgb_t 
*color, float a) if (x > a) { t = x - l; - if (IS_ZERO (t)) + if (FLOAT_IS_ZERO (t)) { color->r = a; color->g = a; @@ -758,7 +884,7 @@ set_sat (rgb_t *src, float sat) t = *max - *min; - if (IS_ZERO (t)) + if (FLOAT_IS_ZERO (t)) { *mid = *max = 0.0f; } @@ -771,9 +897,12 @@ set_sat (rgb_t *src, float sat) *min = 0.0f; } -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) +/* Hue: + * + * as * ad * B(s/as, d/as) + * = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM (d/ad), 1) + * = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad) + * */ static force_inline void blend_hsl_hue (rgb_t *res, @@ -788,9 +917,14 @@ blend_hsl_hue (rgb_t *res, set_lum (res, sa * da, get_lum (dest) * sa); } -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) +/* + * Saturation + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1) + * = set_lum (as * ad * set_sat (d/ad, SAT (s/as)), + * as * LUM (d), as * ad) + * = set_lum (set_sat (as * d, ad * SAT (s), as * LUM (d), as * ad)) */ static force_inline void blend_hsl_saturation (rgb_t *res, @@ -805,9 +939,12 @@ blend_hsl_saturation (rgb_t *res, set_lum (res, sa * da, get_lum (dest) * sa); } -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) +/* + * Color + * + * as * ad * B(s/as, d/as) + * = as * ad * set_lum (s/as, LUM (d/ad), 1) + * = set_lum (s * ad, as * LUM (d), as * ad) */ static force_inline void blend_hsl_color (rgb_t *res, @@ -822,8 +959,11 @@ blend_hsl_color (rgb_t *res, } /* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) + * Luminosity + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (d/ad, LUM (s/as), 1) + * = set_lum (as * d, ad * LUM (s), as * ad) */ static force_inline void blend_hsl_luminosity (rgb_t *res, diff --git a/gfx/cairo/libpixman/src/pixman-combine.c.template b/gfx/cairo/libpixman/src/pixman-combine.c.template deleted file mode 100644 index cd008d9673..0000000000 --- 
a/gfx/cairo/libpixman/src/pixman-combine.c.template +++ /dev/null @@ -1,2461 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <math.h> -#include <string.h> - -#include "pixman-private.h" - -#include "pixman-combine.h" - -/*** per channel helper functions ***/ - -static void -combine_mask_ca (comp4_t *src, comp4_t *mask) -{ - comp4_t a = *mask; - - comp4_t x; - comp2_t xa; - - if (!a) - { - *(src) = 0; - return; - } - - x = *(src); - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - xa = x >> A_SHIFT; - UNcx4_MUL_UNcx4 (x, a); - *(src) = x; - - UNcx4_MUL_UNc (a, xa); - *(mask) = a; -} - -static void -combine_mask_value_ca (comp4_t *src, const comp4_t *mask) -{ - comp4_t a = *mask; - comp4_t x; - - if (!a) - { - *(src) = 0; - return; - } - - if (a == ~0) - return; - - x = *(src); - UNcx4_MUL_UNcx4 (x, a); - *(src) = x; -} - -static void -combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask) -{ - comp4_t a = *(mask); - comp4_t x; - - if (!a) - return; - - x = *(src) >> A_SHIFT; - if (x == MASK) - return; - - if (a == ~0) - { - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - UNcx4_MUL_UNc (a, x); - *(mask) = a; -} - -/* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. The unified alpha version has a 'U' at the end of the name, - * the component version has a 'C'. Similarly, functions which deal with - * this difference will have two versions using the same convention. 
- */ - -/* - * All of the composing functions - */ - -static force_inline comp4_t -combine_mask (const comp4_t *src, const comp4_t *mask, int i) -{ - comp4_t s, m; - - if (mask) - { - m = *(mask + i) >> A_SHIFT; - - if (!m) - return 0; - } - - s = *(src + i); - - if (mask) - UNcx4_MUL_UNc (s, m); - - return s; -} - -static void -combine_clear (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(comp4_t)); -} - -static void -combine_dst (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - return; -} - -static void -combine_src_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (comp4_t)); - else - { - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - - *(dest + i) = s; - } - } -} - -/* if the Src is opaque, call combine_src_u */ -static void -combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t ia = ALPHA_c (~s); - - UNcx4_MUL_UNc_ADD_UNcx4 (d, ia, s); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, this is a noop */ -static void -combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t ia = ALPHA_c (~*(dest + i)); - UNcx4_MUL_UNc_ADD_UNcx4 (s, ia, d); - *(dest + i) = s; - } -} - -/* if the Dst is opaque, call combine_src_u */ -static void -combine_in_u (pixman_implementation_t 
*imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t a = ALPHA_c (*(dest + i)); - UNcx4_MUL_UNc (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, this is a noop */ -static void -combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t a = ALPHA_c (s); - UNcx4_MUL_UNc (d, a); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, call combine_clear */ -static void -combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t a = ALPHA_c (~*(dest + i)); - UNcx4_MUL_UNc (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_clear */ -static void -combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t a = ALPHA_c (~s); - UNcx4_MUL_UNc (d, a); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_in_u */ -/* if the Dst is opaque, call combine_over_u */ -/* if both the Src and Dst are opaque, call combine_src_u */ -static void -combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t dest_a = ALPHA_c (d); - comp4_t src_ia = ALPHA_c (~s); - - 
UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_a, d, src_ia); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_reverse_u */ -/* if the Dst is opaque, call combine_in_reverse_u */ -/* if both the Src and Dst are opaque, call combine_dst_u */ -static void -combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t src_a = ALPHA_c (s); - comp4_t dest_ia = ALPHA_c (~d); - - UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_u */ -/* if the Dst is opaque, call combine_over_reverse_u */ -/* if both the Src and Dst are opaque, call combine_clear */ -static void -combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t src_ia = ALPHA_c (~s); - comp4_t dest_ia = ALPHA_c (~d); - - UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_ia); - *(dest + i) = s; - } -} - -static void -combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - UNcx4_ADD_UNcx4 (d, s); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_add_u */ -/* if the Dst is opaque, call combine_add_u */ -/* if both the Src and Dst are opaque, call combine_add_u */ -static void -combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = 
combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp2_t sa, da; - - sa = s >> A_SHIFT; - da = ~d >> A_SHIFT; - if (sa > da) - { - sa = DIV_UNc (da, sa); - UNcx4_MUL_UNc (s, sa); - } - ; - UNcx4_ADD_UNcx4 (d, s); - *(dest + i) = d; - } -} - -/* - * PDF blend modes: - * The following blend modes have been taken from the PDF ISO 32000 - * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. - * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * Cra = (1 – as) . Dca + (1 – ad) . 
Sca + B(Dca, ad, Sca, as) - */ - -/* - * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca - */ - -static void -combine_multiply_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t ss = s; - comp4_t src_ia = ALPHA_c (~s); - comp4_t dest_ia = ALPHA_c (~d); - - UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (ss, dest_ia, d, src_ia); - UNcx4_MUL_UNcx4 (d, s); - UNcx4_ADD_UNcx4 (d, ss); - - *(dest + i) = d; - } -} - -static void -combine_multiply_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t m = *(mask + i); - comp4_t s = *(src + i); - comp4_t d = *(dest + i); - comp4_t r = d; - comp4_t dest_ia = ALPHA_c (~d); - - combine_mask_value_ca (&s, &m); - - UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (r, ~m, s, dest_ia); - UNcx4_MUL_UNcx4 (d, s); - UNcx4_ADD_UNcx4 (r, d); - - *(dest + i) = r; - } -} - -#define PDF_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - comp4_t * dest, \ - const comp4_t * src, \ - const comp4_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - comp4_t s = combine_mask (src, mask, i); \ - comp4_t d = *(dest + i); \ - comp1_t sa = ALPHA_c (s); \ - comp1_t isa = ~sa; \ - comp1_t da = ALPHA_c (d); \ - comp1_t ida = ~da; \ - comp4_t result; \ - \ - result = d; \ - UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \ - (blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \ - (blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa)); \ - } \ - } \ - \ - static void \ - combine_ ## name ## _ca 
(pixman_implementation_t *imp, \ - pixman_op_t op, \ - comp4_t * dest, \ - const comp4_t * src, \ - const comp4_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - comp4_t m = *(mask + i); \ - comp4_t s = *(src + i); \ - comp4_t d = *(dest + i); \ - comp1_t da = ALPHA_c (d); \ - comp1_t ida = ~da; \ - comp4_t result; \ - \ - combine_mask_value_ca (&s, &m); \ - \ - result = d; \ - UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida); \ - \ - result += \ - (DIV_ONE_UNc (ALPHA_c (m) * (comp4_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \ - (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \ - (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \ - \ - *(dest + i) = result; \ - } \ - } - -/* - * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca - */ -static inline comp4_t -blend_screen (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - return DIV_ONE_UNc (sca * da + dca * sa - sca * dca); -} - -PDF_SEPARABLE_BLEND_MODE (screen) - -/* - * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline comp4_t -blend_overlay (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - comp4_t rca; - - if (2 * dca < da) - rca = 2 * sca * dca; - else - rca = sa * da - 2 * (da - dca) * (sa - sca); - return DIV_ONE_UNc (rca); -} - -PDF_SEPARABLE_BLEND_MODE (overlay) - -/* - * Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) - */ -static inline comp4_t -blend_darken (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - comp4_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UNc (s > d ? d : s); -} - -PDF_SEPARABLE_BLEND_MODE (darken) - -/* - * Lighten - * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) - */ -static inline comp4_t -blend_lighten (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - comp4_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UNc (s > d ? 
s : d); -} - -PDF_SEPARABLE_BLEND_MODE (lighten) - -/* - * Color dodge - * B(Dca, Da, Sca, Sa) = - * if Dca == 0 - * 0 - * if Sca == Sa - * Sa.Da - * otherwise - * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) - */ -static inline comp4_t -blend_color_dodge (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - if (sca >= sa) - { - return dca == 0 ? 0 : DIV_ONE_UNc (sa * da); - } - else - { - comp4_t rca = dca * sa / (sa - sca); - return DIV_ONE_UNc (sa * MIN (rca, da)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_dodge) - -/* - * Color burn - * B(Dca, Da, Sca, Sa) = - * if Dca == Da - * Sa.Da - * if Sca == 0 - * 0 - * otherwise - * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) - */ -static inline comp4_t -blend_color_burn (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - if (sca == 0) - { - return dca < da ? 0 : DIV_ONE_UNc (sa * da); - } - else - { - comp4_t rca = (da - dca) * sa / sca; - return DIV_ONE_UNc (sa * (MAX (rca, da) - rca)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_burn) - -/* - * Hard light - * B(Dca, Da, Sca, Sa) = - * if 2.Sca < Sa - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline comp4_t -blend_hard_light (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - if (2 * sca < sa) - return DIV_ONE_UNc (2 * sca * dca); - else - return DIV_ONE_UNc (sa * da - 2 * (da - dca) * (sa - sca)); -} - -PDF_SEPARABLE_BLEND_MODE (hard_light) - -/* - * Soft light - * B(Dca, Da, Sca, Sa) = - * if (2.Sca <= Sa) - * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa)) - * otherwise if Dca.4 <= Da - * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3) - * otherwise - * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa)) - */ -static inline comp4_t -blend_soft_light (comp4_t dca_org, - comp4_t da_org, - comp4_t sca_org, - comp4_t sa_org) -{ - double dca = dca_org * (1.0 / MASK); - double da = da_org * (1.0 / MASK); - double sca = sca_org * (1.0 / MASK); - double sa = sa_org * (1.0 / MASK); - double rca; - - if (2 * sca < sa) - { - if (da == 0) - rca = dca 
* sa; - else - rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da; - } - else if (da == 0) - { - rca = 0; - } - else if (4 * dca <= da) - { - rca = dca * sa + - (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3); - } - else - { - rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa); - } - return rca * MASK + 0.5; -} - -PDF_SEPARABLE_BLEND_MODE (soft_light) - -/* - * Difference - * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da) - */ -static inline comp4_t -blend_difference (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - comp4_t dcasa = dca * sa; - comp4_t scada = sca * da; - - if (scada < dcasa) - return DIV_ONE_UNc (dcasa - scada); - else - return DIV_ONE_UNc (scada - dcasa); -} - -PDF_SEPARABLE_BLEND_MODE (difference) - -/* - * Exclusion - * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca) - */ - -/* This can be made faster by writing it directly and not using - * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ - -static inline comp4_t -blend_exclusion (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) -{ - return DIV_ONE_UNc (sca * da + dca * sa - 2 * dca * sca); -} - -PDF_SEPARABLE_BLEND_MODE (exclusion) - -#undef PDF_SEPARABLE_BLEND_MODE - -/* - * PDF nonseperable blend modes are implemented using the following functions - * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid - * and min value of the red, green and blue components. 
- * - * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue - * - * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) ) - * if x > 1.0 - * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) ) - * return C - * - * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) - * - * SAT (C) = CH_MAX (C) - CH_MIN (C) - * - * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C - */ - -/* For premultiplied colors, we need to know what happens when C is - * multiplied by a real number. LUM and SAT are linear: - * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) - * - * If we extend clip_color with an extra argument a and change - * - * if x >= 1.0 - * - * into - * - * if x >= a - * - * then clip_color is also linear: - * - * r * clip_color (C, a) = clip_color (r_c, ra); - * - * for positive r. - * - * Similarly, we can extend set_lum with an extra argument that is just passed - * on to clip_color: - * - * r * set_lum ( C, l, a) - * - * = r × clip_color ( C + l - LUM (C), a) - * - * = clip_color ( r * C + r × l - r * LUM (C), r * a) - * - * = set_lum ( r * C, r * l, r * a) - * - * Finally, set_sat: - * - * r * set_sat (C, s) = set_sat (x * C, r * s) - * - * The above holds for all non-zero x, because the x'es in the fraction for - * C_mid cancel out. 
Specifically, it holds for x = r: - * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - */ - -/* So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) - * - */ - -#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) -#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? 
c[1] : c[2])) -#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) -#define SAT(c) (CH_MAX (c) - CH_MIN (c)) - -#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - comp4_t *dest, \ - const comp4_t *src, \ - const comp4_t *mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - comp4_t s = combine_mask (src, mask, i); \ - comp4_t d = *(dest + i); \ - comp1_t sa = ALPHA_c (s); \ - comp1_t isa = ~sa; \ - comp1_t da = ALPHA_c (d); \ - comp1_t ida = ~da; \ - comp4_t result; \ - comp4_t sc[3], dc[3], c[3]; \ - \ - result = d; \ - UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \ - dc[0] = RED_c (d); \ - sc[0] = RED_c (s); \ - dc[1] = GREEN_c (d); \ - sc[1] = GREEN_c (s); \ - dc[2] = BLUE_c (d); \ - sc[2] = BLUE_c (s); \ - blend_ ## name (c, dc, da, sc, sa); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \ - (DIV_ONE_UNc (c[0]) << R_SHIFT) + \ - (DIV_ONE_UNc (c[1]) << G_SHIFT) + \ - (DIV_ONE_UNc (c[2])); \ - } \ - } - -static void -set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum) -{ - double a, l, min, max; - double tmp[3]; - - a = sa * (1.0 / MASK); - - l = lum * (1.0 / MASK); - tmp[0] = src[0] * (1.0 / MASK); - tmp[1] = src[1] * (1.0 / MASK); - tmp[2] = src[2] * (1.0 / MASK); - - l = l - LUM (tmp); - tmp[0] += l; - tmp[1] += l; - tmp[2] += l; - - /* clip_color */ - l = LUM (tmp); - min = CH_MIN (tmp); - max = CH_MAX (tmp); - - if (min < 0) - { - if (l - min == 0.0) - { - tmp[0] = 0; - tmp[1] = 0; - tmp[2] = 0; - } - else - { - tmp[0] = l + (tmp[0] - l) * l / (l - min); - tmp[1] = l + (tmp[1] - l) * l / (l - min); - tmp[2] = l + (tmp[2] - l) * l / (l - min); - } - } - if (max > a) - { - if (max - l == 0.0) - { - tmp[0] = a; - tmp[1] = a; - tmp[2] = a; - } - else - { - tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); - tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l); - tmp[2] = l + (tmp[2] - l) * 
(a - l) / (max - l); - } - } - - dest[0] = tmp[0] * MASK + 0.5; - dest[1] = tmp[1] * MASK + 0.5; - dest[2] = tmp[2] * MASK + 0.5; -} - -static void -set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat) -{ - int id[3]; - comp4_t min, max; - - if (src[0] > src[1]) - { - if (src[0] > src[2]) - { - id[0] = 0; - if (src[1] > src[2]) - { - id[1] = 1; - id[2] = 2; - } - else - { - id[1] = 2; - id[2] = 1; - } - } - else - { - id[0] = 2; - id[1] = 0; - id[2] = 1; - } - } - else - { - if (src[0] > src[2]) - { - id[0] = 1; - id[1] = 0; - id[2] = 2; - } - else - { - id[2] = 0; - if (src[1] > src[2]) - { - id[0] = 1; - id[1] = 2; - } - else - { - id[0] = 2; - id[1] = 1; - } - } - } - - max = dest[id[0]]; - min = dest[id[2]]; - if (max > min) - { - dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); - dest[id[0]] = sat; - dest[id[2]] = 0; - } - else - { - dest[0] = dest[1] = dest[2] = 0; - } -} - -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) - */ -static inline void -blend_hsl_hue (comp4_t c[3], - comp4_t dc[3], - comp4_t da, - comp4_t sc[3], - comp4_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_sat (c, c, SAT (dc) * sa); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) - -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) - */ -static inline void -blend_hsl_saturation (comp4_t c[3], - comp4_t dc[3], - comp4_t da, - comp4_t sc[3], - comp4_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_sat (c, c, SAT (sc) * da); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) - -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) - */ -static inline void -blend_hsl_color (comp4_t c[3], - comp4_t dc[3], - comp4_t da, - comp4_t sc[3], - comp4_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE 
(hsl_color) - -/* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) - */ -static inline void -blend_hsl_luminosity (comp4_t c[3], - comp4_t dc[3], - comp4_t da, - comp4_t sc[3], - comp4_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_lum (c, c, sa * da, LUM (sc) * da); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) - -#undef SAT -#undef LUM -#undef CH_MAX -#undef CH_MIN -#undef PDF_NON_SEPARABLE_BLEND_MODE - -/* All of the disjoint/conjoint composing functions - * - * The four entries in the first column indicate what source contributions - * come from each of the four areas of the picture -- areas covered by neither - * A nor B, areas covered only by A, areas covered only by B and finally - * areas covered by both A and B. - * - * Disjoint Conjoint - * Fa Fb Fa Fb - * (0,0,0,0) 0 0 0 0 - * (0,A,0,A) 1 0 1 0 - * (0,0,B,B) 0 1 0 1 - * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) - * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 - * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 - * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) - * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 - * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) - * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) - * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) - * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) - * - * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more - * information about these operators. 
- */ - -#define COMBINE_A_OUT 1 -#define COMBINE_A_IN 2 -#define COMBINE_B_OUT 4 -#define COMBINE_B_IN 8 - -#define COMBINE_CLEAR 0 -#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) -#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) -#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) - -/* portion covered by a but not b */ -static comp1_t -combine_disjoint_out_part (comp1_t a, comp1_t b) -{ - /* min (1, (1-b) / a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return MASK; /* 1 */ - return DIV_UNc (b, a); /* (1-b) / a */ -} - -/* portion covered by both a and b */ -static comp1_t -combine_disjoint_in_part (comp1_t a, comp1_t b) -{ - /* max (1-(1-b)/a,0) */ - /* = - min ((1-b)/a - 1, 0) */ - /* = 1 - min (1, (1-b)/a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return 0; /* 1 - 1 */ - return ~DIV_UNc(b, a); /* 1 - (1-b) / a */ -} - -/* portion covered by a but not b */ -static comp1_t -combine_conjoint_out_part (comp1_t a, comp1_t b) -{ - /* max (1-b/a,0) */ - /* = 1-min(b/a,1) */ - - /* min (1, (1-b) / a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return 0x00; /* 0 */ - return ~DIV_UNc(b, a); /* 1 - b/a */ -} - -/* portion covered by both a and b */ -static comp1_t -combine_conjoint_in_part (comp1_t a, comp1_t b) -{ - /* min (1,b/a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return MASK; /* 1 */ - return DIV_UNc (b, a); /* b/a */ -} - -#define GET_COMP(v, i) ((comp2_t) (comp1_t) ((v) >> i)) - -#define ADD(x, y, i, t) \ - ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ - (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) - -#define GENERIC(x, y, i, ax, ay, t, u, v) \ - ((t) = (MUL_UNc (GET_COMP (y, i), ay, (u)) + \ - MUL_UNc (GET_COMP (x, i), ax, 
(v))), \ - (comp4_t) ((comp1_t) ((t) | \ - (0 - ((t) >> G_SHIFT)))) << (i)) - -static void -combine_disjoint_general_u (comp4_t * dest, - const comp4_t *src, - const comp4_t *mask, - int width, - comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t m, n, o, p; - comp2_t Fa, Fb, t, u, v; - comp1_t sa = s >> A_SHIFT; - comp1_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_disjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_disjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_disjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_disjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - s = m | n | o | p; - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp2_t a = s >> A_SHIFT; - - if (s != 0x00) - { - comp4_t d = *(dest + i); - a = combine_disjoint_out_part (d >> A_SHIFT, a); - UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s); - - *(dest + i) = d; - } - } -} - -static void -combine_disjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * 
dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_u (comp4_t * dest, - const comp4_t *src, - const comp4_t *mask, - int width, - comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = combine_mask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t m, n, o, p; - comp2_t Fa, Fb, t, u, v; - comp1_t sa = s >> A_SHIFT; - comp1_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_conjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_conjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - 
switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_conjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_conjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void 
-combine_conjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -/************************************************************************/ -/*********************** Per Channel functions **************************/ -/************************************************************************/ - -static void -combine_clear_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(comp4_t)); -} - -static void -combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - - combine_mask_value_ca (&s, &m); - - *(dest + i) = s; - } -} - -static void -combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; - - combine_mask_ca (&s, &m); - - a = ~m; - if (a) - { - comp4_t d = *(dest + i); - UNcx4_MUL_UNcx4_ADD_UNcx4 (d, a, s); - s = d; - } - - *(dest + i) = s; - } -} - -static void -combine_over_reverse_ca (pixman_implementation_t *imp, - 
pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp4_t a = ~d >> A_SHIFT; - - if (a) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - - UNcx4_MUL_UNcx4 (s, m); - UNcx4_MUL_UNc_ADD_UNcx4 (s, a, d); - - *(dest + i) = s; - } - } -} - -static void -combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp2_t a = d >> A_SHIFT; - comp4_t s = 0; - - if (a) - { - comp4_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UNcx4_MUL_UNc (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; - - combine_mask_alpha_ca (&s, &m); - - a = m; - if (a != ~0) - { - comp4_t d = 0; - - if (a) - { - d = *(dest + i); - UNcx4_MUL_UNcx4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp2_t a = ~d >> A_SHIFT; - comp4_t s = 0; - - if (a) - { - comp4_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UNcx4_MUL_UNc (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; 
- - combine_mask_alpha_ca (&s, &m); - - a = ~m; - if (a != ~0) - { - comp4_t d = 0; - - if (a) - { - d = *(dest + i); - UNcx4_MUL_UNcx4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = m; - - UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t d = *(dest + i); - - combine_mask_value_ca (&s, &m); - - UNcx4_ADD_UNcx4 (d, s); - - *(dest + i) = d; - } -} - -static void -combine_saturate_ca (pixman_implementation_t *imp, - 
pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s, d; - comp2_t sa, sr, sg, sb, da; - comp2_t t, u, v; - comp4_t m, n, o, p; - - d = *(dest + i); - s = *(src + i); - m = *(mask + i); - - combine_mask_ca (&s, &m); - - sa = (m >> A_SHIFT); - sr = (m >> R_SHIFT) & MASK; - sg = (m >> G_SHIFT) & MASK; - sb = m & MASK; - da = ~d >> A_SHIFT; - - if (sb <= da) - m = ADD (s, d, 0, t); - else - m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); - - if (sg <= da) - n = ADD (s, d, G_SHIFT, t); - else - n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); - - if (sr <= da) - o = ADD (s, d, R_SHIFT, t); - else - o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); - - if (sa <= da) - p = ADD (s, d, A_SHIFT, t); - else - p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); - - *(dest + i) = m | n | o | p; - } -} - -static void -combine_disjoint_general_ca (comp4_t * dest, - const comp4_t *src, - const comp4_t *mask, - int width, - comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s, d; - comp4_t m, n, o, p; - comp4_t Fa, Fb; - comp2_t t, u, v; - comp4_t sa; - comp1_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da); - n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da); - n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = 
(comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0)); - n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0)); - n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_disjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void 
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_ca (comp4_t * dest, - const comp4_t *src, - const comp4_t *mask, - int width, - comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - comp4_t s, d; - comp4_t m, n, o, p; - comp4_t Fa, Fb; - comp2_t t, u, v; - comp4_t sa; - comp1_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = 
(comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da); - n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da); - n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0)); - n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0)); - n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) 
= s; - } -} - -static void -combine_conjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} 
- -static void -combine_conjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - comp4_t * dest, - const comp4_t * src, - const comp4_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -void -_pixman_setup_combiner_functions_width (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear; - imp->combine_width[PIXMAN_OP_SRC] = combine_src_u; - imp->combine_width[PIXMAN_OP_DST] = combine_dst; - imp->combine_width[PIXMAN_OP_OVER] = combine_over_u; - imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; - imp->combine_width[PIXMAN_OP_IN] = combine_in_u; - imp->combine_width[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; - imp->combine_width[PIXMAN_OP_OUT] = combine_out_u; - imp->combine_width[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; - imp->combine_width[PIXMAN_OP_ATOP] = combine_atop_u; - imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; - imp->combine_width[PIXMAN_OP_XOR] = combine_xor_u; - imp->combine_width[PIXMAN_OP_ADD] = combine_add_u; - imp->combine_width[PIXMAN_OP_SATURATE] = combine_saturate_u; - - /* Disjoint, unified */ - imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; - imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; - imp->combine_width[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u; - imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; - imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; - imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; - imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u; - imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; - imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; - imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = 
combine_disjoint_atop_reverse_u; - imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; - - /* Conjoint, unified */ - imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; - imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; - imp->combine_width[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; - imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; - imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; - imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; - imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; - imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; - imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; - imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u; - imp->combine_width[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; - - imp->combine_width[PIXMAN_OP_MULTIPLY] = combine_multiply_u; - imp->combine_width[PIXMAN_OP_SCREEN] = combine_screen_u; - imp->combine_width[PIXMAN_OP_OVERLAY] = combine_overlay_u; - imp->combine_width[PIXMAN_OP_DARKEN] = combine_darken_u; - imp->combine_width[PIXMAN_OP_LIGHTEN] = combine_lighten_u; - imp->combine_width[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; - imp->combine_width[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; - imp->combine_width[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; - imp->combine_width[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; - imp->combine_width[PIXMAN_OP_DIFFERENCE] = combine_difference_u; - imp->combine_width[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; - imp->combine_width[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; - imp->combine_width[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u; - imp->combine_width[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; - imp->combine_width[PIXMAN_OP_HSL_LUMINOSITY] = 
combine_hsl_luminosity_u; - - /* Component alpha combiners */ - imp->combine_width_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; - imp->combine_width_ca[PIXMAN_OP_SRC] = combine_src_ca; - /* dest */ - imp->combine_width_ca[PIXMAN_OP_OVER] = combine_over_ca; - imp->combine_width_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_IN] = combine_in_ca; - imp->combine_width_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_OUT] = combine_out_ca; - imp->combine_width_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_ATOP] = combine_atop_ca; - imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_XOR] = combine_xor_ca; - imp->combine_width_ca[PIXMAN_OP_ADD] = combine_add_ca; - imp->combine_width_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; - - /* Disjoint CA */ - imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; - - /* Conjoint CA */ - imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = 
combine_src_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; - - imp->combine_width_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; - imp->combine_width_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; - imp->combine_width_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; - imp->combine_width_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; - imp->combine_width_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; - imp->combine_width_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; - imp->combine_width_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; - imp->combine_width_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; - imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; - imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; - imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; - - /* It is not clear that these make sense, so make them noops for now */ - imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = combine_dst; - imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst; - imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; - imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; -} - diff --git a/gfx/cairo/libpixman/src/pixman-combine.h.template 
b/gfx/cairo/libpixman/src/pixman-combine.h.template deleted file mode 100644 index 20f784b5b4..0000000000 --- a/gfx/cairo/libpixman/src/pixman-combine.h.template +++ /dev/null @@ -1,226 +0,0 @@ - -#define COMPONENT_SIZE -#define MASK -#define ONE_HALF - -#define A_SHIFT -#define R_SHIFT -#define G_SHIFT -#define A_MASK -#define R_MASK -#define G_MASK - -#define RB_MASK -#define AG_MASK -#define RB_ONE_HALF -#define RB_MASK_PLUS_ONE - -#define ALPHA_c(x) ((x) >> A_SHIFT) -#define RED_c(x) (((x) >> R_SHIFT) & MASK) -#define GREEN_c(x) (((x) >> G_SHIFT) & MASK) -#define BLUE_c(x) ((x) & MASK) - -/* - * Helper macros. - */ - -#define MUL_UNc(a, b, t) \ - ((t) = (a) * (comp2_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) - -#define DIV_UNc(a, b) \ - (((comp2_t) (a) * MASK + ((b) / 2)) / (b)) - -#define ADD_UNc(x, y, t) \ - ((t) = (x) + (y), \ - (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) - -#define DIV_ONE_UNc(x) \ - (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) - -/* - * The methods below use some tricks to be able to do two color - * components at the same time. 
- */ - -/* - * x_rb = (x_rb * a) / 255 - */ -#define UNc_rb_MUL_UNc(x, a, t) \ - do \ - { \ - t = ((x) & RB_MASK) * (a); \ - t += RB_ONE_HALF; \ - x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x &= RB_MASK; \ - } while (0) - -/* - * x_rb = min (x_rb + y_rb, 255) - */ -#define UNc_rb_ADD_UNc_rb(x, y, t) \ - do \ - { \ - t = ((x) + (y)); \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - x = (t & RB_MASK); \ - } while (0) - -/* - * x_rb = (x_rb * a_rb) / 255 - */ -#define UNc_rb_MUL_UNc_rb(x, a, t) \ - do \ - { \ - t = (x & MASK) * (a & MASK); \ - t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - t += RB_ONE_HALF; \ - t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x = t & RB_MASK; \ - } while (0) - -/* - * x_c = (x_c * a) / 255 - */ -#define UNcx4_MUL_UNc(x, a) \ - do \ - { \ - comp4_t r1__, r2__, t__; \ - \ - r1__ = (x); \ - UNc_rb_MUL_UNc (r1__, (a), t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - UNc_rb_MUL_UNc (r2__, (a), t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a) / 255 + y_c - */ -#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y) & RB_MASK; \ - UNc_rb_MUL_UNc (r1__, (a), t__); \ - UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UNc_rb_MUL_UNc (r2__, (a), t__); \ - UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a + y_c * b) / 255 - */ -#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y); \ - UNc_rb_MUL_UNc (r1__, (a), t__); \ - UNc_rb_MUL_UNc (r2__, (b), t__); \ - UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((y) >> G_SHIFT); \ - UNc_rb_MUL_UNc (r2__, (a), t__); \ - UNc_rb_MUL_UNc (r3__, (b), t__); \ - UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } 
while (0) - -/* - * x_c = (x_c * a_c) / 255 - */ -#define UNcx4_MUL_UNcx4(x, a) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 + y_c - */ -#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \ - r2__ = (y) & RB_MASK; \ - UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((a) >> G_SHIFT); \ - UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c + y_c * b) / 255 - */ -#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UNc_rb_MUL_UNc_rb (r1__, r2__, t__); \ - r2__ = (y); \ - UNc_rb_MUL_UNc (r2__, (b), t__); \ - UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UNc_rb_MUL_UNc_rb (r2__, r3__, t__); \ - r3__ = (y) >> G_SHIFT; \ - UNc_rb_MUL_UNc (r3__, (b), t__); \ - UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - x_c = min(x_c + y_c, 255) -*/ -#define UNcx4_ADD_UNcx4(x, y) \ - do \ - { \ - comp4_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x) & RB_MASK; \ - r2__ = (y) & RB_MASK; \ - UNc_rb_ADD_UNc_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT) & RB_MASK; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UNc_rb_ADD_UNc_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) diff --git a/gfx/cairo/libpixman/src/pixman-combine16.c b/gfx/cairo/libpixman/src/pixman-combine16.c deleted file mode 100644 index 
9ba439fe5b..0000000000 --- a/gfx/cairo/libpixman/src/pixman-combine16.c +++ /dev/null @@ -1,114 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <math.h> -#include <string.h> - -#include "pixman-private.h" - -#include "pixman-combine32.h" - -static force_inline uint32_t -combine_mask (const uint32_t src, const uint32_t mask) -{ - uint32_t s, m; - - m = mask >> A_SHIFT; - - if (!m) - return 0; - s = src; - - UN8x4_MUL_UN8 (s, m); - - return s; -} - -static void -combine_src_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (uint16_t)); - else - { - uint16_t *d = (uint16_t*)dest; - uint16_t *src16 = (uint16_t*)src; - for (i = 0; i < width; ++i) - { - if ((*mask & 0xff000000) == 0xff000000) { - // it's likely worth special casing - // fully opaque because it avoids - // the cost of conversion as well the multiplication - *(d + i) = *src16; - } else { - // the mask is still 32bits - uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask); - *(d + i) = convert_8888_to_0565(s); - } - mask++; - src16++; - } - } - -} - -static void -combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (uint16_t)); - else - { - uint16_t *d = (uint16_t*)dest; - uint16_t *src16 = (uint16_t*)src; - for (i = 0; i < width; ++i) - { - if ((*mask & 0xff000000) == 0xff000000) { - // it's likely worth special casing - // fully opaque because it avoids - // the cost of conversion as well the multiplication - *(d + i) = *src16; - } else if ((*mask & 0xff000000) == 0x00000000) { - // keep the dest the same - } else { - // the mask is still 32bits - uint32_t s = combine_mask (convert_0565_to_8888(*src16), *mask); - uint32_t ia = ALPHA_8 (~s); - uint32_t d32 = 
convert_0565_to_8888(*(d + i)); - UN8x4_MUL_UN8_ADD_UN8x4 (d32, ia, s); - *(d + i) = convert_8888_to_0565(d32); - } - mask++; - src16++; - } - } - -} - - -void -_pixman_setup_combiner_functions_16 (pixman_implementation_t *imp) -{ - int i; - for (i = 0; i < PIXMAN_N_OPERATORS; i++) { - imp->combine_16[i] = NULL; - } - imp->combine_16[PIXMAN_OP_SRC] = combine_src_u; - imp->combine_16[PIXMAN_OP_OVER] = combine_over_u; -} - diff --git a/gfx/cairo/libpixman/src/pixman-combine32.c b/gfx/cairo/libpixman/src/pixman-combine32.c index 3ac7576bdc..4a89384d9c 100644 --- a/gfx/cairo/libpixman/src/pixman-combine32.c +++ b/gfx/cairo/libpixman/src/pixman-combine32.c @@ -142,12 +142,12 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i) static void combine_clear (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { - memset (dest, 0, width * sizeof(uint32_t)); + memset (dest, 0, width * sizeof (uint32_t)); } static void @@ -155,7 +155,7 @@ combine_dst (pixman_implementation_t *imp, pixman_op_t op, uint32_t * dest, const uint32_t * src, - const uint32_t * mask, + const uint32_t * mask, int width) { return; @@ -164,9 +164,9 @@ combine_dst (pixman_implementation_t *imp, static void combine_src_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -189,9 +189,9 @@ combine_src_u (pixman_implementation_t *imp, static void combine_over_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -254,9 +254,9 @@ combine_over_u (pixman_implementation_t *imp, static void combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op, 
- uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -274,9 +274,9 @@ combine_over_reverse_u (pixman_implementation_t *imp, static void combine_in_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -293,9 +293,9 @@ combine_in_u (pixman_implementation_t *imp, static void combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -313,9 +313,9 @@ combine_in_reverse_u (pixman_implementation_t *imp, static void combine_out_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -332,9 +332,9 @@ combine_out_u (pixman_implementation_t *imp, static void combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -352,9 +352,9 @@ combine_out_reverse_u (pixman_implementation_t *imp, static void combine_atop_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -374,9 +374,9 @@ combine_atop_u (pixman_implementation_t *imp, static void combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -396,9 +396,9 @@ 
combine_atop_reverse_u (pixman_implementation_t *imp, static void combine_xor_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -418,9 +418,9 @@ combine_xor_u (pixman_implementation_t *imp, static void combine_add_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -434,68 +434,65 @@ combine_add_u (pixman_implementation_t *imp, } } -static void -combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint16_t sa, da; - - sa = s >> A_SHIFT; - da = ~d >> A_SHIFT; - if (sa > da) - { - sa = DIV_UN8 (da, sa); - UN8x4_MUL_UN8 (s, sa); - } - ; - UN8x4_ADD_UN8x4 (d, s); - *(dest + i) = d; - } -} - /* * PDF blend modes: + * * The following blend modes have been taken from the PDF ISO 32000 * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. + * + * http://www.adobe.com/devnet/pdf/pdf_reference.html + * + * The specific documents of interest are the PDF spec itself: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf + * + * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat + * 9.1 and Reader 9.1: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf + * + * that clarifies the specifications for blend modes ColorDodge and + * ColorBurn. 
+ * + * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * + * where B() is the blend function. When B(Cb, Cs) = Cs, this formula + * reduces to the regular OVER operator. + * + * Cs and Cb are not premultiplied, so in our implementation we instead + * use: + * + * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * where cr, cs, and cb are premultiplied colors, and where the + * + * αb × αs × B(cb/αb, cs/αs) + * + * part is first arithmetically simplified under the assumption that αb + * and αs are not 0, and then updated to produce a meaningful result when + * they are. 
+ * + * For all the blend mode operators, the alpha channel is given by + * + * αr = αs + αb – αb × αs */ /* * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca + * + * ad * as * B(d / ad, s / as) + * = ad * as * d/ad * s/as + * = d * s + * */ static void combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -519,9 +516,9 @@ combine_multiply_u (pixman_implementation_t *imp, static void combine_multiply_ca (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -544,1087 +541,271 @@ combine_multiply_ca (pixman_implementation_t *imp, } } +#define CLAMP(v, low, high) \ + do \ + { \ + if (v < (low)) \ + v = (low); \ + if (v > (high)) \ + v = (high); \ + } while (0) + #define PDF_SEPARABLE_BLEND_MODE(name) \ static void \ combine_ ## name ## _u (pixman_implementation_t *imp, \ pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ int width) \ { \ int i; \ - for (i = 0; i < width; ++i) { \ + for (i = 0; i < width; ++i) \ + { \ uint32_t s = combine_mask (src, mask, i); \ uint32_t d = *(dest + i); \ uint8_t sa = ALPHA_8 (s); \ uint8_t isa = ~sa; \ uint8_t da = ALPHA_8 (d); \ uint8_t ida = ~da; \ - uint32_t result; \ - \ - result = d; \ - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ + uint32_t ra, rr, rg, rb; \ \ - *(dest + i) = result + \ - (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \ - (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \ - (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \ + ra = da * 0xff + sa * 0xff - sa * da; \ + rr = isa * RED_8 (d) + ida
* RED_8 (s); \ + rg = isa * GREEN_8 (d) + ida * GREEN_8 (s); \ + rb = isa * BLUE_8 (d) + ida * BLUE_8 (s); \ + \ + rr += blend_ ## name (RED_8 (d), da, RED_8 (s), sa); \ + rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa); \ + rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa); \ + \ + CLAMP (ra, 0, 255 * 255); \ + CLAMP (rr, 0, 255 * 255); \ + CLAMP (rg, 0, 255 * 255); \ + CLAMP (rb, 0, 255 * 255); \ + \ + ra = DIV_ONE_UN8 (ra); \ + rr = DIV_ONE_UN8 (rr); \ + rg = DIV_ONE_UN8 (rg); \ + rb = DIV_ONE_UN8 (rb); \ + \ + *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \ } \ } \ \ static void \ combine_ ## name ## _ca (pixman_implementation_t *imp, \ pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ { \ int i; \ - for (i = 0; i < width; ++i) { \ + for (i = 0; i < width; ++i) \ + { \ uint32_t m = *(mask + i); \ uint32_t s = *(src + i); \ uint32_t d = *(dest + i); \ uint8_t da = ALPHA_8 (d); \ uint8_t ida = ~da; \ - uint32_t result; \ - \ + uint32_t ra, rr, rg, rb; \ + uint8_t ira, iga, iba; \ + \ combine_mask_ca (&s, &m); \ - \ - result = d; \ - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \ - \ - result += \ - (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \ - (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \ - (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \ \ - *(dest + i) = result; \ + ira = ~RED_8 (m); \ + iga = ~GREEN_8 (m); \ + iba = ~BLUE_8 (m); \ + \ + ra = da * 0xff + ALPHA_8 (s) * 0xff - ALPHA_8 (s) * da; \ + rr = ira * RED_8 (d) + ida * RED_8 (s); \ + rg = iga * GREEN_8 (d) + ida * GREEN_8 (s); \ + rb = iba * BLUE_8 (d) + ida * BLUE_8 (s); \ + \ + rr += blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)); \ + rg += blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \ 
+ rb += blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \ + \ + CLAMP (ra, 0, 255 * 255); \ + CLAMP (rr, 0, 255 * 255); \ + CLAMP (rg, 0, 255 * 255); \ + CLAMP (rb, 0, 255 * 255); \ + \ + ra = DIV_ONE_UN8 (ra); \ + rr = DIV_ONE_UN8 (rr); \ + rg = DIV_ONE_UN8 (rg); \ + rb = DIV_ONE_UN8 (rb); \ + \ + *(dest + i) = ra << 24 | rr << 16 | rg << 8 | rb; \ } \ } /* * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca + * + * ad * as * B(d/ad, s/as) + * = ad * as * (d/ad + s/as - s/as * d/ad) + * = ad * s + as * d - s * d */ -static inline uint32_t -blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_screen (int32_t d, int32_t ad, int32_t s, int32_t as) { - return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca); + return s * ad + d * as - s * d; } PDF_SEPARABLE_BLEND_MODE (screen) /* * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) + * + * ad * as * B(d/ad, s/as) + * = ad * as * Hardlight (s, d) + * = if (d / ad < 0.5) + * as * ad * Multiply (s/as, 2 * d/ad) + * else + * as * ad * Screen (s/as, 2 * d / ad - 1) + * = if (d < 0.5 * ad) + * as * ad * s/as * 2 * d /ad + * else + * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) + * = if (2 * d < ad) + * 2 * s * d + * else + * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) + * = if (2 * d < ad) + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) */ -static inline uint32_t -blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_overlay (int32_t d, int32_t ad, int32_t s, int32_t as) { - uint32_t rca; + uint32_t r; - if (2 * dca < da) - rca = 2 * sca * dca; + if (2 * d < ad) + r = 2 * s * d; else - rca = sa * da - 2 * (da - dca) * (sa - sca); - return DIV_ONE_UN8 (rca); + r = as * ad - 2 * (ad - d) * (as - s); + + return r; } PDF_SEPARABLE_BLEND_MODE (overlay) /* * Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) + * + * 
ad * as * B(d/ad, s/as) + * = ad * as * MIN(d/ad, s/as) + * = MIN (as * d, ad * s) */ -static inline uint32_t -blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_darken (int32_t d, int32_t ad, int32_t s, int32_t as) { - uint32_t s, d; + s = ad * s; + d = as * d; - s = sca * da; - d = dca * sa; - return DIV_ONE_UN8 (s > d ? d : s); + return s > d ? d : s; } PDF_SEPARABLE_BLEND_MODE (darken) /* * Lighten - * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) + * + * ad * as * B(d/ad, s/as) + * = ad * as * MAX(d/ad, s/as) + * = MAX (as * d, ad * s) */ -static inline uint32_t -blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_lighten (int32_t d, int32_t ad, int32_t s, int32_t as) { - uint32_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UN8 (s > d ? s : d); + s = ad * s; + d = as * d; + + return s > d ? s : d; } PDF_SEPARABLE_BLEND_MODE (lighten) /* - * Color dodge - * B(Dca, Da, Sca, Sa) = - * if Dca == 0 - * 0 - * if Sca == Sa - * Sa.Da - * otherwise - * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) - */ -static inline uint32_t -blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) -{ - if (sca >= sa) - { - return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da); - } - else - { - uint32_t rca = dca * sa / (sa - sca); - return DIV_ONE_UN8 (sa * MIN (rca, da)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_dodge) - -/* - * Color burn - * B(Dca, Da, Sca, Sa) = - * if Dca == Da - * Sa.Da - * if Sca == 0 - * 0 - * otherwise - * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) - */ -static inline uint32_t -blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) -{ - if (sca == 0) - { - return dca < da ? 
0 : DIV_ONE_UN8 (sa * da); - } - else - { - uint32_t rca = (da - dca) * sa / sca; - return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_burn) - -/* * Hard light - * B(Dca, Da, Sca, Sa) = - * if 2.Sca < Sa - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * Multiply (d/ad, 2 * s/as) + * else + * ad * as * Screen (d/ad, 2 * s/as - 1) + * = if 2 * s <= as + * ad * as * d/ad * 2 * s / as + * else + * ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1)) + * = if 2 * s <= as + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) */ -static inline uint32_t -blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_hard_light (int32_t d, int32_t ad, int32_t s, int32_t as) { - if (2 * sca < sa) - return DIV_ONE_UN8 (2 * sca * dca); + if (2 * s < as) + return 2 * s * d; else - return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca)); + return as * ad - 2 * (ad - d) * (as - s); } PDF_SEPARABLE_BLEND_MODE (hard_light) /* - * Soft light - * B(Dca, Da, Sca, Sa) = - * if (2.Sca <= Sa) - * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa)) - * otherwise if Dca.4 <= Da - * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3) - * otherwise - * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa)) - */ -static inline uint32_t -blend_soft_light (uint32_t dca_org, - uint32_t da_org, - uint32_t sca_org, - uint32_t sa_org) -{ - double dca = dca_org * (1.0 / MASK); - double da = da_org * (1.0 / MASK); - double sca = sca_org * (1.0 / MASK); - double sa = sa_org * (1.0 / MASK); - double rca; - - if (2 * sca < sa) - { - if (da == 0) - rca = dca * sa; - else - rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da; - } - else if (da == 0) - { - rca = 0; - } - else if (4 * dca <= da) - { - rca = dca * sa + - (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3); - } - else - { - rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca
- sa); - } - return rca * MASK + 0.5; -} - -PDF_SEPARABLE_BLEND_MODE (soft_light) - -/* * Difference - * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da) + * + * ad * as * B(s/as, d/ad) + * = ad * as * abs (s/as - d/ad) + * = if (s/as <= d/ad) + * ad * as * (d/ad - s/as) + * else + * ad * as * (s/as - d/ad) + * = if (ad * s <= as * d) + * as * d - ad * s + * else + * ad * s - as * d */ -static inline uint32_t -blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_difference (int32_t d, int32_t ad, int32_t s, int32_t as) { - uint32_t dcasa = dca * sa; - uint32_t scada = sca * da; + int32_t das = d * as; + int32_t sad = s * ad; - if (scada < dcasa) - return DIV_ONE_UN8 (dcasa - scada); + if (sad < das) + return das - sad; else - return DIV_ONE_UN8 (scada - dcasa); + return sad - das; } PDF_SEPARABLE_BLEND_MODE (difference) /* * Exclusion - * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca) + * + * ad * as * B(s/as, d/ad) + * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) + * = as * d + ad * s - 2 * s * d */ /* This can be made faster by writing it directly and not using * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ -static inline uint32_t -blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +static inline int32_t +blend_exclusion (int32_t d, int32_t ad, int32_t s, int32_t as) { - return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca); + return s * ad + d * as - 2 * d * s; } PDF_SEPARABLE_BLEND_MODE (exclusion) #undef PDF_SEPARABLE_BLEND_MODE -/* - * PDF nonseperable blend modes are implemented using the following functions - * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid - * and min value of the red, green and blue components. 
- * - * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue - * - * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) ) - * if x > 1.0 - * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) ) - * return C - * - * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) - * - * SAT (C) = CH_MAX (C) - CH_MIN (C) - * - * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C - */ - -/* For premultiplied colors, we need to know what happens when C is - * multiplied by a real number. LUM and SAT are linear: - * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) - * - * If we extend clip_color with an extra argument a and change - * - * if x >= 1.0 - * - * into - * - * if x >= a - * - * then clip_color is also linear: - * - * r * clip_color (C, a) = clip_color (r_c, ra); - * - * for positive r. - * - * Similarly, we can extend set_lum with an extra argument that is just passed - * on to clip_color: - * - * r * set_lum ( C, l, a) - * - * = r × clip_color ( C + l - LUM (C), a) - * - * = clip_color ( r * C + r × l - r * LUM (C), r * a) - * - * = set_lum ( r * C, r * l, r * a) - * - * Finally, set_sat: - * - * r * set_sat (C, s) = set_sat (x * C, r * s) - * - * The above holds for all non-zero x, because the x'es in the fraction for - * C_mid cancel out. 
Specifically, it holds for x = r: - * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - */ - -/* So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) - * - */ - -#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) -#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? 
c[1] : c[2])) -#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) -#define SAT(c) (CH_MAX (c) - CH_MIN (c)) - -#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t *dest, \ - const uint32_t *src, \ - const uint32_t *mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - uint32_t s = combine_mask (src, mask, i); \ - uint32_t d = *(dest + i); \ - uint8_t sa = ALPHA_8 (s); \ - uint8_t isa = ~sa; \ - uint8_t da = ALPHA_8 (d); \ - uint8_t ida = ~da; \ - uint32_t result; \ - uint32_t sc[3], dc[3], c[3]; \ - \ - result = d; \ - UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ - dc[0] = RED_8 (d); \ - sc[0] = RED_8 (s); \ - dc[1] = GREEN_8 (d); \ - sc[1] = GREEN_8 (s); \ - dc[2] = BLUE_8 (d); \ - sc[2] = BLUE_8 (s); \ - blend_ ## name (c, dc, da, sc, sa); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ - (DIV_ONE_UN8 (c[0]) << R_SHIFT) + \ - (DIV_ONE_UN8 (c[1]) << G_SHIFT) + \ - (DIV_ONE_UN8 (c[2])); \ - } \ - } - -static void -set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum) -{ - double a, l, min, max; - double tmp[3]; - - a = sa * (1.0 / MASK); - - l = lum * (1.0 / MASK); - tmp[0] = src[0] * (1.0 / MASK); - tmp[1] = src[1] * (1.0 / MASK); - tmp[2] = src[2] * (1.0 / MASK); - - l = l - LUM (tmp); - tmp[0] += l; - tmp[1] += l; - tmp[2] += l; - - /* clip_color */ - l = LUM (tmp); - min = CH_MIN (tmp); - max = CH_MAX (tmp); - - if (min < 0) - { - if (l - min == 0.0) - { - tmp[0] = 0; - tmp[1] = 0; - tmp[2] = 0; - } - else - { - tmp[0] = l + (tmp[0] - l) * l / (l - min); - tmp[1] = l + (tmp[1] - l) * l / (l - min); - tmp[2] = l + (tmp[2] - l) * l / (l - min); - } - } - if (max > a) - { - if (max - l == 0.0) - { - tmp[0] = a; - tmp[1] = a; - tmp[2] = a; - } - else - { - tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); - tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l); - tmp[2] = l + 
(tmp[2] - l) * (a - l) / (max - l); - } - } - - dest[0] = tmp[0] * MASK + 0.5; - dest[1] = tmp[1] * MASK + 0.5; - dest[2] = tmp[2] * MASK + 0.5; -} - -static void -set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat) -{ - int id[3]; - uint32_t min, max; - - if (src[0] > src[1]) - { - if (src[0] > src[2]) - { - id[0] = 0; - if (src[1] > src[2]) - { - id[1] = 1; - id[2] = 2; - } - else - { - id[1] = 2; - id[2] = 1; - } - } - else - { - id[0] = 2; - id[1] = 0; - id[2] = 1; - } - } - else - { - if (src[0] > src[2]) - { - id[0] = 1; - id[1] = 0; - id[2] = 2; - } - else - { - id[2] = 0; - if (src[1] > src[2]) - { - id[0] = 1; - id[1] = 2; - } - else - { - id[0] = 2; - id[1] = 1; - } - } - } - - max = dest[id[0]]; - min = dest[id[2]]; - if (max > min) - { - dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); - dest[id[0]] = sat; - dest[id[2]] = 0; - } - else - { - dest[0] = dest[1] = dest[2] = 0; - } -} - -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) - */ -static inline void -blend_hsl_hue (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_sat (c, c, SAT (dc) * sa); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) - -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) - */ -static inline void -blend_hsl_saturation (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_sat (c, c, SAT (sc) * da); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) - -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) - */ -static inline void -blend_hsl_color (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_lum (c, c, sa * da, LUM (dc) * sa); -} - 
-PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) - -/* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) - */ -static inline void -blend_hsl_luminosity (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_lum (c, c, sa * da, LUM (sc) * da); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) - -#undef SAT -#undef LUM -#undef CH_MAX -#undef CH_MIN -#undef PDF_NON_SEPARABLE_BLEND_MODE - -/* All of the disjoint/conjoint composing functions - * - * The four entries in the first column indicate what source contributions - * come from each of the four areas of the picture -- areas covered by neither - * A nor B, areas covered only by A, areas covered only by B and finally - * areas covered by both A and B. - * - * Disjoint Conjoint - * Fa Fb Fa Fb - * (0,0,0,0) 0 0 0 0 - * (0,A,0,A) 1 0 1 0 - * (0,0,B,B) 0 1 0 1 - * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) - * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 - * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 - * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) - * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 - * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) - * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) - * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) - * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) - * - * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more - * information about these operators. 
- */ - -#define COMBINE_A_OUT 1 -#define COMBINE_A_IN 2 -#define COMBINE_B_OUT 4 -#define COMBINE_B_IN 8 - -#define COMBINE_CLEAR 0 -#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) -#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) -#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) - -/* portion covered by a but not b */ -static uint8_t -combine_disjoint_out_part (uint8_t a, uint8_t b) -{ - /* min (1, (1-b) / a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN8 (b, a); /* (1-b) / a */ -} - -/* portion covered by both a and b */ -static uint8_t -combine_disjoint_in_part (uint8_t a, uint8_t b) -{ - /* max (1-(1-b)/a,0) */ - /* = - min ((1-b)/a - 1, 0) */ - /* = 1 - min (1, (1-b)/a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return 0; /* 1 - 1 */ - return ~DIV_UN8(b, a); /* 1 - (1-b) / a */ -} - -/* portion covered by a but not b */ -static uint8_t -combine_conjoint_out_part (uint8_t a, uint8_t b) -{ - /* max (1-b/a,0) */ - /* = 1-min(b/a,1) */ - - /* min (1, (1-b) / a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return 0x00; /* 0 */ - return ~DIV_UN8(b, a); /* 1 - b/a */ -} - -/* portion covered by both a and b */ -static uint8_t -combine_conjoint_in_part (uint8_t a, uint8_t b) -{ - /* min (1,b/a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN8 (b, a); /* b/a */ -} - -#define GET_COMP(v, i) ((uint16_t) (uint8_t) ((v) >> i)) - -#define ADD(x, y, i, t) \ - ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ - (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) - -#define GENERIC(x, y, i, ax, ay, t, u, v) \ - ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) + \ - MUL_UN8 (GET_COMP (x, i), ax, 
(v))), \ - (uint32_t) ((uint8_t) ((t) | \ - (0 - ((t) >> G_SHIFT)))) << (i)) - -static void -combine_disjoint_general_u (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width, - uint8_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t m, n, o, p; - uint16_t Fa, Fb, t, u, v; - uint8_t sa = s >> A_SHIFT; - uint8_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_disjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_disjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_disjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_disjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - s = m | n | o | p; - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint16_t a = s >> A_SHIFT; - - if (s != 0x00) - { - uint32_t d = *(dest + i); - a = combine_disjoint_out_part (d >> A_SHIFT, a); - UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s); - - *(dest + i) = d; - } - } -} - -static void -combine_disjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_u (pixman_implementation_t *imp, - 
pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_u (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width, - uint8_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t m, n, o, p; - uint16_t Fa, Fb, t, u, v; - uint8_t sa = s >> A_SHIFT; - uint8_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_conjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_conjoint_in_part (sa, da); - 
break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_conjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_conjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - 
combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - - /* Component alpha combiners */ static void @@ -1955,428 +1136,6 @@ combine_add_ca (pixman_implementation_t *imp, } } -static void -combine_saturate_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s, d; - uint16_t sa, sr, sg, sb, da; - uint16_t t, u, v; - uint32_t m, n, o, p; - - d = *(dest + i); - s = *(src + i); - m = *(mask + i); - - combine_mask_ca (&s, &m); - - sa = (m >> A_SHIFT); - sr = (m >> R_SHIFT) & MASK; - sg = (m >> G_SHIFT) & MASK; - sb = m & MASK; - da = ~d >> A_SHIFT; - - if (sb <= da) - m = ADD (s, d, 0, t); - else - m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); - - if (sg <= da) - n = ADD (s, d, G_SHIFT, t); - else - n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); - - if (sr <= da) - o = ADD (s, d, R_SHIFT, t); - else - o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); - - if (sa <= da) - p = ADD (s, d, A_SHIFT, t); - else - p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); - - *(dest + i) = m | n | o | p; - } -} - -static void 
-combine_disjoint_general_ca (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width, - uint8_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s, d; - uint32_t m, n, o, p; - uint32_t Fa, Fb; - uint16_t t, u, v; - uint32_t sa; - uint8_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da); - n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da); - n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0)); - n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0)); - n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; - p = 
(uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_disjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, 
src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_ca (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width, - uint8_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s, d; - uint32_t m, n, o, p; - uint32_t Fa, Fb; - uint16_t t, u, v; - uint32_t sa; - uint8_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da); - n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da); - n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0)); - n = 
(uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0)); - n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * 
mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) { @@ -2394,51 +1153,15 @@ _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u; imp->combine_32[PIXMAN_OP_ADD] = combine_add_u; - imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u; - - /* Disjoint, unified */ - imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; - imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; - imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = 
combine_disjoint_over_u; - imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; - imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; - imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; - imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u; - imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; - imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; - imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u; - imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; - - /* Conjoint, unified */ - imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; - imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; - imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; - imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; - imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; - imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; - imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; - imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; - imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; - imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u; - imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u; imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u; imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u; imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u; imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u; - imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; - imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; - 
imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u; imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; - imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; - imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u; - imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; - imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u; /* Component alpha combiners */ imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; @@ -2454,51 +1177,13 @@ _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca; - imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; - - /* Disjoint CA */ - imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; - - /* Conjoint CA */ - imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; - 
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; - imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; - imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; - imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; - imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; - - /* It is not clear that these make sense, so make them noops for now */ - imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst; - imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst; - imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; - imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; } diff --git a/gfx/cairo/libpixman/src/pixman-combine32.h b/gfx/cairo/libpixman/src/pixman-combine32.h index cdd56a61a1..59bb2477aa 100644 --- a/gfx/cairo/libpixman/src/pixman-combine32.h +++ b/gfx/cairo/libpixman/src/pixman-combine32.h @@ -12,7 +12,7 @@ #define RB_MASK 
0xff00ff #define AG_MASK 0xff00ff00 #define RB_ONE_HALF 0x800080 -#define RB_MASK_PLUS_ONE 0x10000100 +#define RB_MASK_PLUS_ONE 0x1000100 #define ALPHA_8(x) ((x) >> A_SHIFT) #define RED_8(x) (((x) >> R_SHIFT) & MASK) diff --git a/gfx/cairo/libpixman/src/pixman-combine64.c b/gfx/cairo/libpixman/src/pixman-combine64.c deleted file mode 100644 index 1c85af8eec..0000000000 --- a/gfx/cairo/libpixman/src/pixman-combine64.c +++ /dev/null @@ -1,2465 +0,0 @@ -/* WARNING: This file is generated by combine.pl from combine.inc. - Please edit one of those files rather than this one. */ - -#line 1 "pixman-combine.c.template" -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <math.h> -#include <string.h> - -#include "pixman-private.h" - -#include "pixman-combine64.h" - -/*** per channel helper functions ***/ - -static void -combine_mask_ca (uint64_t *src, uint64_t *mask) -{ - uint64_t a = *mask; - - uint64_t x; - uint32_t xa; - - if (!a) - { - *(src) = 0; - return; - } - - x = *(src); - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - xa = x >> A_SHIFT; - UN16x4_MUL_UN16x4 (x, a); - *(src) = x; - - UN16x4_MUL_UN16 (a, xa); - *(mask) = a; -} - -static void -combine_mask_value_ca (uint64_t *src, const uint64_t *mask) -{ - uint64_t a = *mask; - uint64_t x; - - if (!a) - { - *(src) = 0; - return; - } - - if (a == ~0) - return; - - x = *(src); - UN16x4_MUL_UN16x4 (x, a); - *(src) = x; -} - -static void -combine_mask_alpha_ca (const uint64_t *src, uint64_t *mask) -{ - uint64_t a = *(mask); - uint64_t x; - - if (!a) - return; - - x = *(src) >> A_SHIFT; - if (x == MASK) - return; - - if (a == ~0) - { - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - UN16x4_MUL_UN16 (a, x); - *(mask) = a; -} - -/* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. 
The unified alpha version has a 'U' at the end of the name, - * the component version has a 'C'. Similarly, functions which deal with - * this difference will have two versions using the same convention. - */ - -/* - * All of the composing functions - */ - -static force_inline uint64_t -combine_mask (const uint64_t *src, const uint64_t *mask, int i) -{ - uint64_t s, m; - - if (mask) - { - m = *(mask + i) >> A_SHIFT; - - if (!m) - return 0; - } - - s = *(src + i); - - if (mask) - UN16x4_MUL_UN16 (s, m); - - return s; -} - -static void -combine_clear (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(uint64_t)); -} - -static void -combine_dst (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - return; -} - -static void -combine_src_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (uint64_t)); - else - { - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - - *(dest + i) = s; - } - } -} - -/* if the Src is opaque, call combine_src_u */ -static void -combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t ia = ALPHA_16 (~s); - - UN16x4_MUL_UN16_ADD_UN16x4 (d, ia, s); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, this is a noop */ -static void -combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, 
i); - uint64_t d = *(dest + i); - uint64_t ia = ALPHA_16 (~*(dest + i)); - UN16x4_MUL_UN16_ADD_UN16x4 (s, ia, d); - *(dest + i) = s; - } -} - -/* if the Dst is opaque, call combine_src_u */ -static void -combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t a = ALPHA_16 (*(dest + i)); - UN16x4_MUL_UN16 (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, this is a noop */ -static void -combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t a = ALPHA_16 (s); - UN16x4_MUL_UN16 (d, a); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, call combine_clear */ -static void -combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t a = ALPHA_16 (~*(dest + i)); - UN16x4_MUL_UN16 (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_clear */ -static void -combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t a = ALPHA_16 (~s); - UN16x4_MUL_UN16 (d, a); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_in_u */ -/* if the Dst is opaque, call combine_over_u */ -/* if both the Src and Dst are opaque, call combine_src_u */ -static void -combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - 
uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t dest_a = ALPHA_16 (d); - uint64_t src_ia = ALPHA_16 (~s); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_a, d, src_ia); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_reverse_u */ -/* if the Dst is opaque, call combine_in_reverse_u */ -/* if both the Src and Dst are opaque, call combine_dst_u */ -static void -combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t src_a = ALPHA_16 (s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_u */ -/* if the Dst is opaque, call combine_over_reverse_u */ -/* if both the Src and Dst are opaque, call combine_clear */ -static void -combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t src_ia = ALPHA_16 (~s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_ia); - *(dest + i) = s; - } -} - -static void -combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - UN16x4_ADD_UN16x4 (d, s); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_add_u */ -/* if the Dst is opaque, 
call combine_add_u */ -/* if both the Src and Dst are opaque, call combine_add_u */ -static void -combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint32_t sa, da; - - sa = s >> A_SHIFT; - da = ~d >> A_SHIFT; - if (sa > da) - { - sa = DIV_UN16 (da, sa); - UN16x4_MUL_UN16 (s, sa); - } - ; - UN16x4_ADD_UN16x4 (d, s); - *(dest + i) = d; - } -} - -/* - * PDF blend modes: - * The following blend modes have been taken from the PDF ISO 32000 - * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. - * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * Cra = (1 – as) . Dca + (1 – ad) . 
Sca + B(Dca, ad, Sca, as) - */ - -/* - * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca - */ - -static void -combine_multiply_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t ss = s; - uint64_t src_ia = ALPHA_16 (~s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (ss, dest_ia, d, src_ia); - UN16x4_MUL_UN16x4 (d, s); - UN16x4_ADD_UN16x4 (d, ss); - - *(dest + i) = d; - } -} - -static void -combine_multiply_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t m = *(mask + i); - uint64_t s = *(src + i); - uint64_t d = *(dest + i); - uint64_t r = d; - uint64_t dest_ia = ALPHA_16 (~d); - - combine_mask_value_ca (&s, &m); - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (r, ~m, s, dest_ia); - UN16x4_MUL_UN16x4 (d, s); - UN16x4_ADD_UN16x4 (r, d); - - *(dest + i) = r; - } -} - -#define PDF_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t * dest, \ - const uint64_t * src, \ - const uint64_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - uint64_t s = combine_mask (src, mask, i); \ - uint64_t d = *(dest + i); \ - uint16_t sa = ALPHA_16 (s); \ - uint16_t isa = ~sa; \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - \ - result = d; \ - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_16 (d), da, RED_16 (s), sa) << R_SHIFT) + \ - (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), sa) << G_SHIFT) + \ - (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), sa)); \ - 
} \ - } \ - \ - static void \ - combine_ ## name ## _ca (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t * dest, \ - const uint64_t * src, \ - const uint64_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - uint64_t m = *(mask + i); \ - uint64_t s = *(src + i); \ - uint64_t d = *(dest + i); \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - \ - combine_mask_value_ca (&s, &m); \ - \ - result = d; \ - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (result, ~m, s, ida); \ - \ - result += \ - (DIV_ONE_UN16 (ALPHA_16 (m) * (uint64_t)da) << A_SHIFT) + \ - (blend_ ## name (RED_16 (d), da, RED_16 (s), RED_16 (m)) << R_SHIFT) + \ - (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), GREEN_16 (m)) << G_SHIFT) + \ - (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), BLUE_16 (m))); \ - \ - *(dest + i) = result; \ - } \ - } - -/* - * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca - */ -static inline uint64_t -blend_screen (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - return DIV_ONE_UN16 (sca * da + dca * sa - sca * dca); -} - -PDF_SEPARABLE_BLEND_MODE (screen) - -/* - * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline uint64_t -blend_overlay (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t rca; - - if (2 * dca < da) - rca = 2 * sca * dca; - else - rca = sa * da - 2 * (da - dca) * (sa - sca); - return DIV_ONE_UN16 (rca); -} - -PDF_SEPARABLE_BLEND_MODE (overlay) - -/* - * Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) - */ -static inline uint64_t -blend_darken (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UN16 (s > d ? 
d : s); -} - -PDF_SEPARABLE_BLEND_MODE (darken) - -/* - * Lighten - * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) - */ -static inline uint64_t -blend_lighten (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UN16 (s > d ? s : d); -} - -PDF_SEPARABLE_BLEND_MODE (lighten) - -/* - * Color dodge - * B(Dca, Da, Sca, Sa) = - * if Dca == 0 - * 0 - * if Sca == Sa - * Sa.Da - * otherwise - * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) - */ -static inline uint64_t -blend_color_dodge (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (sca >= sa) - { - return dca == 0 ? 0 : DIV_ONE_UN16 (sa * da); - } - else - { - uint64_t rca = dca * sa / (sa - sca); - return DIV_ONE_UN16 (sa * MIN (rca, da)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_dodge) - -/* - * Color burn - * B(Dca, Da, Sca, Sa) = - * if Dca == Da - * Sa.Da - * if Sca == 0 - * 0 - * otherwise - * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) - */ -static inline uint64_t -blend_color_burn (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (sca == 0) - { - return dca < da ? 
0 : DIV_ONE_UN16 (sa * da); - } - else - { - uint64_t rca = (da - dca) * sa / sca; - return DIV_ONE_UN16 (sa * (MAX (rca, da) - rca)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_burn) - -/* - * Hard light - * B(Dca, Da, Sca, Sa) = - * if 2.Sca < Sa - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline uint64_t -blend_hard_light (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (2 * sca < sa) - return DIV_ONE_UN16 (2 * sca * dca); - else - return DIV_ONE_UN16 (sa * da - 2 * (da - dca) * (sa - sca)); -} - -PDF_SEPARABLE_BLEND_MODE (hard_light) - -/* - * Soft light - * B(Dca, Da, Sca, Sa) = - * if (2.Sca <= Sa) - * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa)) - * otherwise if Dca.4 <= Da - * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3) - * otherwise - * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa)) - */ -static inline uint64_t -blend_soft_light (uint64_t dca_org, - uint64_t da_org, - uint64_t sca_org, - uint64_t sa_org) -{ - double dca = dca_org * (1.0 / MASK); - double da = da_org * (1.0 / MASK); - double sca = sca_org * (1.0 / MASK); - double sa = sa_org * (1.0 / MASK); - double rca; - - if (2 * sca < sa) - { - if (da == 0) - rca = dca * sa; - else - rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da; - } - else if (da == 0) - { - rca = 0; - } - else if (4 * dca <= da) - { - rca = dca * sa + - (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3); - } - else - { - rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa); - } - return rca * MASK + 0.5; -} - -PDF_SEPARABLE_BLEND_MODE (soft_light) - -/* - * Difference - * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da) - */ -static inline uint64_t -blend_difference (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t dcasa = dca * sa; - uint64_t scada = sca * da; - - if (scada < dcasa) - return DIV_ONE_UN16 (dcasa - scada); - else - return DIV_ONE_UN16 (scada - dcasa); -} - -PDF_SEPARABLE_BLEND_MODE (difference) - -/* - * Exclusion - * B(Dca, Da, 
Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca) - */ - -/* This can be made faster by writing it directly and not using - * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ - -static inline uint64_t -blend_exclusion (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - return DIV_ONE_UN16 (sca * da + dca * sa - 2 * dca * sca); -} - -PDF_SEPARABLE_BLEND_MODE (exclusion) - -#undef PDF_SEPARABLE_BLEND_MODE - -/* - * PDF nonseperable blend modes are implemented using the following functions - * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid - * and min value of the red, green and blue components. - * - * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue - * - * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) ) - * if x > 1.0 - * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) ) - * return C - * - * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) - * - * SAT (C) = CH_MAX (C) - CH_MIN (C) - * - * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C - */ - -/* For premultiplied colors, we need to know what happens when C is - * multiplied by a real number. LUM and SAT are linear: - * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) - * - * If we extend clip_color with an extra argument a and change - * - * if x >= 1.0 - * - * into - * - * if x >= a - * - * then clip_color is also linear: - * - * r * clip_color (C, a) = clip_color (r_c, ra); - * - * for positive r. 
- * - * Similarly, we can extend set_lum with an extra argument that is just passed - * on to clip_color: - * - * r * set_lum ( C, l, a) - * - * = r × clip_color ( C + l - LUM (C), a) - * - * = clip_color ( r * C + r × l - r * LUM (C), r * a) - * - * = set_lum ( r * C, r * l, r * a) - * - * Finally, set_sat: - * - * r * set_sat (C, s) = set_sat (x * C, r * s) - * - * The above holds for all non-zero x, because the x'es in the fraction for - * C_mid cancel out. Specifically, it holds for x = r: - * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - */ - -/* So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) - * - */ - -#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) -#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? 
c[1] : c[2])) -#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) -#define SAT(c) (CH_MAX (c) - CH_MIN (c)) - -#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t *dest, \ - const uint64_t *src, \ - const uint64_t *mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - uint64_t s = combine_mask (src, mask, i); \ - uint64_t d = *(dest + i); \ - uint16_t sa = ALPHA_16 (s); \ - uint16_t isa = ~sa; \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - uint64_t sc[3], dc[3], c[3]; \ - \ - result = d; \ - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \ - dc[0] = RED_16 (d); \ - sc[0] = RED_16 (s); \ - dc[1] = GREEN_16 (d); \ - sc[1] = GREEN_16 (s); \ - dc[2] = BLUE_16 (d); \ - sc[2] = BLUE_16 (s); \ - blend_ ## name (c, dc, da, sc, sa); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) + \ - (DIV_ONE_UN16 (c[0]) << R_SHIFT) + \ - (DIV_ONE_UN16 (c[1]) << G_SHIFT) + \ - (DIV_ONE_UN16 (c[2])); \ - } \ - } - -static void -set_lum (uint64_t dest[3], uint64_t src[3], uint64_t sa, uint64_t lum) -{ - double a, l, min, max; - double tmp[3]; - - a = sa * (1.0 / MASK); - - l = lum * (1.0 / MASK); - tmp[0] = src[0] * (1.0 / MASK); - tmp[1] = src[1] * (1.0 / MASK); - tmp[2] = src[2] * (1.0 / MASK); - - l = l - LUM (tmp); - tmp[0] += l; - tmp[1] += l; - tmp[2] += l; - - /* clip_color */ - l = LUM (tmp); - min = CH_MIN (tmp); - max = CH_MAX (tmp); - - if (min < 0) - { - if (l - min == 0.0) - { - tmp[0] = 0; - tmp[1] = 0; - tmp[2] = 0; - } - else - { - tmp[0] = l + (tmp[0] - l) * l / (l - min); - tmp[1] = l + (tmp[1] - l) * l / (l - min); - tmp[2] = l + (tmp[2] - l) * l / (l - min); - } - } - if (max > a) - { - if (max - l == 0.0) - { - tmp[0] = a; - tmp[1] = a; - tmp[2] = a; - } - else - { - tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); - tmp[1] = l + (tmp[1] - l) * (a - l) / (max - 
l); - tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l); - } - } - - dest[0] = tmp[0] * MASK + 0.5; - dest[1] = tmp[1] * MASK + 0.5; - dest[2] = tmp[2] * MASK + 0.5; -} - -static void -set_sat (uint64_t dest[3], uint64_t src[3], uint64_t sat) -{ - int id[3]; - uint64_t min, max; - - if (src[0] > src[1]) - { - if (src[0] > src[2]) - { - id[0] = 0; - if (src[1] > src[2]) - { - id[1] = 1; - id[2] = 2; - } - else - { - id[1] = 2; - id[2] = 1; - } - } - else - { - id[0] = 2; - id[1] = 0; - id[2] = 1; - } - } - else - { - if (src[0] > src[2]) - { - id[0] = 1; - id[1] = 0; - id[2] = 2; - } - else - { - id[2] = 0; - if (src[1] > src[2]) - { - id[0] = 1; - id[1] = 2; - } - else - { - id[0] = 2; - id[1] = 1; - } - } - } - - max = dest[id[0]]; - min = dest[id[2]]; - if (max > min) - { - dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); - dest[id[0]] = sat; - dest[id[2]] = 0; - } - else - { - dest[0] = dest[1] = dest[2] = 0; - } -} - -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) - */ -static inline void -blend_hsl_hue (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_sat (c, c, SAT (dc) * sa); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) - -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) - */ -static inline void -blend_hsl_saturation (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_sat (c, c, SAT (sc) * da); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) - -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) - */ -static inline void -blend_hsl_color (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_lum (c, c, sa * da, LUM 
(dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) - -/* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) - */ -static inline void -blend_hsl_luminosity (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_lum (c, c, sa * da, LUM (sc) * da); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) - -#undef SAT -#undef LUM -#undef CH_MAX -#undef CH_MIN -#undef PDF_NON_SEPARABLE_BLEND_MODE - -/* All of the disjoint/conjoint composing functions - * - * The four entries in the first column indicate what source contributions - * come from each of the four areas of the picture -- areas covered by neither - * A nor B, areas covered only by A, areas covered only by B and finally - * areas covered by both A and B. - * - * Disjoint Conjoint - * Fa Fb Fa Fb - * (0,0,0,0) 0 0 0 0 - * (0,A,0,A) 1 0 1 0 - * (0,0,B,B) 0 1 0 1 - * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) - * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 - * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 - * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) - * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 - * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) - * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) - * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) - * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) - * - * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more - * information about these operators. 
- */ - -#define COMBINE_A_OUT 1 -#define COMBINE_A_IN 2 -#define COMBINE_B_OUT 4 -#define COMBINE_B_IN 8 - -#define COMBINE_CLEAR 0 -#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) -#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) -#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) - -/* portion covered by a but not b */ -static uint16_t -combine_disjoint_out_part (uint16_t a, uint16_t b) -{ - /* min (1, (1-b) / a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN16 (b, a); /* (1-b) / a */ -} - -/* portion covered by both a and b */ -static uint16_t -combine_disjoint_in_part (uint16_t a, uint16_t b) -{ - /* max (1-(1-b)/a,0) */ - /* = - min ((1-b)/a - 1, 0) */ - /* = 1 - min (1, (1-b)/a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return 0; /* 1 - 1 */ - return ~DIV_UN16(b, a); /* 1 - (1-b) / a */ -} - -/* portion covered by a but not b */ -static uint16_t -combine_conjoint_out_part (uint16_t a, uint16_t b) -{ - /* max (1-b/a,0) */ - /* = 1-min(b/a,1) */ - - /* min (1, (1-b) / a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return 0x00; /* 0 */ - return ~DIV_UN16(b, a); /* 1 - b/a */ -} - -/* portion covered by both a and b */ -static uint16_t -combine_conjoint_in_part (uint16_t a, uint16_t b) -{ - /* min (1,b/a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN16 (b, a); /* b/a */ -} - -#define GET_COMP(v, i) ((uint32_t) (uint16_t) ((v) >> i)) - -#define ADD(x, y, i, t) \ - ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ - (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) - -#define GENERIC(x, y, i, ax, ay, t, u, v) \ - ((t) = (MUL_UN16 (GET_COMP (y, i), ay, (u)) + \ - MUL_UN16 
(GET_COMP (x, i), ax, (v))), \ - (uint64_t) ((uint16_t) ((t) | \ - (0 - ((t) >> G_SHIFT)))) << (i)) - -static void -combine_disjoint_general_u (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t m, n, o, p; - uint32_t Fa, Fb, t, u, v; - uint16_t sa = s >> A_SHIFT; - uint16_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_disjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_disjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_disjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_disjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - s = m | n | o | p; - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint32_t a = s >> A_SHIFT; - - if (s != 0x00) - { - uint64_t d = *(dest + i); - a = combine_disjoint_out_part (d >> A_SHIFT, a); - UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s); - - *(dest + i) = d; - } - } -} - -static void -combine_disjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_u 
(pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_u (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t m, n, o, p; - uint32_t Fa, Fb, t, u, v; - uint16_t sa = s >> A_SHIFT; - uint16_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_conjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = 
combine_conjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_conjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_conjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - 
int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -/************************************************************************/ -/*********************** Per Channel functions **************************/ -/************************************************************************/ - -static void -combine_clear_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(uint64_t)); -} - -static void -combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - - combine_mask_value_ca (&s, &m); - - *(dest + i) = s; - } -} - -static void -combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_ca (&s, &m); - - a = ~m; - if (a) - { - uint64_t d = *(dest + i); - UN16x4_MUL_UN16x4_ADD_UN16x4 
(d, a, s); - s = d; - } - - *(dest + i) = s; - } -} - -static void -combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t a = ~d >> A_SHIFT; - - if (a) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - - UN16x4_MUL_UN16x4 (s, m); - UN16x4_MUL_UN16_ADD_UN16x4 (s, a, d); - - *(dest + i) = s; - } - } -} - -static void -combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint32_t a = d >> A_SHIFT; - uint64_t s = 0; - - if (a) - { - uint64_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN16x4_MUL_UN16 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_alpha_ca (&s, &m); - - a = m; - if (a != ~0) - { - uint64_t d = 0; - - if (a) - { - d = *(dest + i); - UN16x4_MUL_UN16x4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint32_t a = ~d >> A_SHIFT; - uint64_t s = 0; - - if (a) - { - uint64_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN16x4_MUL_UN16 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const 
uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_alpha_ca (&s, &m); - - a = ~m; - if (a != ~0) - { - uint64_t d = 0; - - if (a) - { - d = *(dest + i); - UN16x4_MUL_UN16x4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); 
- uint64_t m = *(mask + i); - uint64_t d = *(dest + i); - - combine_mask_value_ca (&s, &m); - - UN16x4_ADD_UN16x4 (d, s); - - *(dest + i) = d; - } -} - -static void -combine_saturate_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint32_t sa, sr, sg, sb, da; - uint32_t t, u, v; - uint64_t m, n, o, p; - - d = *(dest + i); - s = *(src + i); - m = *(mask + i); - - combine_mask_ca (&s, &m); - - sa = (m >> A_SHIFT); - sr = (m >> R_SHIFT) & MASK; - sg = (m >> G_SHIFT) & MASK; - sb = m & MASK; - da = ~d >> A_SHIFT; - - if (sb <= da) - m = ADD (s, d, 0, t); - else - m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); - - if (sg <= da) - n = ADD (s, d, G_SHIFT, t); - else - n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); - - if (sr <= da) - o = ADD (s, d, R_SHIFT, t); - else - o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); - - if (sa <= da) - p = ADD (s, d, A_SHIFT, t); - else - p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); - - *(dest + i) = m | n | o | p; - } -} - -static void -combine_disjoint_general_ca (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint64_t m, n, o, p; - uint64_t Fa, Fb; - uint32_t t, u, v; - uint64_t sa; - uint16_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa 
>> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - 
-static void -combine_disjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_ca (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint64_t m, n, o, p; - uint64_t Fa, Fb; - 
uint32_t t, u, v; - uint64_t sa; - uint16_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n 
= GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - 
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -void -_pixman_setup_combiner_functions_64 (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_64[PIXMAN_OP_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_DST] = combine_dst; - imp->combine_64[PIXMAN_OP_OVER] = combine_over_u; - imp->combine_64[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; - imp->combine_64[PIXMAN_OP_IN] = combine_in_u; - imp->combine_64[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; - imp->combine_64[PIXMAN_OP_OUT] = combine_out_u; - imp->combine_64[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; - imp->combine_64[PIXMAN_OP_ATOP] = combine_atop_u; - imp->combine_64[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; - imp->combine_64[PIXMAN_OP_XOR] = combine_xor_u; - imp->combine_64[PIXMAN_OP_ADD] = combine_add_u; - imp->combine_64[PIXMAN_OP_SATURATE] = combine_saturate_u; - - /* Disjoint, unified */ - imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; - imp->combine_64[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; - imp->combine_64[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OUT] = 
combine_disjoint_out_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; - imp->combine_64[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; - - /* Conjoint, unified */ - imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; - imp->combine_64[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; - imp->combine_64[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; - - imp->combine_64[PIXMAN_OP_MULTIPLY] = combine_multiply_u; - imp->combine_64[PIXMAN_OP_SCREEN] = combine_screen_u; - imp->combine_64[PIXMAN_OP_OVERLAY] = combine_overlay_u; - imp->combine_64[PIXMAN_OP_DARKEN] = combine_darken_u; - imp->combine_64[PIXMAN_OP_LIGHTEN] = combine_lighten_u; - imp->combine_64[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; - imp->combine_64[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; - imp->combine_64[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; - imp->combine_64[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; - imp->combine_64[PIXMAN_OP_DIFFERENCE] = combine_difference_u; - imp->combine_64[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; - imp->combine_64[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; - imp->combine_64[PIXMAN_OP_HSL_SATURATION] = 
combine_hsl_saturation_u; - imp->combine_64[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; - imp->combine_64[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u; - - /* Component alpha combiners */ - imp->combine_64_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; - imp->combine_64_ca[PIXMAN_OP_SRC] = combine_src_ca; - /* dest */ - imp->combine_64_ca[PIXMAN_OP_OVER] = combine_over_ca; - imp->combine_64_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_IN] = combine_in_ca; - imp->combine_64_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_OUT] = combine_out_ca; - imp->combine_64_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_ATOP] = combine_atop_ca; - imp->combine_64_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_XOR] = combine_xor_ca; - imp->combine_64_ca[PIXMAN_OP_ADD] = combine_add_ca; - imp->combine_64_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; - - /* Disjoint CA */ - imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; - - /* Conjoint CA */ - imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; - 
imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; - - imp->combine_64_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; - imp->combine_64_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; - imp->combine_64_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; - imp->combine_64_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; - imp->combine_64_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; - imp->combine_64_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; - imp->combine_64_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; - imp->combine_64_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; - imp->combine_64_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; - imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; - imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; - - /* It is not clear that these make sense, so make them noops for now */ - imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; -} - diff --git a/gfx/cairo/libpixman/src/pixman-combine64.h b/gfx/cairo/libpixman/src/pixman-combine64.h deleted file mode 100644 
index 00413a85f6..0000000000 --- a/gfx/cairo/libpixman/src/pixman-combine64.h +++ /dev/null @@ -1,230 +0,0 @@ -/* WARNING: This file is generated by combine.pl from combine.inc. - Please edit one of those files rather than this one. */ - -#line 1 "pixman-combine.c.template" - -#define COMPONENT_SIZE 16 -#define MASK 0xffffULL -#define ONE_HALF 0x8000ULL - -#define A_SHIFT 16 * 3 -#define R_SHIFT 16 * 2 -#define G_SHIFT 16 -#define A_MASK 0xffff000000000000ULL -#define R_MASK 0xffff00000000ULL -#define G_MASK 0xffff0000ULL - -#define RB_MASK 0xffff0000ffffULL -#define AG_MASK 0xffff0000ffff0000ULL -#define RB_ONE_HALF 0x800000008000ULL -#define RB_MASK_PLUS_ONE 0x10000000010000ULL - -#define ALPHA_16(x) ((x) >> A_SHIFT) -#define RED_16(x) (((x) >> R_SHIFT) & MASK) -#define GREEN_16(x) (((x) >> G_SHIFT) & MASK) -#define BLUE_16(x) ((x) & MASK) - -/* - * Helper macros. - */ - -#define MUL_UN16(a, b, t) \ - ((t) = (a) * (uint32_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) - -#define DIV_UN16(a, b) \ - (((uint32_t) (a) * MASK + ((b) / 2)) / (b)) - -#define ADD_UN16(x, y, t) \ - ((t) = (x) + (y), \ - (uint64_t) (uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) - -#define DIV_ONE_UN16(x) \ - (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) - -/* - * The methods below use some tricks to be able to do two color - * components at the same time. 
- */ - -/* - * x_rb = (x_rb * a) / 255 - */ -#define UN16_rb_MUL_UN16(x, a, t) \ - do \ - { \ - t = ((x) & RB_MASK) * (a); \ - t += RB_ONE_HALF; \ - x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x &= RB_MASK; \ - } while (0) - -/* - * x_rb = min (x_rb + y_rb, 255) - */ -#define UN16_rb_ADD_UN16_rb(x, y, t) \ - do \ - { \ - t = ((x) + (y)); \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - x = (t & RB_MASK); \ - } while (0) - -/* - * x_rb = (x_rb * a_rb) / 255 - */ -#define UN16_rb_MUL_UN16_rb(x, a, t) \ - do \ - { \ - t = (x & MASK) * (a & MASK); \ - t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - t += RB_ONE_HALF; \ - t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x = t & RB_MASK; \ - } while (0) - -/* - * x_c = (x_c * a) / 255 - */ -#define UN16x4_MUL_UN16(x, a) \ - do \ - { \ - uint64_t r1__, r2__, t__; \ - \ - r1__ = (x); \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a) / 255 + y_c - */ -#define UN16x4_MUL_UN16_ADD_UN16x4(x, a, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y) & RB_MASK; \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a + y_c * b) / 255 - */ -#define UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16(x, a, y, b) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y); \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - UN16_rb_MUL_UN16 (r2__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((y) >> G_SHIFT); \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - UN16_rb_MUL_UN16 (r3__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ 
- (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 - */ -#define UN16x4_MUL_UN16x4(x, a) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 + y_c - */ -#define UN16x4_MUL_UN16x4_ADD_UN16x4(x, a, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - r2__ = (y) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((a) >> G_SHIFT); \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c + y_c * b) / 255 - */ -#define UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16(x, a, y, b) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - r2__ = (y); \ - UN16_rb_MUL_UN16 (r2__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - r3__ = (y) >> G_SHIFT; \ - UN16_rb_MUL_UN16 (r3__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - x_c = min(x_c + y_c, 255) -*/ -#define UN16x4_ADD_UN16x4(x, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x) & RB_MASK; \ - r2__ = (y) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT) & RB_MASK; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) diff --git a/gfx/cairo/libpixman/src/pixman-compiler.h 
b/gfx/cairo/libpixman/src/pixman-compiler.h index fb674c5f08..d852f93798 100644 --- a/gfx/cairo/libpixman/src/pixman-compiler.h +++ b/gfx/cairo/libpixman/src/pixman-compiler.h @@ -19,6 +19,12 @@ #endif #if defined (__GNUC__) +# define unlikely(expr) __builtin_expect ((expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +#if defined (__GNUC__) # define MAYBE_UNUSED __attribute__((unused)) #else # define MAYBE_UNUSED @@ -106,6 +112,8 @@ /* Sun Studio 8 visibility */ #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) # define PIXMAN_EXPORT __global +#elif defined (_MSC_VER) || defined(__MINGW32__) +# define PIXMAN_EXPORT PIXMAN_API #else # define PIXMAN_EXPORT #endif @@ -131,12 +139,10 @@ # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) -#elif defined(__MINGW32__) || defined(PIXMAN_USE_XP_DLL_TLS_WORKAROUND) +#elif defined(__MINGW32__) # define _NO_W32_PSEUDO_MODIFIERS # include <windows.h> -#undef IN -#undef OUT # define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ static volatile int tls_ ## name ## _initialized = 0; \ @@ -193,7 +199,7 @@ # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) -#elif defined(HAVE_PTHREAD_SETSPECIFIC) +#elif defined(HAVE_PTHREADS) #include <pthread.h> diff --git a/gfx/cairo/libpixman/src/pixman-conical-gradient.c b/gfx/cairo/libpixman/src/pixman-conical-gradient.c index 8bb46aecdc..a39e20c4eb 100644 --- a/gfx/cairo/libpixman/src/pixman-conical-gradient.c +++ b/gfx/cairo/libpixman/src/pixman-conical-gradient.c @@ -51,7 +51,10 @@ coordinates_to_parameter (double x, double y, double angle) } static uint32_t * -conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +conical_get_scanline (pixman_iter_t *iter, + const uint32_t *mask, + int Bpp, + pixman_gradient_walker_write_t write_pixel) { pixman_image_t *image = iter->image; int x = iter->x; @@ -61,7 +64,7 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) gradient_t *gradient = (gradient_t *)image; conical_gradient_t *conical = (conical_gradient_t 
*)image; - uint32_t *end = buffer + width; + uint32_t *end = buffer + width * (Bpp / 4); pixman_gradient_walker_t walker; pixman_bool_t affine = TRUE; double cx = 1.; @@ -109,11 +112,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { double t = coordinates_to_parameter (rx, ry, conical->angle); - *buffer = _pixman_gradient_walker_pixel ( - &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t)); + write_pixel (&walker, + (pixman_fixed_48_16_t)pixman_double_to_fixed (t), + buffer); } - ++buffer; + buffer += (Bpp / 4); rx += cx; ry += cy; @@ -144,11 +148,12 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) t = coordinates_to_parameter (x, y, conical->angle); - *buffer = _pixman_gradient_walker_pixel ( - &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t)); + write_pixel (&walker, + (pixman_fixed_48_16_t)pixman_double_to_fixed (t), + buffer); } - ++buffer; + buffer += (Bpp / 4); rx += cx; ry += cy; @@ -161,14 +166,17 @@ conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) } static uint32_t * -conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { - uint32_t *buffer = conical_get_scanline_narrow (iter, NULL); - - pixman_expand_to_float ( - (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + return conical_get_scanline (iter, mask, 4, + _pixman_gradient_walker_write_narrow); +} - return buffer; +static uint32_t * +conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + return conical_get_scanline (iter, NULL, 16, + _pixman_gradient_walker_write_wide); } void diff --git a/gfx/cairo/libpixman/src/pixman-cpu.c b/gfx/cairo/libpixman/src/pixman-cpu.c deleted file mode 100644 index 30f5fba8a4..0000000000 --- a/gfx/cairo/libpixman/src/pixman-cpu.c +++ /dev/null @@ -1,765 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. 
- * Copyright © 2021 Moonchild Productions - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the names of the authors not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. The authors make no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE AUTHORS - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> -#include <stdlib.h> - -#if defined(USE_ARM_SIMD) && defined(_MSC_VER) -/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ -#include <windows.h> -#endif - -#if defined(__APPLE__) -#include "TargetConditionals.h" -#endif - -#include "pixman-private.h" - -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * "-maltivec -mabi=altivec", as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. 
- */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - -#ifdef __APPLE__ -#include <sys/sysctl.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - size_t length = sizeof(have_vmx); - int error = - sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); - - if (error) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__OpenBSD__) -#include <sys/param.h> -#include <sys/sysctl.h> -#include <machine/cpu.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, &have_vmx, &length, NULL, 0); - - if (error != 0) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__linux__) -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdio.h> -#include <linux/auxvec.h> -#include <asm/cputable.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; - - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); - - fd = open (fname, O_RDONLY); - if (fd >= 0) - { - for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) - { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count <= 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } - } - close (fd); - } - } - if (!initialized) - { - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. 
*/ - have_vmx = FALSE; - initialized = TRUE; - } - - return have_vmx; -} - -#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ -#include <signal.h> -#include <setjmp.h> - -static jmp_buf jump_env; - -static void -vmx_test (int sig, - siginfo_t *si, - void * unused) -{ - longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ - struct sigaction sa, osa; - int jmp_result; - - if (!initialized) - { - sa.sa_flags = SA_SIGINFO; - sigemptyset (&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, &sa, &osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( "vor 0, 0, 0" ); - } - sigaction (SIGILL, &osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; - } - return have_vmx; -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) - -#if defined(_MSC_VER) - -#if defined(USE_ARM_SIMD) -extern int pixman_msvc_try_arm_simd_op (); - -pixman_bool_t -pixman_have_arm_simd (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_simd = FALSE; - - if (!initialized) - { - __try { - pixman_msvc_try_arm_simd_op (); - have_arm_simd = TRUE; - } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } - initialized = TRUE; - } - - return have_arm_simd; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -extern int pixman_msvc_try_arm_neon_op (); - -pixman_bool_t -pixman_have_arm_neon (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_neon = FALSE; - - if (!initialized) - { - __try - { - pixman_msvc_try_arm_neon_op (); - have_arm_neon = TRUE; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - have_arm_neon = FALSE; - } - initialized = TRUE; - } - - return have_arm_neon; -} - -#endif /* USE_ARM_NEON */ - -#elif (defined (__APPLE__) && defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */ - -/* 
Detection of ARM NEON on iOS is fairly simple because iOS binaries - * contain separate executable images for each processor architecture. - * So all we have to do is detect the armv7 architecture build. The - * operating system automatically runs the armv7 binary for armv7 devices - * and the armv6 binary for armv6 devices. - */ - -pixman_bool_t -pixman_have_arm_simd (void) -{ -#if defined(USE_ARM_SIMD) - return TRUE; -#else - return FALSE; -#endif -} - -pixman_bool_t -pixman_have_arm_neon (void) -{ -#if defined(USE_ARM_NEON) && defined(__ARM_NEON__) - /* This is an armv7 cpu build */ - return TRUE; -#else - /* This is an armv6 cpu build */ - return FALSE; -#endif -} - -pixman_bool_t -pixman_have_arm_iwmmxt (void) -{ -#if defined(USE_ARM_IWMMXT) - return FALSE; -#else - return FALSE; -#endif -} - -#elif defined (__linux__) /* linux ELF */ - -static pixman_bool_t arm_has_v7 = FALSE; -static pixman_bool_t arm_has_v6 = FALSE; -static pixman_bool_t arm_has_vfp = FALSE; -static pixman_bool_t arm_has_neon = FALSE; -static pixman_bool_t arm_has_iwmmxt = FALSE; -static pixman_bool_t arm_tests_initialized = FALSE; - -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <fcntl.h> -#include <string.h> -#include <elf.h> - -static void -pixman_arm_read_auxv_or_cpu_features () -{ - int fd; - Elf32_auxv_t aux; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) - { - if (aux.a_type == AT_HWCAP) - { - uint32_t hwcap = aux.a_un.a_val; - /* hardcode these values to avoid depending on specific - * versions of the hwcap header, e.g. 
HWCAP_NEON - */ - arm_has_vfp = (hwcap & 64) != 0; - arm_has_iwmmxt = (hwcap & 512) != 0; - /* this flag is only present on kernel 2.6.29 */ - arm_has_neon = (hwcap & 4096) != 0; - } - else if (aux.a_type == AT_PLATFORM) - { - const char *plat = (const char*) aux.a_un.a_val; - if (strncmp (plat, "v7l", 3) == 0) - { - arm_has_v7 = TRUE; - arm_has_v6 = TRUE; - } - else if (strncmp (plat, "v6l", 3) == 0) - { - arm_has_v6 = TRUE; - } - } - } - close (fd); - } - - arm_tests_initialized = TRUE; -} - -#if defined(USE_ARM_SIMD) -pixman_bool_t -pixman_have_arm_simd (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_v6; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -pixman_bool_t -pixman_have_arm_neon (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_neon; -} - -#endif /* USE_ARM_NEON */ - -#if defined(USE_ARM_IWMMXT) -pixman_bool_t -pixman_have_arm_iwmmxt (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_iwmmxt; -} - -#endif /* USE_ARM_IWMMXT */ - -#else /* !_MSC_VER && !Linux elf */ - -#define pixman_have_arm_simd() FALSE -#define pixman_have_arm_neon() FALSE -#define pixman_have_arm_iwmmxt() FALSE - -#endif - -#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ - -#if defined(USE_MIPS_DSPR2) - -#if defined (__linux__) /* linux ELF */ - -pixman_bool_t -pixman_have_mips_dspr2 (void) -{ - const char *search_string = "MIPS 74K"; - const char *file_name = "/proc/cpuinfo"; - /* Simple detection of MIPS DSP ASE (revision 2) at runtime for Linux. - * It is based on /proc/cpuinfo, which reveals hardware configuration - * to user-space applications. According to MIPS (early 2010), no similar - * facility is universally available on the MIPS architectures, so it's up - * to individual OSes to provide such. - * - * Only currently available MIPS core that supports DSPr2 is 74K. 
- */ - - char cpuinfo_line[256]; - - FILE *f = NULL; - - if ((f = fopen (file_name, "r")) == NULL) - return FALSE; - - while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) - { - if (strstr (cpuinfo_line, search_string) != NULL) - { - fclose (f); - return TRUE; - } - } - - fclose (f); - - /* Did not find string in the proc file. */ - return FALSE; -} - -#else /* linux ELF */ - -#define pixman_have_mips_dspr2() FALSE - -#endif /* linux ELF */ - -#endif /* USE_MIPS_DSPR2 */ - -#if defined(USE_X86_MMX) || defined(USE_SSE2) -/* The CPU detection code needs to be in a file not compiled with - * "-mmmx -msse", as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. - */ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include <sys/auxv.h> -#endif - -typedef enum -{ - NO_FEATURES = 0, - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -} cpu_features_t; - - -static unsigned int -detect_cpu_features (void) -{ - unsigned int features = 0; - unsigned int result = 0; - -#ifdef HAVE_GETISAX - if (getisax (&result, 1)) - { - if (result & AV_386_CMOV) - features |= CMOV; - if (result & AV_386_MMX) - features |= MMX; - if (result & AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; - if (result & AV_386_SSE) - features |= SSE; - if (result & AV_386_SSE2) - features |= SSE2; - } -#else - char vendor[13]; -#ifdef _MSC_VER - int vendor0 = 0, vendor1, vendor2; -#endif - vendor[0] = 0; - vendor[12] = 0; - -#ifdef __GNUC__ - /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. 
- */ - __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - -#elif defined (_MSC_VER) - - _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx - nocpuid: - mov result, edx - } - memmove (vendor + 0, &vendor0, 4); - memmove (vendor + 4, &vendor1, 4); - memmove (vendor + 8, &vendor2, 4); - -#else -# error unsupported compiler -#endif - - features = 0; - if (result) - { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "Geode by NSC") == 0)) - { - /* check for AMD MMX extensions */ -#ifdef __GNUC__ - __asm__ ( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: 
- pop ebx - mov result, edx - } -#endif - if (result & (1 << 22)) - features |= MMX_EXTENSIONS; - } - } -#endif /* HAVE_GETISAX */ - - return features; -} - -#ifdef USE_X86_MMX -static pixman_bool_t -pixman_have_mmx (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t mmx_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); - initialized = TRUE; - } - - return mmx_present; -} -#endif - -#ifdef USE_SSE2 -static pixman_bool_t -pixman_have_sse2 (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t sse2_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); - initialized = TRUE; - } - - return sse2_present; -} - -#endif - -#else /* __amd64__ */ -#ifdef USE_X86_MMX -#define pixman_have_mmx() TRUE -#endif -#ifdef USE_SSE2 -#define pixman_have_sse2() TRUE -#endif -#endif /* __amd64__ */ -#endif - -static pixman_bool_t -disabled (const char *name) -{ - const char *env; - - if ((env = getenv ("PIXMAN_DISABLE"))) - { - do - { - const char *end; - int len; - - if ((end = strchr (env, ' '))) - len = end - env; - else - len = strlen (env); - - if (strlen (name) == len && strncmp (name, env, len) == 0) - { - printf ("pixman: Disabled %s implementation\n", name); - return TRUE; - } - - env += len; - } - while (*env++); - } - - return FALSE; -} - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ - pixman_implementation_t *imp; - - imp = _pixman_implementation_create_general(); - - if (!disabled ("fast")) - imp = _pixman_implementation_create_fast_path (imp); - -#ifdef USE_X86_MMX - if (!disabled ("mmx") && pixman_have_mmx ()) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_SSE2 - if (!disabled ("sse2") && pixman_have_sse2 ()) - imp = 
_pixman_implementation_create_sse2 (imp); -#endif - -#ifdef USE_ARM_SIMD - if (!disabled ("arm-simd") && pixman_have_arm_simd ()) - imp = _pixman_implementation_create_arm_simd (imp); -#endif - -#ifdef USE_ARM_IWMMXT - if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ()) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_ARM_NEON - if (!disabled ("arm-neon") && pixman_have_arm_neon ()) - imp = _pixman_implementation_create_arm_neon (imp); -#endif - -#ifdef USE_MIPS_DSPR2 - if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ()) - imp = _pixman_implementation_create_mips_dspr2 (imp); -#endif - -#ifdef USE_VMX - if (!disabled ("vmx") && pixman_have_vmx ()) - imp = _pixman_implementation_create_vmx (imp); -#endif - - imp = _pixman_implementation_create_noop (imp); - - return imp; -} - diff --git a/gfx/cairo/libpixman/src/pixman-dither.h b/gfx/cairo/libpixman/src/pixman-dither.h deleted file mode 100644 index ead9f38d8c..0000000000 --- a/gfx/cairo/libpixman/src/pixman-dither.h +++ /dev/null @@ -1,51 +0,0 @@ -#define R16_BITS 5 -#define G16_BITS 6 -#define B16_BITS 5 - -#define R16_SHIFT (B16_BITS + G16_BITS) -#define G16_SHIFT (B16_BITS) -#define B16_SHIFT 0 - -#define MASK 0xff -#define ONE_HALF 0x80 - -#define A_SHIFT 8 * 3 -#define R_SHIFT 8 * 2 -#define G_SHIFT 8 -#define A_MASK 0xff000000 -#define R_MASK 0xff0000 -#define G_MASK 0xff00 - -#define RB_MASK 0xff00ff -#define AG_MASK 0xff00ff00 -#define RB_ONE_HALF 0x800080 -#define RB_MASK_PLUS_ONE 0x10000100 - -#define ALPHA_8(x) ((x) >> A_SHIFT) -#define RED_8(x) (((x) >> R_SHIFT) & MASK) -#define GREEN_8(x) (((x) >> G_SHIFT) & MASK) -#define BLUE_8(x) ((x) & MASK) - -// This uses the same dithering technique that Skia does. 
-// It is essentially preturbing the lower bit based on the -// high bit -static inline uint16_t dither_32_to_16(uint32_t c) -{ - uint8_t b = BLUE_8(c); - uint8_t g = GREEN_8(c); - uint8_t r = RED_8(c); - r = ((r << 1) - ((r >> (8 - R16_BITS) << (8 - R16_BITS)) | (r >> R16_BITS))) >> (8 - R16_BITS); - g = ((g << 1) - ((g >> (8 - G16_BITS) << (8 - G16_BITS)) | (g >> G16_BITS))) >> (8 - G16_BITS); - b = ((b << 1) - ((b >> (8 - B16_BITS) << (8 - B16_BITS)) | (b >> B16_BITS))) >> (8 - B16_BITS); - return ((r << R16_SHIFT) | (g << G16_SHIFT) | (b << B16_SHIFT)); -} - -static inline uint16_t dither_8888_to_0565(uint32_t color, pixman_bool_t toggle) -{ - // alternate between a preturbed truncation and a regular truncation - if (toggle) { - return dither_32_to_16(color); - } else { - return convert_8888_to_0565(color); - } -} diff --git a/gfx/cairo/libpixman/src/pixman-edge-imp.h b/gfx/cairo/libpixman/src/pixman-edge-imp.h index a47098a896..a4698eddb2 100644 --- a/gfx/cairo/libpixman/src/pixman-edge-imp.h +++ b/gfx/cairo/libpixman/src/pixman-edge-imp.h @@ -55,9 +55,8 @@ RASTERIZE_EDGES (pixman_image_t *image, * * (The AA case does a similar adjustment in RENDER_SAMPLES_X) */ - /* we cast to unsigned to get defined behaviour for overflow */ - lx = (unsigned)lx + X_FRAC_FIRST(1) - pixman_fixed_e; - rx = (unsigned)rx + X_FRAC_FIRST(1) - pixman_fixed_e; + lx += X_FRAC_FIRST(1) - pixman_fixed_e; + rx += X_FRAC_FIRST(1) - pixman_fixed_e; #endif /* clip X */ if (lx < 0) diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c index 9b22d7b464..4b7a6f897f 100644 --- a/gfx/cairo/libpixman/src/pixman-fast-path.c +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c @@ -908,7 +908,7 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp, #define CREATE_BITMASK(n) (0x80000000 >> (n)) #define UPDATE_BITMASK(n) ((n) >> 1) #else -#define CREATE_BITMASK(n) (1 << (n)) +#define CREATE_BITMASK(n) (1U << (n)) #define UPDATE_BITMASK(n) ((n) << 1) 
#endif @@ -1203,228 +1203,6 @@ FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) -static force_inline void -scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - while ((w -= 1) >= 0) - { - uint32_t tl = src_top [pixman_fixed_to_int (vx)]; - uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; - uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; - uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; - uint32_t src, result; - uint16_t d; - d = *dst; - src = bilinear_interpolation (tl, tr, - bl, br, - pixman_fixed_to_bilinear_weight(vx), - wb); - vx += unit_x; - result = over (src, convert_0565_to_0888 (d)); - *dst++ = convert_8888_to_0565 (result); - } -} - -static force_inline void -scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - while ((w -= 1) >= 0) - { - uint32_t tl = src_top [pixman_fixed_to_int (vx)]; - uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; - uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; - uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; - uint32_t src; - uint32_t d; - uint32_t result; - d = *dst; - src = bilinear_interpolation (tl, tr, - bl, br, - pixman_fixed_to_bilinear_weight(vx), - wb); - vx += unit_x; - *dst++ = over (src, d); - } -} - -#ifndef LOWER_QUALITY_INTERPOLATION - -static force_inline void -scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, - const uint32_t * mask, - const uint16_t * src_top, - 
const uint16_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - while ((w -= 1) >= 0) - { - uint16_t tl = src_top [pixman_fixed_to_int (vx)]; - uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; - uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; - uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; - uint32_t d; - d = bilinear_interpolation(convert_0565_to_8888 (tl), - convert_0565_to_8888 (tr), - convert_0565_to_8888 (bl), - convert_0565_to_8888 (br), - pixman_fixed_to_bilinear_weight (vx), - wb); - vx += unit_x; - *dst++ = convert_8888_to_0565 (d); - } -} - -#else - -/* This is a clever low resolution bilinear interpolation inspired by the code - in Skia */ - -/* This takes the green component from the 565 representation and moves it: - 00000000 00000000 rrrrrggg gggbbbbb - - 00000ggg ggg00000 rrrrr000 000bbbbb - - This gives us 5 extra bits of space before each component to let us do - SWAR style optimizations -*/ - -#define GREEN_MASK (((1 << 6) - 1) << 5) - -static inline uint32_t -expand_rgb_565 (uint16_t c) { - return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK); -} - -static inline uint16_t -compact_rgb_565 (uint32_t c) { - return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK); -} - -static inline uint16_t -bilinear_interpolation_565(uint16_t tl, uint16_t tr, - uint16_t bl, uint16_t br, - int x, int y) -{ - int xy; - uint32_t a00 = expand_rgb_565 (tl); - uint32_t a01 = expand_rgb_565 (tr); - uint32_t a10 = expand_rgb_565 (bl); - uint32_t a11 = expand_rgb_565 (br); - - xy = (x * y) >> 3; - return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) + - a01 * (2*x - xy) + - a10 * (2*y - xy) + - a11 * xy) >> 5); -} - -static force_inline void -scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, - const uint32_t * mask, - const uint16_t * src_top, - const uint16_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - 
pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - while ((w -= 1) >= 0) - { - uint16_t tl = src_top [pixman_fixed_to_int (vx)]; - uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; - uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; - uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; - - uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br, - pixman_fixed_to_bilinear_weight(vx), - wb); - vx += unit_x; - *dst++ = d; - } -} - -#endif - -FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC, - scaled_bilinear_scanline_565_565_SRC, NULL, - uint16_t, uint32_t, uint16_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC, - scaled_bilinear_scanline_565_565_SRC, NULL, - uint16_t, uint32_t, uint16_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC, - scaled_bilinear_scanline_565_565_SRC, NULL, - uint16_t, uint32_t, uint16_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC, - scaled_bilinear_scanline_565_565_SRC, NULL, - uint16_t, uint32_t, uint16_t, - NORMAL, FLAG_NONE) - -FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER, - scaled_bilinear_scanline_8888_565_OVER, NULL, - uint32_t, uint32_t, uint16_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER, - scaled_bilinear_scanline_8888_565_OVER, NULL, - uint32_t, uint32_t, uint16_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER, - scaled_bilinear_scanline_8888_565_OVER, NULL, - uint32_t, uint32_t, uint16_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER, - scaled_bilinear_scanline_8888_565_OVER, NULL, - uint32_t, uint32_t, uint16_t, - NORMAL, FLAG_NONE) - -FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER, - scaled_bilinear_scanline_8888_8888_OVER, NULL, - uint32_t, uint32_t, uint32_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER, - scaled_bilinear_scanline_8888_8888_OVER, NULL, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON 
(8888_8888_none_OVER, - scaled_bilinear_scanline_8888_8888_OVER, NULL, - uint32_t, uint32_t, uint32_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER, - scaled_bilinear_scanline_8888_8888_OVER, NULL, - uint32_t, uint32_t, uint32_t, - NORMAL, FLAG_NONE) - #define REPEAT_MIN_WIDTH 32 static void @@ -2193,10 +1971,6 @@ static const pixman_fast_path_t c_fast_paths[] = fast_composite_tiled_repeat }, - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), - { PIXMAN_OP_NONE }, }; @@ -2489,93 +2263,1024 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) typedef struct { - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; -} fetcher_info_t; + int y; + uint64_t * buffer; +} line_t; -static const fetcher_info_t fetchers[] = +typedef struct { - { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, - { PIXMAN_null } -}; + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; -static pixman_bool_t -fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) { - pixman_image_t *image = iter->image; + uint32_t *bits = image->bits + y * image->rowstride; + int i; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + for (i = 0; i < n; ++i) + { + int x0 = pixman_fixed_to_int (x); + int x1 = x0 + 1; + int32_t dist_x; + + uint32_t left = *(bits + x0); + uint32_t right = *(bits + x1); + + dist_x = pixman_fixed_to_bilinear_weight (x); + dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS); + +#if SIZEOF_LONG <= 4 + { + uint32_t lag, rag, ag; + uint32_t lrb, rrb, rb; + + lag = (left & 0xff00ff00) >> 8; + rag = (right 
& 0xff00ff00) >> 8; + ag = (lag << 8) + dist_x * (rag - lag); + + lrb = (left & 0x00ff00ff); + rrb = (right & 0x00ff00ff); + rb = (lrb << 8) + dist_x * (rrb - lrb); + + *((uint32_t *)(line->buffer + i)) = ag; + *((uint32_t *)(line->buffer + i) + 1) = rb; + } +#else + { + uint64_t lagrb, ragrb; + uint32_t lag, rag; + uint32_t lrb, rrb; + + lag = (left & 0xff00ff00); + lrb = (left & 0x00ff00ff); + rag = (right & 0xff00ff00); + rrb = (right & 0x00ff00ff); + lagrb = (((uint64_t)lag) << 24) | lrb; + ragrb = (((uint64_t)rag) << 24) | rrb; + + line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb); + } +#endif + + x += ux; + } + + line->y = y; +} + +static uint32_t * +fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + int i; + + COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8); + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS); + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + for (i = 0; i < iter->width; ++i) + { +#if SIZEOF_LONG <= 4 + uint32_t ta, tr, tg, tb; + uint32_t ba, br, bg, bb; + uint32_t tag, trb; + uint32_t bag, brb; + uint32_t a, r, g, b; + + tag = *((uint32_t *)(line0->buffer + i)); + trb = *((uint32_t *)(line0->buffer + i) + 1); + bag = *((uint32_t *)(line1->buffer + i)); + brb = *((uint32_t *)(line1->buffer + i) + 1); + + ta = tag >> 16; + ba = bag >> 16; + a = (ta << 8) + dist_y * (ba - ta); + + tr = trb >> 16; + br = brb >> 16; + r = (tr << 8) + dist_y * (br - tr); + + tg = tag & 0xffff; + bg = 
bag & 0xffff; + g = (tg << 8) + dist_y * (bg - tg); + + tb = trb & 0xffff; + bb = brb & 0xffff; + b = (tb << 8) + dist_y * (bb - tb); + + a = (a << 8) & 0xff000000; + r = (r << 0) & 0x00ff0000; + g = (g >> 8) & 0x0000ff00; + b = (b >> 16) & 0x000000ff; +#else + uint64_t top = line0->buffer[i]; + uint64_t bot = line1->buffer[i]; + uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16; + uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16; + uint64_t tgb = (top & 0x0000ffff0000ffffULL); + uint64_t bgb = (bot & 0x0000ffff0000ffffULL); + uint64_t ar, gb; + uint32_t a, r, g, b; + + ar = (tar << 8) + dist_y * (bar - tar); + gb = (tgb << 8) + dist_y * (bgb - tgb); + + a = ((ar >> 24) & 0xff000000); + r = ((ar >> 0) & 0x00ff0000); + g = ((gb >> 40) & 0x0000ff00); + b = ((gb >> 16) & 0x000000ff); +#endif + + iter->buffer[i] = a | r | g | b; + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t)); + if (!info) + goto fail; - if (iter->iter_flags & ITER_16) - return FALSE; + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) + /* It is safe to set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) 
interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = &(info->data[0]); + info->lines[1].y = -1; + info->lines[1].buffer = &(info->data[width]); + + iter->get_scanline = fast_fetch_bilinear_cover; + iter->fini = bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static uint32_t * +bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, + const uint32_t *mask) +{ + + pixman_image_t * ima = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + bits_image_t *bits = &ima->bits; + pixman_fixed_t x_top, x_bottom, x; + pixman_fixed_t ux_top, ux_bottom, ux; + pixman_vector_t v; + uint32_t top_mask, bottom_mask; + uint32_t *top_row; + uint32_t *bottom_row; + uint32_t *end; + uint32_t zero[2] = { 0, 0 }; + uint32_t one = 1; + int y, y1, y2; + int disty; + int mask_inc; + int w; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (bits->common.transform, &v)) + return iter->buffer; + + ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; + x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; + + y = v.vector[1] - pixman_fixed_1/2; + disty = pixman_fixed_to_bilinear_weight (y); + + /* Load the pointers to the first and second lines from the source + * image that bilinear code must read. 
+ * + * The main trick in this code is about the check if any line are + * outside of the image; + * + * When I realize that a line (any one) is outside, I change + * the pointer to a dummy area with zeros. Once I change this, I + * must be sure the pointer will not change, so I set the + * variables to each pointer increments inside the loop. + */ + y1 = pixman_fixed_to_int (y); + y2 = y1 + 1; + + if (y1 < 0 || y1 >= bits->height) + { + top_row = zero; + x_top = 0; + ux_top = 0; + } + else { - const fetcher_info_t *f; + top_row = bits->bits + y1 * bits->rowstride; + x_top = x; + ux_top = ux; + } - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + if (y2 < 0 || y2 >= bits->height) + { + bottom_row = zero; + x_bottom = 0; + ux_bottom = 0; + } + else + { + bottom_row = bits->bits + y2 * bits->rowstride; + x_bottom = x; + ux_bottom = ux; + } + + /* Instead of checking whether the operation uses the mast in + * each loop iteration, verify this only once and prepare the + * variables to make the code smaller inside the loop. 
+ */ + if (!mask) + { + mask_inc = 0; + mask = &one; + } + else + { + /* If have a mask, prepare the variables to check it */ + mask_inc = 1; + } + + /* If both are zero, then the whole thing is zero */ + if (top_row == zero && bottom_row == zero) + { + memset (buffer, 0, width * sizeof (uint32_t)); + return iter->buffer; + } + else if (bits->format == PIXMAN_x8r8g8b8) + { + if (top_row == zero) { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; + top_mask = 0; + bottom_mask = 0xff000000; + } + else if (bottom_row == zero) + { + top_mask = 0xff000000; + bottom_mask = 0; + } + else + { + top_mask = 0xff000000; + bottom_mask = 0xff000000; + } + } + else + { + top_mask = 0; + bottom_mask = 0; + } - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; + end = buffer + width; - iter->get_scanline = f->get_scanline; - return TRUE; - } + /* Zero fill to the left of the image */ + while (buffer < end && x < pixman_fixed_minus_1) + { + *buffer++ = 0; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Left edge + */ + while (buffer < end && x < 0) + { + uint32_t tr, br; + int32_t distx; + + tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; + br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); + + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Main part */ + w = pixman_int_to_fixed (bits->width - 1); + + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, tr, bl, br; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | 
bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; } - return FALSE; + /* Right Edge */ + w = pixman_int_to_fixed (bits->width); + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, bl; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Zero fill to the left of the image */ + while (buffer < end) + *buffer++ = 0; + + return iter->buffer; } -static pixman_bool_t -fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); + +static force_inline void +bits_image_fetch_separable_convolution_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) { - pixman_image_t *image = iter->image; + bits_image_t *bits = &image->bits; + pixman_fixed_t *params = image->common.filter_params; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + pixman_fixed_t vx, vy; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int k; - if (iter->iter_flags & ITER_16) - return FALSE; + /* reference point is the center of the pixel */ + 
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + vx = v.vector[0]; + vy = v.vector[1]; + + for (k = 0; k < width; ++k) { - const fetcher_info_t *f; + pixman_fixed_t *y_params; + int satot, srtot, sgtot, sbtot; + pixman_fixed_t x, y; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + if (mask && !mask[k]) + goto next; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). 
+ */ + x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + satot = srtot = sgtot = sbtot = 0; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + for (i = y1; i < y2; ++i) { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; + pixman_fixed_t fy = *y_params++; - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; + if (fy) + { + pixman_fixed_t *x_params = params + 4 + px * cwidth; - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = fast_dest_fetch_noop; - } - else + for (j = x1; j < x2; ++j) { - iter->get_scanline = f->get_scanline; + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel, mask; + uint8_t *row; + + mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, bits->width); + repeat (repeat_mode, &ry, bits->height); + + row = (uint8_t *)(bits->bits + bits->rowstride * ry); + pixel = convert_pixel (row, rx) | mask; + } + else + { + if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) + { + pixel = 0; + } + else + { + row = (uint8_t *)(bits->bits + bits->rowstride * ry); + pixel = convert_pixel (row, rx) | mask; + } + } + + f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } } - iter->write_back = f->write_back; - return TRUE; } } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); + + next: + vx += ux; + vy += uy; + } +} + +static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +static force_inline void +bits_image_fetch_bilinear_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + 
for (i = 0; i < width; ++i) + { + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + int width = image->bits.width; + int height = image->bits.height; + const uint8_t *row1; + const uint8_t *row2; + + if (mask && !mask[i]) + goto next; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); + + y1 = pixman_fixed_to_int (y1); + y2 = y1 + 1; + x1 = pixman_fixed_to_int (x1); + x2 = x1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + uint32_t mask; + + mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + row1 = (uint8_t *)(bits->bits + bits->rowstride * y1); + row2 = (uint8_t *)(bits->bits + bits->rowstride * y2); + + tl = convert_pixel (row1, x1) | mask; + tr = convert_pixel (row1, x2) | mask; + bl = convert_pixel (row2, x1) | mask; + br = convert_pixel (row2, x2) | mask; + } + else + { + uint32_t mask1, mask2; + int bpp; + + /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, + * which means if you use it in expressions, those + * expressions become unsigned themselves. Since + * the variables below can be negative in some cases, + * that will lead to crashes on 64 bit architectures. + * + * So this line makes sure bpp is signed + */ + bpp = PIXMAN_FORMAT_BPP (format); + + if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) + { + buffer[i] = 0; + goto next; + } + + if (y2 == 0) + { + row1 = zero; + mask1 = 0; + } + else + { + row1 = (uint8_t *)(bits->bits + bits->rowstride * y1); + row1 += bpp / 8 * x1; + + mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (y1 == height - 1) + { + row2 = zero; + mask2 = 0; + } + else + { + row2 = (uint8_t *)(bits->bits + bits->rowstride * y2); + row2 += bpp / 8 * x1; + + mask2 = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + } + + if (x2 == 0) + { + tl = 0; + bl = 0; + } + else + { + tl = convert_pixel (row1, 0) | mask1; + bl = convert_pixel (row2, 0) | mask2; + } + + if (x1 == width - 1) + { + tr = 0; + br = 0; + } + else + { + tr = convert_pixel (row1, 1) | mask1; + br = convert_pixel (row2, 1) | mask2; + } + } + + buffer[i] = bilinear_interpolation ( + tl, tr, bl, br, distx, disty); + + next: + x += ux; + y += uy; } - return FALSE; } +static force_inline void +bits_image_fetch_nearest_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int width, height, x0, y0; + const uint8_t *row; + + if (mask && !mask[i]) + goto next; + + width = image->bits.width; + height = image->bits.height; + x0 = pixman_fixed_to_int (x - pixman_fixed_e); + y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (repeat_mode == PIXMAN_REPEAT_NONE && + (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) + { + buffer[i] = 0; + } + else + { + uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); + } + + row = (uint8_t *)(bits->bits + bits->rowstride * y0); + + buffer[i] = convert_pixel (row, x0) | mask; + } + + next: + x += ux; + y += uy; + } +} + +static force_inline uint32_t +convert_a8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_x8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_a8 (const uint8_t *row, int x) +{ + return (uint32_t) *(row + x) << 24; +} + +static force_inline uint32_t +convert_r5g6b5 (const uint8_t *row, int x) +{ + return convert_0565_to_0888 (*((uint16_t *)row + x)); +} + +#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_separable_convolution_affine ( \ + iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + \ + return iter->buffer; \ + } + +#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_bilinear_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + } + +#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_nearest_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return 
iter->buffer; \ + } + +#define MAKE_FETCHERS(name, format, repeat_mode) \ + MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ + MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) + +MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t fast_iters[] = +{ + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + 
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + ITER_NARROW | ITER_SRC, + fast_bilinear_cover_iter_init, + NULL, NULL + }, + +#define FAST_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_X_UNIT_POSITIVE | \ + FAST_PATH_Y_UNIT_ZERO | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_BILINEAR_FILTER) + + { PIXMAN_a8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + + { PIXMAN_x8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + +#define GENERAL_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_BILINEAR_FILTER) + +#define GENERAL_NEAREST_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_NEAREST_FILTER) + +#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) + +#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \ + }, + +#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_bilinear_affine_ ## name, NULL, \ + }, + +#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_nearest_affine_ ## name, NULL \ + }, + 
+#define AFFINE_FAST_PATHS(name, format, repeat) \ + NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) + + AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_a8, a8, PAD) + AFFINE_FAST_PATHS (none_a8, a8, NONE) + AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) + AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) + AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) + AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) + AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) + AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) + + { PIXMAN_null }, +}; pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) @@ -2583,8 +3288,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; - imp->src_iter_init = fast_src_iter_init; - imp->dest_iter_init = fast_dest_iter_init; + imp->iter_info = fast_iters; return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.h b/gfx/cairo/libpixman/src/pixman-fast-path.h deleted file mode 100644 index 1885d47e77..0000000000 --- a/gfx/cairo/libpixman/src/pixman-fast-path.h +++ /dev/null @@ -1,1022 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. 
- * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. 
- */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (*c < 0 || *c >= size) - return FALSE; - } - else if (repeat == PIXMAN_REPEAT_NORMAL) - { - while (*c >= size) - *c -= size; - while (*c < 0) - *c += size; - } - else if (repeat == PIXMAN_REPEAT_PAD) - { - *c = CLIP (*c, 0, size - 1); - } - else /* REFLECT */ - { - *c = MOD (*c, size * 2); - if (*c >= size) - *c = size * 2 - *c - 1; - } - return TRUE; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - * is probably excessive in many cases. This particular function - * may need its own correctness test and performance tuning. 
- */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * width, - int32_t * left_pad, - int32_t * right_pad) -{ - int64_t max_vx = (int64_t) source_image_width << 16; - int64_t tmp; - if (vx < 0) - { - tmp = ((int64_t) unit_x - 1 - vx) / unit_x; - if (tmp > *width) - { - *left_pad = *width; - *width = 0; - } - else - { - *left_pad = (int32_t) tmp; - *width -= (int32_t) tmp; - } - } - else - { - *left_pad = 0; - } - tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; - if (tmp < 0) - { - *right_pad = *width; - *width = 0; - } - else if (tmp >= *width) - { - *right_pad = 0; - } - else - { - *right_pad = *width - (int32_t) tmp; - *width = (int32_t) tmp; - } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with - 565 source, but it is needed to build. 
*/ -#define GET_0565_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static force_inline void \ -scanline_func_name (dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t fully_transparent_src) \ -{ \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int x1, x2; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ - return; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - while ((w -= 2) >= 0) \ - { \ - x1 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s1 = src[x1]; \ - \ - x2 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s2 = src[x2]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - \ - if (a2 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - else if (s2) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ - a2 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* 
PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - } \ - \ - if (w & 1) \ - { \ - x1 = vx >> 16; \ - s1 = src[x1]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - } \ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ -static void \ -fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dst_x, \ - int32_t dst_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_fixed_t max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, right_pad; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ - if (have_mask) \ - { \ - if (mask_is_solid) \ - solid_mask = _pixman_image_get_solid (imp, mask_image, 
dst_image->bits.format); \ - else \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* Clamp repeating positions inside the actual samples */ \ - max_vx = src_image->bits.width << 16; \ - max_vy = src_image->bits.height << 16; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ - &width, &left_pad, &right_pad); \ - vx += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - if (have_mask && !mask_is_solid) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y = vy >> 16; \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - 
repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ - dst + left_pad + width, src + src_image->bits.width - 1, \ - right_pad, 0, 0, 0, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - static const src_type_t zero[1] = { 0 }; \ - if (y < 0 || y >= src_image->bits.height) \ - { \ - scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \ - continue; \ - } \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 
0 : left_pad + width), \ - dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \ - } \ - } \ - else \ - { \ - src = src_first_line + src_stride * y; \ - scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) - -#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - static force_inline void \ - scanline_func##scale_func_name##_wrapper ( \ - const uint8_t *mask, \ - dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t fully_transparent_src) \ - { \ - scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ - } \ - FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ - src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) - -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ - dst_type_t, repeat_mode) - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ - FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ - OP, repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ - scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - src_type_t, dst_type_t, repeat_mode) - - -#define SCALED_NEAREST_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - 
FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, 
FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## 
op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) - -/*****************************************************************************/ - -/* - * Identify 5 zones in each scanline for bilinear scaling. Depending on - * whether 2 pixels to be interpolated are fetched from the image itself, - * from the padding area around it or from both image and padding area. 
- */ -static force_inline void -bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * left_pad, - int32_t * left_tz, - int32_t * width, - int32_t * right_tz, - int32_t * right_pad) -{ - int width1 = *width, left_pad1, right_pad1; - int width2 = *width, left_pad2, right_pad2; - - pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, - &width1, &left_pad1, &right_pad1); - pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, - unit_x, &width2, &left_pad2, &right_pad2); - - *left_pad = left_pad2; - *left_tz = left_pad1 - left_pad2; - *right_tz = right_pad2 - right_pad1; - *right_pad = right_pad1; - *width -= *left_pad + *left_tz + *right_tz + *right_pad; -} - -/* - * Main loop template for single pass bilinear scaling. It needs to be - * provided with 'scanline_func' which should do the compositing operation. - * The needed function has the following prototype: - * - * scanline_func (dst_type_t * dst, - * const mask_type_ * mask, - * const src_type_t * src_top, - * const src_type_t * src_bottom, - * int32_t width, - * int weight_top, - * int weight_bottom, - * pixman_fixed_t vx, - * pixman_fixed_t unit_x, - * pixman_fixed_t max_vx, - * pixman_bool_t zero_src) - * - * Where: - * dst - destination scanline buffer for storing results - * mask - mask buffer (or single value for solid mask) - * src_top, src_bottom - two source scanlines - * width - number of pixels to process - * weight_top - weight of the top row for interpolation - * weight_bottom - weight of the bottom row for interpolation - * vx - initial position for fetching the first pair of - * pixels from the source buffer - * unit_x - position increment needed to move to the next pair - * of pixels - * max_vx - image size as a fixed point value, can be used for - * implementing NORMAL repeat (when it is supported) - * zero_src - boolean hint variable, which is set to TRUE when - * all source pixels are fetched 
from zero padding - * zone for NONE repeat - * - * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, - * but sometimes it may be less than that for NONE repeat when handling - * fuzzy antialiased top or bottom image edges. Also both top and - * bottom weight variables are guaranteed to have value in 0-255 - * range and can fit into unsigned byte or be used with 8-bit SIMD - * multiplication instructions. - */ -#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ -static void \ -fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dst_x, \ - int32_t dst_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y1, y2; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, left_tz, right_tz, right_pad; \ - \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ - if (have_mask) \ - { \ - if (mask_is_solid) \ - { \ - solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \ - mask_stride = 0; \ - } \ - else \ - { \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - } \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, 
src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - v.vector[0] -= pixman_fixed_1 / 2; \ - v.vector[1] -= pixman_fixed_1 / 2; \ - \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ - &left_pad, &left_tz, &width, &right_tz, &right_pad); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - /* PAD repeat does not need special handling for 'transition zones' and */ \ - /* they can be combined with 'padding zones' safely */ \ - left_pad += left_tz; \ - right_pad += right_tz; \ - left_tz = right_tz = 0; \ - } \ - v.vector[0] += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - int weight1, weight2; \ - dst = dst_line; \ - dst_line += dst_stride; \ - vx = v.vector[0]; \ - if (have_mask && !mask_is_solid) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y1 = pixman_fixed_to_int (vy); \ - weight2 = (vy >> 8) & 0xff; \ - if (weight2) \ - { \ - /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ - y2 = y1 + 1; \ - weight1 = 256 - weight2; \ - } \ - else \ - { \ - /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ - y2 = y1; \ - weight1 = weight2 = 128; \ - } \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - repeat (PIXMAN_REPEAT_PAD, &y1, 
src_image->bits.height); \ - repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[0]; \ - buf2[0] = buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ - dst += left_pad; \ - if (have_mask && !mask_is_solid) \ - mask += left_pad; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (have_mask && !mask_is_solid) \ - mask += width; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ - buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ - if (y1 < 0) \ - { \ - weight1 = 0; \ - y1 = 0; \ - } \ - if (y1 >= src_image->bits.height) \ - { \ - weight1 = 0; \ - y1 = src_image->bits.height - 1; \ - } \ - if (y2 < 0) \ - { \ - weight2 = 0; \ - y2 = 0; \ - } \ - if (y2 >= src_image->bits.height) \ - { \ - weight2 = 0; \ - y2 = src_image->bits.height - 1; \ - } \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ - dst += left_pad; \ - if (have_mask && !mask_is_solid) \ - mask += left_pad; \ - } \ - if (left_tz > 0) \ - { \ - buf1[0] = 0; \ - buf1[1] = src1[0]; \ - buf2[0] = 0; \ - buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_tz, weight1, weight2, \ - 
pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += left_tz; \ - if (have_mask && !mask_is_solid) \ - mask += left_tz; \ - vx += left_tz * unit_x; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (have_mask && !mask_is_solid) \ - mask += width; \ - vx += width * unit_x; \ - } \ - if (right_tz > 0) \ - { \ - buf1[0] = src1[src_image->bits.width - 1]; \ - buf1[1] = 0; \ - buf2[0] = src2[src_image->bits.width - 1]; \ - buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_tz, weight1, weight2, \ - pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += right_tz; \ - if (have_mask && !mask_is_solid) \ - mask += right_tz; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ - } \ - } \ - else \ - { \ - scanline_func (dst, mask, src_first_line + src_stride * y1, \ - src_first_line + src_stride * y2, width, \ - weight1, weight2, vx, unit_x, max_vx, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ - FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ - dst_type_t, repeat_mode, have_mask, mask_is_solid) - -#define SCALED_BILINEAR_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_BILINEAR_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## 
_pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, 
FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func) - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) - -#endif diff --git a/gfx/cairo/libpixman/src/pixman-filter.c b/gfx/cairo/libpixman/src/pixman-filter.c index 5ff7b6eaad..5f3b752f9b 100644 --- a/gfx/cairo/libpixman/src/pixman-filter.c +++ b/gfx/cairo/libpixman/src/pixman-filter.c @@ -109,14 +109,16 @@ general_cubic (double x, double B, double C) if (ax < 1) { - return ((12 - 9 * B - 6 * C) * ax * ax * ax + - (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; + return (((12 - 9 * B - 6 * 
C) * ax + + (-18 + 12 * B + 6 * C)) * ax * ax + + (6 - 2 * B)) / 6; } - else if (ax >= 1 && ax < 2) + else if (ax < 2) { - return ((-B - 6 * C) * ax * ax * ax + - (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * - ax + (8 * B + 24 * C)) / 6; + return ((((-B - 6 * C) * ax + + (6 * B + 30 * C)) * ax + + (-12 * B - 48 * C)) * ax + + (8 * B + 24 * C)) / 6; } else { @@ -141,7 +143,7 @@ static const filter_info_t filters[] = { PIXMAN_KERNEL_BOX, box_kernel, 1.0 }, { PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 }, { PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 }, - { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA }, + { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 5.0 }, { PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 }, { PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 }, { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 }, @@ -160,18 +162,21 @@ integral (pixman_kernel_t kernel1, double x1, pixman_kernel_t kernel2, double scale, double x2, double width) { - /* If the integration interval crosses zero, break it into - * two separate integrals. This ensures that filters such - * as LINEAR that are not differentiable at 0 will still - * integrate properly. + if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX) + { + return width; + } + /* The LINEAR filter is not differentiable at 0, so if the + * integration interval crosses zero, break it into two + * separate integrals. 
*/ - if (x1 < 0 && x1 + width > 0) + else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0) { return integral (kernel1, x1, kernel2, scale, x2, - x1) + integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); } - else if (x2 < 0 && x2 + width > 0) + else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0) { return integral (kernel1, x1, kernel2, scale, x2, - x2) + @@ -189,13 +194,19 @@ integral (pixman_kernel_t kernel1, double x1, } else { - /* Integration via Simpson's rule */ -#define N_SEGMENTS 128 + /* Integration via Simpson's rule + * See http://www.intmath.com/integration/6-simpsons-rule.php + * 12 segments (6 cubic approximations) seems to produce best + * result for lanczos3.linear, which was the combination that + * showed the most errors. This makes sense as the lanczos3 + * filter is 6 wide. + */ +#define N_SEGMENTS 12 #define SAMPLE(a1, a2) \ (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) double s = 0.0; - double h = width / (double)N_SEGMENTS; + double h = width / N_SEGMENTS; int i; s = SAMPLE (x1, x2); @@ -204,11 +215,14 @@ integral (pixman_kernel_t kernel1, double x1, { double a1 = x1 + h * i; double a2 = x2 + h * i; + s += 4 * SAMPLE (a1, a2); + } + for (i = 2; i < N_SEGMENTS; i += 2) + { + double a1 = x1 + h * i; + double a2 = x2 + h * i; s += 2 * SAMPLE (a1, a2); - - if (i >= 2 && i < N_SEGMENTS - 1) - s += 4 * SAMPLE (a1, a2); } s += SAMPLE (x1 + width, x2 + width); @@ -217,25 +231,17 @@ integral (pixman_kernel_t kernel1, double x1, } } -static pixman_fixed_t * -create_1d_filter (int *width, +static void +create_1d_filter (int width, pixman_kernel_t reconstruct, pixman_kernel_t sample, double scale, - int n_phases) + int n_phases, + pixman_fixed_t *p) { - pixman_fixed_t *params, *p; double step; - double size; int i; - size = scale * filters[sample].width + filters[reconstruct].width; - *width = ceil (size); - - p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t)); - if (!params) 
- return NULL; - step = 1.0 / n_phases; for (i = 0; i < n_phases; ++i) @@ -243,15 +249,15 @@ create_1d_filter (int *width, double frac = step / 2.0 + i * step; pixman_fixed_t new_total; int x, x1, x2; - double total; + double total, e; /* Sample convolution of reconstruction and sampling * filter. See rounding.txt regarding the rounding * and sample positions. */ - x1 = ceil (frac - *width / 2.0 - 0.5); - x2 = x1 + *width; + x1 = ceil (frac - width / 2.0 - 0.5); + x2 = x1 + width; total = 0; for (x = x1; x < x2; ++x) @@ -274,29 +280,154 @@ create_1d_filter (int *width, ihigh - ilow); } - total += c; - *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5); + *p = (pixman_fixed_t)floor (c * 65536.0 + 0.5); + total += *p; + p++; } - /* Normalize */ - p -= *width; - total = 1 / total; + /* Normalize, with error diffusion */ + p -= width; + total = 65536.0 / total; new_total = 0; + e = 0.0; for (x = x1; x < x2; ++x) { - pixman_fixed_t t = (*p) * total + 0.5; + double v = (*p) * total + e; + pixman_fixed_t t = floor (v + 0.5); + e = v - t; new_total += t; *p++ = t; } - if (new_total != pixman_fixed_1) - *(p - *width / 2) += (pixman_fixed_1 - new_total); + /* pixman_fixed_e's worth of error may remain; put it + * at the first sample, since that is the only one that + * hasn't had any error diffused into it. + */ + *(p - width) += pixman_fixed_1 - new_total; } +} - return params; + +static int +filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size) +{ + return ceil (filters[reconstruct].width + size * filters[sample].width); +} + +#ifdef PIXMAN_GNUPLOT + +/* If enable-gnuplot is configured, then you can pipe the output of a + * pixman-using program to gnuplot and get a continuously-updated plot + * of the horizontal filter. This works well with demos/scale to test + * the filter generation. + * + * The plot is all the different subposition filters shuffled + * together. 
This is misleading in a few cases: + * + * IMPULSE.BOX - goes up and down as the subfilters have different + * numbers of non-zero samples + * IMPULSE.TRIANGLE - somewhat crooked for the same reason + * 1-wide filters - looks triangular, but a 1-wide box would be more + * accurate + */ +static void +gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p) +{ + double step; + int i, j; + int first; + + step = 1.0 / n_phases; + + printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n"); + printf ("plot [x=%g:%g] '-' with linespoints ls 1\n", -width*0.5, width*0.5); + /* Print a point at the origin so that y==0 line is included: */ + printf ("0 0\n\n"); + + /* The position of the first sample of the phase corresponding to + * frac is given by: + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * + * We have to find the frac that minimizes this expression. + * + * For odd widths, we have + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * = ceil (frac) + K - frac + * = 1 + K - frac + * + * for some K, so this is minimized when frac is maximized and + * strictly growing with frac. So for odd widths, we can simply + * start at the last phase and go backwards. + * + * For even widths, we have + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * = ceil (frac - 0.5) + K - frac + * + * The graph for this function (ignoring K) looks like this: + * + * 0.5 + * | |\ + * | | \ + * | | \ + * 0 | | \ + * |\ | + * | \ | + * | \ | + * -0.5 | \| + * --------------------------------- + * 0 0.5 1 + * + * So in this case we need to start with the phase whose frac is + * less than, but as close as possible to 0.5, then go backwards + * until we hit the first phase, then wrap around to the last + * phase and continue backwards. + * + * Which phase is as close as possible 0.5? 
The locations of the + * sampling point corresponding to the kth phase is given by + * 1/(2 * n_phases) + k / n_phases: + * + * 1/(2 * n_phases) + k / n_phases = 0.5 + * + * from which it follows that + * + * k = (n_phases - 1) / 2 + * + * rounded down is the phase in question. + */ + if (width & 1) + first = n_phases - 1; + else + first = (n_phases - 1) / 2; + + for (j = 0; j < width; ++j) + { + for (i = 0; i < n_phases; ++i) + { + int phase = first - i; + double frac, pos; + + if (phase < 0) + phase = n_phases + phase; + + frac = step / 2.0 + phase * step; + pos = ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + j; + + printf ("%g %g\n", + pos, + pixman_fixed_to_double (*(p + phase * width + j))); + } + } + + printf ("e\n"); + fflush (stdout); } +#endif + /* Create the parameter list for a SEPARABLE_CONVOLUTION filter * with the given kernels and scale parameters */ @@ -313,38 +444,35 @@ pixman_filter_create_separable_convolution (int *n_values, { double sx = fabs (pixman_fixed_to_double (scale_x)); double sy = fabs (pixman_fixed_to_double (scale_y)); - pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL; + pixman_fixed_t *params; int subsample_x, subsample_y; int width, height; + width = filter_width (reconstruct_x, sample_x, sx); subsample_x = (1 << subsample_bits_x); - subsample_y = (1 << subsample_bits_y); - horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x); - vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y); + height = filter_width (reconstruct_y, sample_y, sy); + subsample_y = (1 << subsample_bits_y); - if (!horz || !vert) - goto out; - *n_values = 4 + width * subsample_x + height * subsample_y; params = malloc (*n_values * sizeof (pixman_fixed_t)); if (!params) - goto out; + return NULL; params[0] = pixman_int_to_fixed (width); params[1] = pixman_int_to_fixed (height); params[2] = pixman_int_to_fixed (subsample_bits_x); params[3] = pixman_int_to_fixed (subsample_bits_y); - memcpy (params + 4, horz, - 
width * subsample_x * sizeof (pixman_fixed_t)); - memcpy (params + 4 + width * subsample_x, vert, - height * subsample_y * sizeof (pixman_fixed_t)); + create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x, + params + 4); + create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y, + params + 4 + width * subsample_x); -out: - free (horz); - free (vert); +#ifdef PIXMAN_GNUPLOT + gnuplot_filter(width, subsample_x, params + 4); +#endif return params; } diff --git a/gfx/cairo/libpixman/src/pixman-general.c b/gfx/cairo/libpixman/src/pixman-general.c index 2a551e3a5e..7e5a0d09cc 100644 --- a/gfx/cairo/libpixman/src/pixman-general.c +++ b/gfx/cairo/libpixman/src/pixman-general.c @@ -37,43 +37,47 @@ #include <string.h> #include "pixman-private.h" -static pixman_bool_t -general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - if (image->type == LINEAR) - _pixman_linear_gradient_iter_init (image, iter); - else if (image->type == RADIAL) + switch (image->type) + { + case BITS: + if ((iter->iter_flags & ITER_SRC) == ITER_SRC) + _pixman_bits_image_src_iter_init (image, iter); + else + _pixman_bits_image_dest_iter_init (image, iter); + break; + + case LINEAR: + _pixman_linear_gradient_iter_init (image, iter); + break; + + case RADIAL: _pixman_radial_gradient_iter_init (image, iter); - else if (image->type == CONICAL) + break; + + case CONICAL: _pixman_conical_gradient_iter_init (image, iter); - else if (image->type == BITS) - _pixman_bits_image_src_iter_init (image, iter); - else if (image->type == SOLID) + break; + + case SOLID: _pixman_log_error (FUNC, "Solid image not handled by noop"); - else - _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; - return TRUE; + default: + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; + } } -static pixman_bool_t -general_dest_iter_init 
(pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t general_iters[] = { - if (iter->image->type == BITS) - { - _pixman_bits_image_dest_iter_init (iter->image, iter); - - return TRUE; - } - else - { - _pixman_log_error (FUNC, "Trying to write to a non-writable image"); - - return FALSE; - } -} + { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, + { PIXMAN_null }, +}; typedef struct op_info_t op_info_t; struct op_info_t @@ -105,62 +109,75 @@ static const op_info_t op_flags[PIXMAN_N_OPERATORS] = #define SCANLINE_BUFFER_LENGTH 8192 +static pixman_bool_t +operator_needs_division (pixman_op_t op) +{ + static const uint8_t needs_division[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* SATURATE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* DISJOINT */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, /* CONJOINT */ + 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, /* blend ops */ + }; + + return needs_division[op]; +} + static void general_composite_rect (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8]; + uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH]; uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; - iter_flags_t narrow, src_iter_flags; - iter_flags_t rgb16; + iter_flags_t width_flag, src_iter_flags; int Bpp; int i; - if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && - (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && - (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) + if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (dest_image->common.flags & FAST_PATH_NARROW_FORMAT) && + !(operator_needs_division 
(op)) && + (dest_image->bits.dither == PIXMAN_DITHER_NONE)) { - narrow = ITER_NARROW; + width_flag = ITER_NARROW; Bpp = 4; } else { - narrow = 0; + width_flag = ITER_WIDE; Bpp = 16; } - // XXX: This special casing is bad. Ideally, we'd keep the general code general perhaps - // by having it deal more specifically with different intermediate formats - if ( - (dest_image->common.flags & FAST_PATH_16_FORMAT && (src_image->type == LINEAR || src_image->type == RADIAL)) && - ( op == PIXMAN_OP_SRC || - (op == PIXMAN_OP_OVER && (src_image->common.flags & FAST_PATH_IS_OPAQUE)) - ) - ) { - rgb16 = ITER_16; - } else { - rgb16 = 0; - } +#define ALIGN(addr) \ + ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15))) + if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3)) + return; - if (width * Bpp > SCANLINE_BUFFER_LENGTH) + if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3) { - scanline_buffer = pixman_malloc_abc (width, 3, Bpp); + scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3); if (!scanline_buffer) return; + + memset (scanline_buffer, 0, width * Bpp * 3 + 15 * 3); + } + else + { + memset (stack_scanline_buffer, 0, sizeof (stack_scanline_buffer)); } - src_buffer = scanline_buffer; - mask_buffer = src_buffer + width * Bpp; - dest_buffer = mask_buffer + width * Bpp; + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); - if (!narrow) + if (width_flag == ITER_WIDE) { /* To make sure there aren't any NANs in the buffers */ memset (src_buffer, 0, width * Bpp); @@ -169,11 +186,12 @@ general_composite_rect (pixman_implementation_t *imp, } /* src iter */ - src_iter_flags = narrow | op_flags[op].src | rgb16; + src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; - _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, info->src_flags); + _pixman_implementation_iter_init 
(imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info->src_flags); /* mask iter */ if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == @@ -185,23 +203,21 @@ general_composite_rect (pixman_implementation_t *imp, mask_image = NULL; } - component_alpha = - mask_image && - mask_image->common.type == BITS && - mask_image->common.component_alpha && - PIXMAN_FORMAT_RGB (mask_image->bits.format); + component_alpha = mask_image && mask_image->common.component_alpha; - _pixman_implementation_src_iter_init ( - imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, - mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags); + _pixman_implementation_iter_init ( + imp->toplevel, &mask_iter, + mask_image, mask_x, mask_y, width, height, mask_buffer, + ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), + info->mask_flags); /* dest iter */ - _pixman_implementation_dest_iter_init ( + _pixman_implementation_iter_init ( imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, - dest_buffer, narrow | op_flags[op].dst | rgb16, info->dest_flags); + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); compose = _pixman_implementation_lookup_combiner ( - imp->toplevel, op, component_alpha, narrow, !!rgb16); + imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); for (i = 0; i < height; ++i) { @@ -216,6 +232,13 @@ general_composite_rect (pixman_implementation_t *imp, dest_iter.write_back (&dest_iter); } + if (src_iter.fini) + src_iter.fini (&src_iter); + if (mask_iter.fini) + mask_iter.fini (&mask_iter); + if (dest_iter.fini) + dest_iter.fini (&dest_iter); + if (scanline_buffer != (uint8_t *) stack_scanline_buffer) free (scanline_buffer); } @@ -231,12 +254,10 @@ _pixman_implementation_create_general (void) { pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path); - _pixman_setup_combiner_functions_16 
(imp); _pixman_setup_combiner_functions_32 (imp); _pixman_setup_combiner_functions_float (imp); - imp->src_iter_init = general_src_iter_init; - imp->dest_iter_init = general_dest_iter_init; + imp->iter_info = general_iters; return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-glyph.c b/gfx/cairo/libpixman/src/pixman-glyph.c index 5a271b64b8..96a349ab47 100644 --- a/gfx/cairo/libpixman/src/pixman-glyph.c +++ b/gfx/cairo/libpixman/src/pixman-glyph.c @@ -391,6 +391,9 @@ box32_intersect (pixman_box32_t *dest, return dest->x2 > dest->x1 && dest->y2 > dest->y1; } +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif PIXMAN_EXPORT void pixman_composite_glyphs_no_mask (pixman_op_t op, pixman_image_t *src, @@ -630,6 +633,9 @@ out: * - Trim the mask to the destination clip/image? * - Trim composite region based on sources, when the op ignores 0s. */ +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif PIXMAN_EXPORT void pixman_composite_glyphs (pixman_op_t op, pixman_image_t *src, diff --git a/gfx/cairo/libpixman/src/pixman-gradient-walker.c b/gfx/cairo/libpixman/src/pixman-gradient-walker.c index e7e724fa6c..fb7f401dac 100644 --- a/gfx/cairo/libpixman/src/pixman-gradient-walker.c +++ b/gfx/cairo/libpixman/src/pixman-gradient-walker.c @@ -37,11 +37,14 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker, walker->stops = gradient->stops; walker->left_x = 0; walker->right_x = 0x10000; - walker->stepper = 0; - walker->left_ag = 0; - walker->left_rb = 0; - walker->right_ag = 0; - walker->right_rb = 0; + walker->a_s = 0.0f; + walker->a_b = 0.0f; + walker->r_s = 0.0f; + walker->r_b = 0.0f; + walker->g_s = 0.0f; + walker->g_b = 0.0f; + walker->b_s = 0.0f; + walker->b_b = 0.0f; walker->repeat = repeat; walker->need_reset = TRUE; @@ -51,10 +54,13 @@ static void gradient_walker_reset (pixman_gradient_walker_t *walker, 
pixman_fixed_48_16_t pos) { - int32_t x, left_x, right_x; + int64_t x, left_x, right_x; pixman_color_t *left_c, *right_c; int n, count = walker->num_stops; pixman_gradient_stop_t *stops = walker->stops; + float la, lr, lg, lb; + float ra, rr, rg, rb; + float lx, rx; if (walker->repeat == PIXMAN_REPEAT_NORMAL) { @@ -116,57 +122,143 @@ gradient_walker_reset (pixman_gradient_walker_t *walker, left_c = right_c; } - walker->left_x = left_x; - walker->right_x = right_x; - walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8); - walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8); - walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8); - walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8); + /* The alpha/red/green/blue channels are scaled to be in [0, 1]. + * This ensures that after premultiplication all channels will + * be in the [0, 1] interval. + */ + la = (left_c->alpha * (1.0f/257.0f)); + lr = (left_c->red * (1.0f/257.0f)); + lg = (left_c->green * (1.0f/257.0f)); + lb = (left_c->blue * (1.0f/257.0f)); - if (walker->left_x == walker->right_x || - (walker->left_ag == walker->right_ag && - walker->left_rb == walker->right_rb)) + ra = (right_c->alpha * (1.0f/257.0f)); + rr = (right_c->red * (1.0f/257.0f)); + rg = (right_c->green * (1.0f/257.0f)); + rb = (right_c->blue * (1.0f/257.0f)); + + lx = left_x * (1.0f/65536.0f); + rx = right_x * (1.0f/65536.0f); + + if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX) { - walker->stepper = 0; + walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f; + walker->a_b = (la + ra) / 510.0f; + walker->r_b = (lr + rr) / 510.0f; + walker->g_b = (lg + rg) / 510.0f; + walker->b_b = (lb + rb) / 510.0f; } else { - int32_t width = right_x - left_x; - walker->stepper = ((1 << 24) + width / 2) / width; + float w_rec = 1.0f / (rx - lx); + + walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f); + walker->r_b = (lr * rx - rr * lx) * w_rec * 
(1.0f/255.0f); + walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f); + walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f); + + walker->a_s = (ra - la) * w_rec * (1.0f/255.0f); + walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f); + walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f); + walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f); } + + walker->left_x = left_x; + walker->right_x = right_x; walker->need_reset = FALSE; } -uint32_t -_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x) +static argb_t +pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x) +{ + argb_t f; + float y; + + if (walker->need_reset || x < walker->left_x || x >= walker->right_x) + gradient_walker_reset (walker, x); + + y = x * (1.0f / 65536.0f); + + f.a = walker->a_s * y + walker->a_b; + f.r = f.a * (walker->r_s * y + walker->r_b); + f.g = f.a * (walker->g_s * y + walker->g_b); + f.b = f.a * (walker->b_s * y + walker->b_b); + + return f; +} + +static uint32_t +pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x) { - int dist, idist; - uint32_t t1, t2, a, color; + argb_t f; + float y; if (walker->need_reset || x < walker->left_x || x >= walker->right_x) gradient_walker_reset (walker, x); - dist = ((int)(x - walker->left_x) * walker->stepper) >> 16; - idist = 256 - dist; + y = x * (1.0f / 65536.0f); - /* combined INTERPOLATE and premultiply */ - t1 = walker->left_rb * idist + walker->right_rb * dist; - t1 = (t1 >> 8) & 0xff00ff; + /* Instead of [0...1] for ARGB, we want [0...255], + * multiply alpha with 255 and the color channels + * also get multiplied by the alpha multiplier. 
+ * + * We don't use pixman_contract_from_float because it causes a 2x + * slowdown to do so, and the values are already normalized, + * so we don't have to worry about values < 0.f or > 1.f + */ + f.a = 255.f * (walker->a_s * y + walker->a_b); + f.r = f.a * (walker->r_s * y + walker->r_b); + f.g = f.a * (walker->g_s * y + walker->g_b); + f.b = f.a * (walker->b_s * y + walker->b_b); - t2 = walker->left_ag * idist + walker->right_ag * dist; - t2 &= 0xff00ff00; + return (((uint32_t)(f.a + .5f) << 24) & 0xff000000) | + (((uint32_t)(f.r + .5f) << 16) & 0x00ff0000) | + (((uint32_t)(f.g + .5f) << 8) & 0x0000ff00) | + (((uint32_t)(f.b + .5f) >> 0) & 0x000000ff); +} - color = t2 & 0xff000000; - a = t2 >> 24; +void +_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer) +{ + *buffer = pixman_gradient_walker_pixel_32 (walker, x); +} - t1 = t1 * a + 0x800080; - t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8; +void +_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer) +{ + *(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x); +} - t2 = (t2 >> 8) * a + 0x800080; - t2 = (t2 + ((t2 >> 8) & 0xff00ff)); +void +_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer, + uint32_t *end) +{ + register uint32_t color; - return (color | (t1 & 0xff00ff) | (t2 & 0xff00)); + color = pixman_gradient_walker_pixel_32 (walker, x); + while (buffer < end) + *buffer++ = color; } +void +_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer, + uint32_t *end) +{ + register argb_t color; + argb_t *buffer_wide = (argb_t *)buffer; + argb_t *end_wide = (argb_t *)end; + + color = pixman_gradient_walker_pixel_float (walker, x); + while (buffer_wide < end_wide) + *buffer_wide++ = color; +} diff --git a/gfx/cairo/libpixman/src/pixman-image.c 
b/gfx/cairo/libpixman/src/pixman-image.c index f43e07f41d..db29ff5b4f 100644 --- a/gfx/cairo/libpixman/src/pixman-image.c +++ b/gfx/cairo/libpixman/src/pixman-image.c @@ -33,25 +33,6 @@ static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; -/** - ** bug 1293598 - clean up every pointer after free to avoid - ** "dereferencing freed memory" problem - **/ -#define PIXMAN_POSION - -static void -free_memory (void** p) -{ -#ifdef PIXMAN_POISON - if (*p) { -#endif - free (*p); -#ifdef PIXMAN_POISON - *p = NULL; - } -#endif -} - static void gradient_property_changed (pixman_image_t *image) { @@ -164,8 +145,8 @@ _pixman_image_fini (pixman_image_t *image) pixman_region32_fini (&common->clip_region); - free_memory (&common->transform); - free_memory (&common->filter_params); + free (common->transform); + free (common->filter_params); if (common->alpha_map) pixman_image_unref ((pixman_image_t *)common->alpha_map); @@ -177,8 +158,7 @@ _pixman_image_fini (pixman_image_t *image) if (image->gradient.stops) { /* See _pixman_init_gradient() for an explanation of the - 1 */ - void *addr = image->gradient.stops - 1; - free_memory (&addr); + free (image->gradient.stops - 1); } /* This will trigger if someone adds a property_changed @@ -189,11 +169,8 @@ _pixman_image_fini (pixman_image_t *image) image->common.property_changed == gradient_property_changed); } - if (image->type == BITS && image->bits.free_me) { - free_memory (&image->bits.free_me); - image->bits.bits = NULL; - } - + if (image->type == BITS && image->bits.free_me) + free (image->bits.free_me); return TRUE; } @@ -233,7 +210,7 @@ pixman_image_unref (pixman_image_t *image) { if (_pixman_image_fini (image)) { - free_memory (&image); + free (image); return TRUE; } @@ -358,37 +335,47 @@ compute_image_info (pixman_image_t *image) { flags |= FAST_PATH_NEAREST_FILTER; } - else if ( - /* affine and integer translation components in matrix ... 
*/ - ((flags & FAST_PATH_AFFINE_TRANSFORM) && - !pixman_fixed_frac (image->common.transform->matrix[0][2] | - image->common.transform->matrix[1][2])) && - ( - /* ... combined with a simple rotation */ - (flags & (FAST_PATH_ROTATE_90_TRANSFORM | - FAST_PATH_ROTATE_180_TRANSFORM | - FAST_PATH_ROTATE_270_TRANSFORM)) || - /* ... or combined with a simple non-rotated translation */ - (image->common.transform->matrix[0][0] == pixman_fixed_1 && - image->common.transform->matrix[1][1] == pixman_fixed_1 && - image->common.transform->matrix[0][1] == 0 && - image->common.transform->matrix[1][0] == 0) - ) - ) + else if (flags & FAST_PATH_AFFINE_TRANSFORM) { - /* FIXME: there are some affine-test failures, showing that - * handling of BILINEAR and NEAREST filter is not quite - * equivalent when getting close to 32K for the translation - * components of the matrix. That's likely some bug, but for - * now just skip BILINEAR->NEAREST optimization in this case. + /* Suppose the transform is + * + * [ t00, t01, t02 ] + * [ t10, t11, t12 ] + * [ 0, 0, 1 ] + * + * and the destination coordinates are (n + 0.5, m + 0.5). Then + * the transformed x coordinate is: + * + * tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02 + * = t00 * n + t01 * m + t02 + (t00 + t01) * 0.5 + * + * which implies that if t00, t01 and t02 are all integers + * and (t00 + t01) is odd, then tx will be an integer plus 0.5, + * which means a BILINEAR filter will reduce to NEAREST. 
The same + * applies in the y direction */ - pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); - if (image->common.transform->matrix[0][2] <= magic_limit && - image->common.transform->matrix[1][2] <= magic_limit && - image->common.transform->matrix[0][2] >= -magic_limit && - image->common.transform->matrix[1][2] >= -magic_limit) + pixman_fixed_t (*t)[3] = image->common.transform->matrix; + + if ((pixman_fixed_frac ( + t[0][0] | t[0][1] | t[0][2] | + t[1][0] | t[1][1] | t[1][2]) == 0) && + (pixman_fixed_to_int ( + (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1) { - flags |= FAST_PATH_NEAREST_FILTER; + /* FIXME: there are some affine-test failures, showing that + * handling of BILINEAR and NEAREST filter is not quite + * equivalent when getting close to 32K for the translation + * components of the matrix. That's likely some bug, but for + * now just skip BILINEAR->NEAREST optimization in this case. + */ + pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } } } break; @@ -483,10 +470,6 @@ compute_image_info (pixman_image_t *image) if (PIXMAN_FORMAT_IS_WIDE (image->bits.format)) flags &= ~FAST_PATH_NARROW_FORMAT; - - if (image->bits.format == PIXMAN_r5g6b5) - flags |= FAST_PATH_16_FORMAT; - break; case RADIAL: @@ -529,8 +512,10 @@ compute_image_info (pixman_image_t *image) break; } - /* Alpha map */ - if (!image->common.alpha_map) + /* Alpha maps are only supported for BITS images, so it's always + * safe to ignore their presense for non-BITS images + */ + if (!image->common.alpha_map || image->type != BITS) { flags |= FAST_PATH_NO_ALPHA_MAP; } @@ -699,6 +684,41 @@ pixman_image_set_repeat (pixman_image_t *image, image_property_changed (image); } +PIXMAN_EXPORT void 
+pixman_image_set_dither (pixman_image_t *image, + pixman_dither_t dither) +{ + if (image->type == BITS) + { + if (image->bits.dither == dither) + return; + + image->bits.dither = dither; + + image_property_changed (image); + } +} + +PIXMAN_EXPORT void +pixman_image_set_dither_offset (pixman_image_t *image, + int offset_x, + int offset_y) +{ + if (image->type == BITS) + { + if (image->bits.dither_offset_x == offset_x && + image->bits.dither_offset_y == offset_y) + { + return; + } + + image->bits.dither_offset_x = offset_x; + image->bits.dither_offset_y = offset_y; + + image_property_changed (image); + } +} + PIXMAN_EXPORT pixman_bool_t pixman_image_set_filter (pixman_image_t * image, pixman_filter_t filter, @@ -857,6 +877,10 @@ pixman_image_set_accessors (pixman_image_t * image, if (image->type == BITS) { + /* Accessors only work for <= 32 bpp. */ + if (PIXMAN_FORMAT_BPP(image->bits.format) > 32) + return_if_fail (!read_func && !write_func); + image->bits.read_func = read_func; image->bits.write_func = write_func; @@ -936,7 +960,7 @@ _pixman_image_get_solid (pixman_implementation_t *imp, else if (image->bits.format == PIXMAN_x8r8g8b8) result = image->bits.bits[0] | 0xff000000; else if (image->bits.format == PIXMAN_a8) - result = (*(uint8_t *)image->bits.bits) << 24; + result = (uint32_t)(*(uint8_t *)image->bits.bits) << 24; else goto otherwise; } @@ -945,12 +969,15 @@ _pixman_image_get_solid (pixman_implementation_t *imp, pixman_iter_t iter; otherwise: - _pixman_implementation_src_iter_init ( + _pixman_implementation_iter_init ( imp, &iter, image, 0, 0, 1, 1, (uint8_t *)&result, - ITER_NARROW, image->common.flags); + ITER_NARROW | ITER_SRC, image->common.flags); result = *iter.get_scanline (&iter, NULL); + + if (iter.fini) + iter.fini (&iter); } /* If necessary, convert RGB <--> BGR. 
*/ diff --git a/gfx/cairo/libpixman/src/pixman-implementation.c b/gfx/cairo/libpixman/src/pixman-implementation.c index 44d4097852..2c7de4c687 100644 --- a/gfx/cairo/libpixman/src/pixman-implementation.c +++ b/gfx/cairo/libpixman/src/pixman-implementation.c @@ -90,15 +90,7 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, /* Check cache for fast paths */ cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); - /* Bug 1324130 - For compatibility with Windows XP, we have to use Tls - * functions for the per-thread fast-path cache instead of the safer - * __declspec(thread) mechanism. If the Tls functions fail to set up - * the storage for some reason, cache will end up null here. As a - * temporary workaround, just check that cache is not null before - * using it. The implementation lookup will still function without the - * fast-path cache, however, it will incur a slow linear search. - */ - if (cache) for (i = 0; i < N_CACHED_FAST_PATHS; ++i) + for (i = 0; i < N_CACHED_FAST_PATHS; ++i) { const pixman_fast_path_t *info = &(cache->cache[i].fast_path); @@ -158,12 +150,19 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, } /* We should never reach this point */ - _pixman_log_error (FUNC, "No known composite function\n"); + _pixman_log_error ( + FUNC, + "No composite function found\n" + "\n" + "The most likely cause of this is that this system has issues with\n" + "thread local storage\n"); + *out_imp = NULL; *out_func = dummy_composite_rect; + return; update_cache: - if (cache && i) + if (i) { while (i--) cache->cache[i + 1] = cache->cache[i]; @@ -194,8 +193,7 @@ pixman_combine_32_func_t _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, pixman_op_t op, pixman_bool_t component_alpha, - pixman_bool_t narrow, - pixman_bool_t rgb16) + pixman_bool_t narrow) { while (imp) { @@ -219,8 +217,6 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, f = imp->combine_32_ca[op]; break; } - 
if (rgb16) - f = (pixman_combine_32_func_t *)imp->combine_16[op]; if (f) return f; @@ -289,18 +285,26 @@ _pixman_implementation_fill (pixman_implementation_t *imp, return FALSE; } -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) { + return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + pixman_format_code_t format; + iter->image = image; iter->buffer = (uint32_t *)buffer; iter->x = x; @@ -309,48 +313,40 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp, iter->height = height; iter->iter_flags = iter_flags; iter->image_flags = image_flags; + iter->fini = NULL; - while (imp) + if (!iter->image) { - if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + iter->get_scanline = get_scanline_null; + return; } - return FALSE; -} - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) -{ - iter->image = image; - iter->buffer = (uint32_t *)buffer; - iter->x = x; - iter->y = y; - iter->width = width; - iter->height = height; - iter->iter_flags = iter_flags; - iter->image_flags = image_flags; + format = iter->image->common.extended_format_code; while (imp) { - if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + if (imp->iter_info) + { + const pixman_iter_info_t *info; + + for (info = 
imp->iter_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == PIXMAN_any || info->format == format) && + (info->image_flags & image_flags) == info->image_flags && + (info->iter_flags & iter_flags) == info->iter_flags) + { + iter->get_scanline = info->get_scanline; + iter->write_back = info->write_back; + + if (info->initializer) + info->initializer (iter, info); + return; + } + } + } + + imp = imp->fallback; } - - return FALSE; } pixman_bool_t @@ -384,6 +380,11 @@ _pixman_disabled (const char *name) return FALSE; } +static const pixman_fast_path_t empty_fast_path[] = +{ + { PIXMAN_OP_NONE } +}; + pixman_implementation_t * _pixman_choose_implementation (void) { @@ -401,5 +402,16 @@ _pixman_choose_implementation (void) imp = _pixman_implementation_create_noop (imp); + if (_pixman_disabled ("wholeops")) + { + pixman_implementation_t *cur; + + /* Disable all whole-operation paths except the general one, + * so that optimized iterators are used as much as possible. + */ + for (cur = imp; cur->fallback; cur = cur->fallback) + cur->fast_paths = empty_fast_path; + } + return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h index 6d78aa7cb0..f785910f80 100644 --- a/gfx/cairo/libpixman/src/pixman-inlines.h +++ b/gfx/cairo/libpixman/src/pixman-inlines.h @@ -173,34 +173,6 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, #else -#ifdef LOW_QUALITY_INTERPOLATION -/* Based on Filter_32_opaque_portable from Skia */ -static force_inline uint32_t -bilinear_interpolation(uint32_t a00, uint32_t a01, - uint32_t a10, uint32_t a11, - int x, int y) -{ - int xy = x * y; - static const uint32_t mask = 0xff00ff; - - int scale = 256 - 16*y - 16*x + xy; - uint32_t lo = (a00 & mask) * scale; - uint32_t hi = ((a00 >> 8) & mask) * scale; - - scale = 16*x - xy; - lo += (a01 & mask) * scale; - hi += ((a01 >> 8) & mask) * scale; - - scale = 16*y - xy; - lo += (a10 & mask) * scale; - hi += ((a10 >> 8) & mask) * scale; - - lo 
+= (a11 & mask) * xy; - hi += ((a11 >> 8) & mask) * xy; - - return ((lo >> 8) & mask) | (hi & ~mask); -} -#else static force_inline uint32_t bilinear_interpolation (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br, @@ -246,10 +218,35 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, return r; } -#endif + #endif #endif // BILINEAR_INTERPOLATION_BITS <= 4 +static force_inline argb_t +bilinear_interpolation_float (argb_t tl, argb_t tr, + argb_t bl, argb_t br, + float distx, float disty) +{ + float distxy, distxiy, distixy, distixiy; + argb_t r; + + distxy = distx * disty; + distxiy = distx * (1.f - disty); + distixy = (1.f - distx) * disty; + distixiy = (1.f - distx) * (1.f - disty); + + r.a = tl.a * distixiy + tr.a * distxiy + + bl.a * distixy + br.a * distxy; + r.r = tl.r * distixiy + tr.r * distxiy + + bl.r * distixy + br.r * distxy; + r.g = tl.g * distixiy + tr.g * distxiy + + bl.g * distixy + br.g * distxy; + r.b = tl.b * distixiy + tr.b * distxiy + + bl.b * distixy + br.b * distxy; + + return r; +} + /* * For each scanline fetched from source image with PAD repeat: * - calculate how many pixels need to be padded on the left side @@ -775,7 +772,8 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) /*****************************************************************************/ @@ -852,38 +850,8 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, * with 8-bit SIMD multiplication instructions for 8-bit interpolation * precision. 
*/ - -/* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional - * two stage processing (bilinear fetch to a temp buffer, followed by unscaled - * combine), "op_func" may be NULL, in this case we keep old behavior. - * This is ugly and gcc issues some warnings, but works. - * - * An advice: clang has much better error reporting than gcc for deeply nested macros. - */ - -#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buf, mask, src_top, src_bottom, width, \ - weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \ - do { \ - if (op_func != NULL) \ - { \ - fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \ - (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \ - ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\ - ((dst), (mask), (src_type_t *)scanline_buf, (width)); \ - } \ - else \ - { \ - fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \ - (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \ - } \ - } while (0) - - -#define SCANLINE_BUFFER_LENGTH 3072 - -#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \ - mask_type_t, dst_type_t, repeat_mode, flags) \ +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ static void \ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ pixman_composite_info_t *info) \ @@ -908,9 +876,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, pixman_fixed_t src_width_fixed; \ int max_x; \ pixman_bool_t need_src_extension; \ - \ - uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \ - uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \ \ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ if (flags & 
FLAG_HAVE_SOLID_MASK) \ @@ -983,14 +948,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ src_width_fixed = pixman_int_to_fixed (src_width); \ } \ - \ - if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \ - { \ - scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \ - \ - if (!scanline_buffer) \ - return; \ - } \ \ while (--height >= 0) \ { \ @@ -1033,18 +990,16 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, { \ buf1[0] = buf1[1] = src1[0]; \ buf2[0] = buf2[1] = src2[0]; \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \ - 0, 0, 0, FALSE); \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ dst += left_pad; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ mask += left_pad; \ } \ if (width > 0) \ { \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, src1, src2, width, weight1, weight2, \ - vx, unit_x, 0, FALSE); \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ dst += width; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ mask += width; \ @@ -1053,9 +1008,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, { \ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \ - 0, 0, 0, FALSE); \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ } \ } \ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ @@ -1091,9 +1045,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, { \ buf1[0] = buf1[1] = 0; \ buf2[0] = 
buf2[1] = 0; \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \ - 0, 0, 0, TRUE); \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ dst += left_pad; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ mask += left_pad; \ @@ -1104,8 +1057,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, buf1[1] = src1[0]; \ buf2[0] = 0; \ buf2[1] = src2[0]; \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \ + scanline_func (dst, mask, \ + buf1, buf2, left_tz, weight1, weight2, \ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ dst += left_tz; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ @@ -1114,9 +1067,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, } \ if (width > 0) \ { \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, src1, src2, width, weight1, weight2, \ - vx, unit_x, 0, FALSE); \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ dst += width; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ mask += width; \ @@ -1128,8 +1080,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, buf1[1] = 0; \ buf2[0] = src2[src_image->bits.width - 1]; \ buf2[1] = 0; \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \ + scanline_func (dst, mask, \ + buf1, buf2, right_tz, weight1, weight2, \ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ dst += right_tz; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ @@ -1139,9 +1091,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, { \ buf1[0] = buf1[1] = 0; \ buf2[0] = buf2[1] = 0; \ - scanline_func (dst_type_t, 
mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \ - 0, 0, 0, TRUE); \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ } \ } \ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ @@ -1203,8 +1154,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, if (num_pixels > width_remain) \ num_pixels = width_remain; \ \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \ - dst, scanline_buffer, mask, buf1, buf2, num_pixels, \ + scanline_func (dst, mask, buf1, buf2, num_pixels, \ weight1, weight2, pixman_fixed_frac(vx), \ unit_x, src_width_fixed, FALSE); \ \ @@ -1233,10 +1183,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, if (num_pixels > width_remain) \ num_pixels = width_remain; \ \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \ - dst, scanline_buffer, mask, src_line_top, src_line_bottom, \ - num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \ - FALSE); \ + scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ + weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ \ width_remain -= num_pixels; \ vx += num_pixels * unit_x; \ @@ -1249,21 +1197,17 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, } \ else \ { \ - scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \ - scanline_buffer, mask, \ - src_first_line + src_stride * y1, \ + scanline_func (dst, mask, src_first_line + src_stride * y1, \ src_first_line + src_stride * y2, width, \ weight1, weight2, vx, unit_x, max_vx, FALSE); \ } \ } \ - if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \ - free (scanline_buffer); \ } /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, 
fetch_func, op_func, src_type_t, mask_type_t,\ +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ dst_type_t, repeat_mode, flags) \ - FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\ + FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ dst_type_t, repeat_mode, flags) #define SCALED_BILINEAR_FLAGS \ diff --git a/gfx/cairo/libpixman/src/pixman-linear-gradient.c b/gfx/cairo/libpixman/src/pixman-linear-gradient.c index f5ba51b604..3f528508a1 100644 --- a/gfx/cairo/libpixman/src/pixman-linear-gradient.c +++ b/gfx/cairo/libpixman/src/pixman-linear-gradient.c @@ -31,8 +31,6 @@ #include <stdlib.h> #include "pixman-private.h" -#include "pixman-dither.h" - static pixman_bool_t linear_gradient_is_horizontal (pixman_image_t *image, int x, @@ -91,8 +89,11 @@ linear_gradient_is_horizontal (pixman_image_t *image, } static uint32_t * -linear_get_scanline_narrow (pixman_iter_t *iter, - const uint32_t *mask) +linear_get_scanline (pixman_iter_t *iter, + const uint32_t *mask, + int Bpp, + pixman_gradient_walker_write_t write_pixel, + pixman_gradient_walker_fill_t fill_pixel) { pixman_image_t *image = iter->image; int x = iter->x; @@ -105,7 +106,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter, pixman_fixed_48_16_t dx, dy; gradient_t *gradient = (gradient_t *)image; linear_gradient_t *linear = (linear_gradient_t *)image; - uint32_t *end = buffer + width; + uint32_t *end = buffer + width * (Bpp / 4); pixman_gradient_walker_t walker; _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); @@ -139,7 +140,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter, if (l == 0 || unit.vector[2] == 0) { /* affine transformation only */ - pixman_fixed_32_32_t t, next_inc; + pixman_fixed_32_32_t t, next_inc; double inc; if (l == 0 || v.vector[2] == 0) @@ -154,7 +155,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter, invden = pixman_fixed_1 * 
(double) pixman_fixed_1 / (l * (double) v.vector[2]); v2 = v.vector[2] * (1. / pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - + t = ((dx * v.vector[0] + dy * v.vector[1]) - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden; } @@ -162,11 +163,7 @@ linear_get_scanline_narrow (pixman_iter_t *iter, if (((pixman_fixed_32_32_t )(inc * width)) == 0) { - register uint32_t color; - - color = _pixman_gradient_walker_pixel (&walker, t); - while (buffer < end) - *buffer++ = color; + fill_pixel (&walker, t, buffer, end); } else { @@ -177,12 +174,11 @@ linear_get_scanline_narrow (pixman_iter_t *iter, { if (!mask || *mask++) { - *buffer = _pixman_gradient_walker_pixel (&walker, - t + next_inc); + write_pixel (&walker, t + next_inc, buffer); } i++; next_inc = inc * i; - buffer++; + buffer += (Bpp / 4); } } } @@ -204,14 +200,14 @@ linear_get_scanline_narrow (pixman_iter_t *iter, invden = pixman_fixed_1 * (double) pixman_fixed_1 / (l * (double) v.vector[2]); v2 = v.vector[2] * (1. 
/ pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - + t = ((dx * v.vector[0] + dy * v.vector[1]) - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; } - *buffer = _pixman_gradient_walker_pixel (&walker, t); + write_pixel (&walker, t, buffer); } - ++buffer; + buffer += (Bpp / 4); v.vector[0] += unit.vector[0]; v.vector[1] += unit.vector[1]; @@ -225,176 +221,30 @@ linear_get_scanline_narrow (pixman_iter_t *iter, } static uint32_t * -linear_get_scanline_16 (pixman_iter_t *iter, - const uint32_t *mask) +linear_get_scanline_narrow (pixman_iter_t *iter, + const uint32_t *mask) { - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint16_t * buffer = (uint16_t*)iter->buffer; - pixman_bool_t toggle = ((x ^ y) & 1); - - pixman_vector_t v, unit; - pixman_fixed_32_32_t l; - pixman_fixed_48_16_t dx, dy; - gradient_t *gradient = (gradient_t *)image; - linear_gradient_t *linear = (linear_gradient_t *)image; - uint16_t *end = buffer + width; - pixman_gradient_walker_t walker; - - _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - unit.vector[0] = image->common.transform->matrix[0][0]; - unit.vector[1] = image->common.transform->matrix[1][0]; - unit.vector[2] = image->common.transform->matrix[2][0]; - } - else - { - unit.vector[0] = pixman_fixed_1; - unit.vector[1] = 0; - unit.vector[2] = 0; - } - - dx = linear->p2.x - linear->p1.x; - dy = linear->p2.y - linear->p1.y; - - l = dx * dx + dy * dy; - - if (l == 0 || unit.vector[2] == 0) - { - /* affine transformation only */ - pixman_fixed_32_32_t t, next_inc; - double inc; - - if (l == 0 || v.vector[2] == 0) - 
{ - t = 0; - inc = 0; - } - else - { - double invden, v2; - - invden = pixman_fixed_1 * (double) pixman_fixed_1 / - (l * (double) v.vector[2]); - v2 = v.vector[2] * (1. / pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; - inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden; - } - next_inc = 0; - - if (((pixman_fixed_32_32_t )(inc * width)) == 0) - { - register uint32_t color; - uint16_t dither_diff; - uint16_t color16; - uint16_t color16b; - - color = _pixman_gradient_walker_pixel (&walker, t); - color16 = dither_8888_to_0565(color, toggle); - color16b = dither_8888_to_0565(color, toggle^1); - // compute the difference - dither_diff = color16 ^ color16b; - while (buffer < end) { - *buffer++ = color16; - // use dither_diff to toggle between color16 and color16b - color16 ^= dither_diff; - toggle ^= 1; - } - } - else - { - int i; - - i = 0; - while (buffer < end) - { - if (!mask || *mask++) - { - *buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker, - t + next_inc), - toggle); - } - toggle ^= 1; - i++; - next_inc = inc * i; - buffer++; - } - } - } - else - { - /* projective transformation */ - double t; - - t = 0; - - while (buffer < end) - { - if (!mask || *mask++) - { - if (v.vector[2] != 0) - { - double invden, v2; - - invden = pixman_fixed_1 * (double) pixman_fixed_1 / - (l * (double) v.vector[2]); - v2 = v.vector[2] * (1. 
/ pixman_fixed_1); - t = ((dx * v.vector[0] + dy * v.vector[1]) - - (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden; - } - - *buffer = dither_8888_to_0565(_pixman_gradient_walker_pixel (&walker, t), - toggle); - } - toggle ^= 1; - - ++buffer; - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - - iter->y++; - - return iter->buffer; + return linear_get_scanline (iter, mask, 4, + _pixman_gradient_walker_write_narrow, + _pixman_gradient_walker_fill_narrow); } + static uint32_t * linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { - uint32_t *buffer = linear_get_scanline_narrow (iter, NULL); - - pixman_expand_to_float ( - (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); - - return buffer; + return linear_get_scanline (iter, NULL, 16, + _pixman_gradient_walker_write_wide, + _pixman_gradient_walker_fill_wide); } void _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) { - // XXX: we can't use this optimization when dithering - if (0 && linear_gradient_is_horizontal ( + if (linear_gradient_is_horizontal ( iter->image, iter->x, iter->y, iter->width, iter->height)) { - if (iter->iter_flags & ITER_16) - linear_get_scanline_16 (iter, NULL); - else if (iter->iter_flags & ITER_NARROW) + if (iter->iter_flags & ITER_NARROW) linear_get_scanline_narrow (iter, NULL); else linear_get_scanline_wide (iter, NULL); @@ -403,9 +253,7 @@ _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) } else { - if (iter->iter_flags & ITER_16) - iter->get_scanline = linear_get_scanline_16; - else if (iter->iter_flags & ITER_NARROW) + if (iter->iter_flags & ITER_NARROW) iter->get_scanline = linear_get_scanline_narrow; else iter->get_scanline = linear_get_scanline_wide; diff --git a/gfx/cairo/libpixman/src/pixman-matrix.c b/gfx/cairo/libpixman/src/pixman-matrix.c index 89b96826b8..81b6e613ed 100644 --- a/gfx/cairo/libpixman/src/pixman-matrix.c +++ 
b/gfx/cairo/libpixman/src/pixman-matrix.c @@ -37,7 +37,7 @@ static force_inline int count_leading_zeros (uint32_t x) { -#ifdef __GNUC__ +#ifdef HAVE_BUILTIN_CLZ return __builtin_clz (x); #else int n = 0; @@ -273,7 +273,7 @@ pixman_transform_point_31_16 (const pixman_transform_t *t, { /* the divisor is small, we can actually keep all the bits */ int64_t hi, rhi, lo, rlo; - int64_t div = (divint << 16) + divfrac; + int64_t div = ((uint64_t)divint << 16) + divfrac; fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S index ddfacef62e..9dad163b79 100644 --- a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S +++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S @@ -26,7 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * Author: Nemanja Lukic (nlukic@mips.com) + * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) */ #include "pixman-private.h" @@ -310,6 +310,516 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips) END(pixman_composite_src_x888_8888_asm_mips) +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (b8g8r8) + * a2 - w + */ + + beqz a2, 6f + nop + + lui t8, 0xff00; + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + 
wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + or t4, t4, t8 /* t4 = FF | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + or t5, t5, t8 /* t5 = FF | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ + or t2, t2, t8 /* t2 = FF | R4 | G4 | B4 */ + + sw t4, 0(a0) + sw t3, 4(a0) + sw t5, 8(a0) + sw t2, 12(a0) + b 0b + addiu a0, a0, 16 + +1: + lbu t6, 0(a1) /* t6 = 0 | 0 | 0 | R1 */ + lhu t7, 1(a1) /* t7 = 0 | 0 | B1 | G1 */ + sll t6, t6, 16 /* t6 = 0 | R1 | 0 | 0 */ + wsbh t7, t7 /* t7 = 0 | 0 | G1 | B1 */ + or t7, t6, t7 /* t7 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t3, t3, t8 /* t3 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t3, 8(a0) + sw t4, 12(a0) + rotr t7, t2, 16 /* t7 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 16 + +2: + lhu t7, 0(a1) /* t7 = 0 | 0 | G1 | R1 */ + wsbh t7, t7 /* t7 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw
t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t7, t7, 8 /* t7 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t3, t3, t8 /* t3 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t1, 8(a0) + sw t3, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 16 + +3: + lbu t7, 0(a1) /* t7 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t3, 4(a0) + sw t1, 8(a0) + sw t4, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 16 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t0 = 0 | R | 0 | 0 */ + sll t1,
t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + or t2, t2, t8 /* t2 = FF | R | G | B */ + + sw t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 4 +6: + j ra + nop + +END(pixman_composite_src_0888_8888_rev_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (b8g8r8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0, v1 + beqz a2, 6f + nop + + li t6, 0xf800f800 + li t7, 0x07e007e0 + li t8, 0x001F001F + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1 + + sh t4, 0(a0) + sh t3, 2(a0) + sh t5, 4(a0) + sh t2, 6(a0) + b 0b + addiu a0, a0, 8 + +1: + lbu t4, 0(a1) /* t4 = 0 | 0 | 0 | R1 */ + lhu t5, 1(a1) /* t5 = 0 | 0 | B1 | G1 */ + sll t4, t4, 16 /* t4 = 0 | R1 | 0 | 0 */ + wsbh t5, t5 /* t5 = 0 | 0 | G1 | B1 */ + or t5, t4, t5 /* t5 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 
7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + rotr t5, t2, 16 /* t5 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 8 + +2: + lhu t5, 0(a1) /* t5 = 0 | 0 | G1 | R1 */ + wsbh t5, t5 /* t5 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t5, t5, 8 /* t5 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t1, 4(a0) + sh t3, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 8 + +3: + lbu t5, 0(a1) /* t5 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, 
-4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + + CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t3, 2(a0) + sh t1, 4(a0) + sh t4, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 8 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ + sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 2 +6: + RESTORE_REGS_FROM_STACK 0, v0, v1 + j ra + nop + +END(pixman_composite_src_0888_0565_rev_asm_mips) +#endif + +LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) +/* + * a0 - dst (a8b8g8r8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + wsbh t0, t0 + wsbh t1, t1 + rotr t0, t0, 16 + rotr t1, t1, 16 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll 
t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + wsbh t0, t0 + rotr t0, t0, 16 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_pixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + rotr t0, t0, 8 + rotr t1, t1, 8 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + rotr t0, t0, 8 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_rpixbuf_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -451,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - beqz a3, 4f + beqz a3, 8f nop + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t6, 0xff addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ li t9, 0x00ff00ff + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ - nop - beq t8, t6, 2f /* if (srca == 0xff) */ + beqz t1, 4f /* last pixel */ nop -1: - /* a1 = src */ + +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move t4, a1 /* t4 = src */ - move t5, a1 /* t5 = src */ + and 
t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lw t2, 0(a0) /* t2 = dst */ - beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lw t3, 4(a0) /* t3 = dst */ + lw t3, 4(a0) /* t3 = dst */ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 -11: not t0, t0 not t1, t1 MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 @@ -486,62 +997,79 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) addu_s.qb t3, t5, t3 sw t2, 0(a0) sw t3, 4(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 8 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - move t4, a1 - beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t5, a1 lw t2, 0(a0) /* t2 = dst */ lw t3, 4(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t4, t4, t2 - addu_s.qb t5, t5, t3 -21: - sw t4, 0(a0) - sw t5, 4(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + sw t2, 0(a0) + sw t3, 4(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 8 + b 4f + nop +2: + sw a1, 0(a0) + sw a1, 4(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - 
move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lw t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6 - addu_s.qb t0, t2, t0 - sw t0, 0(a0) -4: + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lw t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0 + addu_s.qb t1, t2, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lw t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 + addu_s.qb t1, a1, t1 + sw t1, 0(a0) RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 j ra nop +6: + sw a1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 +8: + j ra + nop END(pixman_composite_over_n_8888_8888_ca_asm_mips) @@ -553,111 +1081,251 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - beqz a3, 4f + beqz a3, 8f nop - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - li t9, 0x00ff00ff + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + li s6, 0xf800f800 + li s7, 0x07e007e0 + li s8, 0x001F001F + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - li s0, 0xff /* s0 = 0xff */ - addiu s1, zero, -1 /* s1 = 0xffffffff */ - beq t8, s0, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && 
(t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move s2, a1 /* s2 = src */ - move s3, a1 /* s3 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lhu t2, 0(a0) /* t2 = dst */ - beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8 -11: + lhu t3, 2(a0) /* t3 = dst */ + MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 not t0, t0 not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1 - addu_s.qb s2, s2, s4 - addu_s.qb s3, s3, s5 - CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 sh t2, 0(a0) sh t3, 2(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 4 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t3, t0, t1 - move t2, a1 - beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t3, a1 lhu t2, 0(a0) /* t2 = dst */ lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - not t0, t0 - not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, 
s7, s8, t2, t3 - addu_s.qb t2, s2, s4 - addu_s.qb t3, s3, s5 -21: - CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3 - sh t0, 0(a0) - sh t1, 2(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 4 + b 4f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 + sh t2, 0(a0) + sh t2, 2(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lhu t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3 - MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7 - addu_s.qb t0, t2, t3 - CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3 - sh s1, 0(a0) -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lhu t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 + addu_s.qb s1, t2, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8 
s1, t0, s1, t9, t2, t3, t4 + addu_s.qb s1, a1, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +6: + CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 + sh t1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 +8: j ra nop END(pixman_composite_over_n_8888_0565_ca_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li t9, 0x00ff00ff + beqz a3, 3f + nop + srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ + beqz v0, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz v0, 1f + addiu v0, v0, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t0, t2, t3 + not t6, t0 + + preceu.ph.qbl t7, t6 + preceu.ph.qbr t6, t6 + + muleu_s.ph.qbl t2, t1, t7 + muleu_s.ph.qbr t3, t1, t6 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t1, t2, t3 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + lbu t1, 0(a0) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + 
addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + not t3, t2 + andi t3, t3, 0x00ff + + + mul t4, t1, t3 + shra_r.ph t5, t4, 8 + andi t5, t5, 0x00ff + addq.ph t4, t4, t5 + shra_r.ph t4, t4, 8 + andi t4, t4, 0x00ff + + addu_s.qb t2, t2, t4 + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_over_n_8_8_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -1342,6 +2010,84 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) END(pixman_composite_over_8888_8888_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t4, 0x00ff00ff + li s3, 0xf800f800 + li s4, 0x07e007e0 + li s5, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + addiu a1, a1, 8 + + not t5, t0 + srl t5, t5, 24 + not t6, t1 + srl t6, t6, 24 + + or t7, t5, t6 + beqz t7, 11f + or t8, t0, t1 + beqz t8, 12f + + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2 + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1 + + addu_s.qb t0, t7, t0 + addu_s.qb t1, t8, t1 +11: + CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3 + sh t7, 0(a0) + sh t8, 2(a0) +12: + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addiu a1, a1, 4 + + not t2, t0 + srl t2, t2, 24 + + beqz t2, 21f + nop + beqz t0, 3f + + CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9 + MIPS_UN8x4_MUL_UN8 s0, t2, t3, t4, t5, t6, t7 + + addu_s.qb t0, t3, t0 +21: + CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9 + sh s0, 0(a0) + 
+3: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_8888_0565_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) /* * a0 - dst (r5g6b5) @@ -2349,101 +3095,265 @@ END(pixman_composite_over_reverse_n_8888_asm_mips) LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) /* * a0 - dst (a8) - * a1 - src (a8r8g8b8) + * a1 - src (32bit constant) * a2 - w */ - beqz a2, 5f + li t9, 0x00ff00ff + beqz a2, 3f nop - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - move t7, a1 - srl t5, t7, 24 - replv.ph t5, t5 - srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */ - beqz t9, 2f /* branch if less than 4 src pixels */ + srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ nop -1: - addiu t9, t9, -1 - addiu a2, a2, -4 + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 lbu t0, 0(a0) lbu t1, 1(a0) lbu t2, 2(a0) lbu t3, 3(a0) - muleu_s.ph.qbl s0, t0, t5 - muleu_s.ph.qbr s1, t0, t5 - muleu_s.ph.qbl s2, t1, t5 - muleu_s.ph.qbr s3, t1, t5 - muleu_s.ph.qbl s4, t2, t5 - muleu_s.ph.qbr s5, t2, t5 - muleu_s.ph.qbl s6, t3, t5 - muleu_s.ph.qbr s7, t3, t5 - - shrl.ph t4, s0, 8 - shrl.ph t6, s1, 8 - shrl.ph t7, s2, 8 - shrl.ph t8, s3, 8 - addq.ph t0, s0, t4 - addq.ph t1, s1, t6 - addq.ph t2, s2, t7 - addq.ph t3, s3, t8 - shra_r.ph t0, t0, 8 - shra_r.ph t1, t1, 8 + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 shra_r.ph t2, t2, 8 shra_r.ph t3, t3, 8 - shrl.ph t4, s4, 8 - shrl.ph t6, s5, 8 - shrl.ph t7, s6, 8 - shrl.ph t8, s7, 8 - addq.ph s0, s4, t4 - addq.ph s1, s5, t6 - addq.ph s2, s6, t7 - addq.ph s3, s7, t8 - shra_r.ph t4, s0, 8 - shra_r.ph t6, s1, 8 - shra_r.ph t7, s2, 8 - shra_r.ph t8, s3, 8 - - precr.qb.ph s0, t0, t1 - precr.qb.ph 
s1, t2, t3 - precr.qb.ph s2, t4, t6 - precr.qb.ph s3, t7, t8 + precr.qb.ph t2, t2, t3 - sb s0, 0(a0) - sb s1, 1(a0) - sb s2, 2(a0) - sb s3, 3(a0) - bgtz t9, 1b + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b addiu a0, a0, 4 -2: - beqz a2, 4f + +1: + beqz a2, 3f nop -3: - lbu t1, 0(a0) + srl t8, a1, 24 +2: + lbu t0, 0(a0) + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 - muleu_s.ph.qbl t4, t1, t5 - muleu_s.ph.qbr t7, t1, t5 - shrl.ph t6, t4, 8 - shrl.ph t0, t7, 8 - addq.ph t8, t4, t6 - addq.ph t9, t7, t0 - shra_r.ph t8, t8, 8 - shra_r.ph t9, t9, 8 - precr.qb.ph t2, t8, t9 sb t2, 0(a0) addiu a2, a2, -1 - bnez a2, 3b + bnez a2, 2b addiu a0, a0, 1 -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 -5: + +3: j ra nop END(pixman_composite_in_n_8_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + lw t8, 16(sp) /* t8 = unit_x */ + li t6, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 + + sw t4, 0(a0) + sw t5, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) 
*/ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 + lw t8, 40(sp) /* t8 = unit_x */ + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 + OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 + CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 + + sh v0, 0(a0) + sh v1, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 + OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 + CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 + j ra + 
nop + +END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h index b330c0f0d9..e238566196 100644 --- a/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h +++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.h @@ -26,7 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * Author: Nemanja Lukic (nlukic@mips.com) + * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) */ #ifndef PIXMAN_MIPS_DSPR2_ASM_H @@ -72,6 +72,7 @@ #define LEAF_MIPS32R2(symbol) \ .globl symbol; \ .align 2; \ + .hidden symbol; \ .type symbol, @function; \ .ent symbol, 0; \ symbol: .frame sp, 0, ra; \ @@ -354,17 +355,16 @@ LEAF_MIPS32R2(symbol) \ out1_565, out2_565, \ maskR, maskG, maskB, \ scratch1, scratch2 - precrq.ph.w \scratch1, \in2_8888, \in1_8888 - precr_sra.ph.w \in2_8888, \in1_8888, 0 - shll.ph \scratch1, \scratch1, 8 - srl \in2_8888, \in2_8888, 3 - and \scratch2, \in2_8888, \maskB - and \scratch1, \scratch1, \maskR - srl \in2_8888, \in2_8888, 2 - and \out2_565, \in2_8888, \maskG - or \out2_565, \out2_565, \scratch2 - or \out1_565, \out2_565, \scratch1 - srl \out2_565, \out1_565, 16 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or \out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 .endm /* @@ -587,6 +587,36 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out_8888, \out_8888, \s_8888 .endm +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR + * needed for rounding process. 
maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8888 s1_8888, \ + s2_8888, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + not \scratch1, \s1_8888 + srl \scratch1, \scratch1, 24 + not \scratch2, \s2_8888 + srl \scratch2, \scratch2, 24 + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch1, \scratch2, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch3, \scratch4, \scratch5, \ + \scratch6, \d1_8888, \d2_8888 + + addu_s.qb \out1_8888, \out1_8888, \s1_8888 + addu_s.qb \out2_8888, \out2_8888, \s2_8888 +.endm + .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ m_8, \ d_8888, \ diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2.c b/gfx/cairo/libpixman/src/pixman-mips-dspr2.c index e14e1c43b9..87969ae704 100644 --- a/gfx/cairo/libpixman/src/pixman-mips-dspr2.c +++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2.c @@ -26,7 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * Author: Nemanja Lukic (nlukic@mips.com) + * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) */ #ifdef HAVE_CONFIG_H @@ -48,8 +48,20 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, uint8_t, 3, uint8_t, 3) +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +#endif +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, + uint32_t, 1, uint16_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888, @@ -67,6 +79,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca, uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8, + uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, uint8_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, @@ -111,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) + 
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, @@ -278,6 +299,14 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), +#endif + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), @@ -290,6 +319,7 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888), 
@@ -318,6 +348,8 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888), @@ -340,11 +372,27 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), - PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), - PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, 
mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + + SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), + SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), - PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565), - PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565), + SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565), + SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), diff --git a/gfx/cairo/libpixman/src/pixman-mips-dspr2.h b/gfx/cairo/libpixman/src/pixman-mips-dspr2.h index 4ac9ff95df..57b38359e3 100644 --- a/gfx/cairo/libpixman/src/pixman-mips-dspr2.h +++ b/gfx/cairo/libpixman/src/pixman-mips-dspr2.h @@ -26,7 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * Author: Nemanja Lukic (nlukic@mips.com) + * Author: Nemanja Lukic (nemanja.lukic@rt-rk.com) */ #ifndef PIXMAN_MIPS_DSPR2_H @@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + + /*****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ @@ -286,12 +328,6 @@ FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \ scaled_nearest_scanline_mips_##name##_##op, \ src_type, uint8_t, dst_type, PAD, TRUE, FALSE) -/* Provide entries for the fast path table */ -#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - 
SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - /****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ diff --git a/gfx/cairo/libpixman/src/pixman-mmx.c b/gfx/cairo/libpixman/src/pixman-mmx.c index ca2ac83d90..d7cf2659df 100644 --- a/gfx/cairo/libpixman/src/pixman-mmx.c +++ b/gfx/cairo/libpixman/src/pixman-mmx.c @@ -44,8 +44,6 @@ #include "pixman-combine32.h" #include "pixman-inlines.h" -#define no_vERBOSE - #ifdef VERBOSE #define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__) #else @@ -91,21 +89,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B) return __A; } -# ifdef __OPTIMIZE__ -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_shuffle_pi16 (__m64 __A, int8_t const __N) -{ - __m64 ret; - - asm ("pshufw %2, %1, %0\n\t" - : "=y" (ret) - : "y" (__A), "K" (__N) - ); - - return ret; -} -# else -# define _mm_shuffle_pi16(A, N) \ +# define _mm_shuffle_pi16(A, N) \ ({ \ __m64 ret; \ \ @@ -116,7 +100,6 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N) \ ret; \ }) -# endif # endif #endif @@ -303,6 +286,29 @@ negate (__m64 mask) return _mm_xor_si64 (mask, MC (4x00ff)); } +/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 + * and maps its result to the same range. + * + * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: + * Notation, Notation, Notation", the first of which is + * + * prod(a, b) = (a * b + 128) / 255. + * + * By approximating the division by 255 as 257/65536 it can be replaced by a + * multiply and a right shift. 
This is the implementation that we use in + * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended + * 3DNow!, and unavailable at the time of the book's publication) to perform + * the multiplication and right shift in a single operation. + * + * prod(a, b) = ((a * b + 128) * 257) >> 16. + * + * A third way (how pix_multiply() was implemented prior to 14208344) exists + * also that performs the multiplication by 257 with adds and shifts. + * + * Where temp = a * b + 128 + * + * prod(a, b) = (temp + (temp >> 8)) >> 8. + */ static force_inline __m64 pix_multiply (__m64 a, __m64 b) { @@ -381,8 +387,10 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) static force_inline __m64 ldq_u(__m64 *p) { #ifdef USE_X86_MMX - /* x86's alignment restrictions are very relaxed. */ - return *(__m64 *)p; + /* x86's alignment restrictions are very relaxed, but that's no excuse */ + __m64 r; + memcpy(&r, p, sizeof(__m64)); + return r; #elif defined USE_ARM_IWMMXT int align = (uintptr_t)p & 7; __m64 *aligned_p; @@ -401,7 +409,9 @@ static force_inline uint32_t ldl_u(const uint32_t *p) { #ifdef USE_X86_MMX /* x86's alignment restrictions are very relaxed. 
*/ - return *p; + uint32_t r; + memcpy(&r, p, sizeof(uint32_t)); + return r; #else struct __una_u32 { uint32_t x __attribute__((packed)); }; const struct __una_u32 *ptr = (const struct __una_u32 *) p; @@ -3534,13 +3544,111 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, _mm_empty (); } +static force_inline void +scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t* pd, + const uint32_t* ps, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t src_width_fixed, + pixman_bool_t fully_transparent_src) +{ + if (fully_transparent_src) + return; + + while (w) + { + __m64 d = load (pd); + __m64 s = load (ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + store8888 (pd, core_combine_over_u_pixel_mmx (s, d)); + pd++; + + w--; + } + + _mm_empty (); +} + +FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER, + scaled_nearest_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, COVER) +FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER, + scaled_nearest_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, NONE) +FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER, + scaled_nearest_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, PAD) +FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER, + scaled_nearest_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, NORMAL) + +static force_inline void +scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask, + uint32_t * dst, + const uint32_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t src_width_fixed, + pixman_bool_t zero_src) +{ + __m64 mm_mask; + + if (zero_src || (*mask >> 24) == 0) + { + /* A workaround for https://gcc.gnu.org/PR47759 */ + _mm_empty (); + return; + } + + mm_mask = expand_alpha (load8888 (mask)); + + while (w) + { + uint32_t s = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + if (s) + { + __m64 ms = load8888 (&s); + __m64 alpha = expand_alpha (ms); + 
__m64 dest = load8888 (dst); + + store8888 (dst, (in_over (ms, alpha, mm_mask, dest))); + } + + dst++; + w--; + } + + _mm_empty (); +} + +FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER, + scaled_nearest_scanline_mmx_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER, + scaled_nearest_scanline_mmx_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER, + scaled_nearest_scanline_mmx_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER, + scaled_nearest_scanline_mmx_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) + #define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS)) #define BMSK (BSHIFT - 1) #define BILINEAR_DECLARE_VARIABLES \ const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ - const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \ const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ @@ -3559,36 +3667,16 @@ do { \ __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ - vx += unit_x; \ - if (BILINEAR_INTERPOLATION_BITS < 8) \ - { \ - /* calculate horizontal weights */ \ - __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ + /* calculate horizontal weights */ \ + __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ _mm_srli_pi16 (mm_x, \ 16 - BILINEAR_INTERPOLATION_BITS))); \ - /* horizontal interpolation */ \ - __m64 p = _mm_unpacklo_pi16 (lo, hi); \ - __m64 q = _mm_unpackhi_pi16 (lo, hi); \ - lo = _mm_madd_pi16 (p, mm_wh); \ - hi = _mm_madd_pi16 (q, mm_wh); \ - } \ - else \ - { \ - /* calculate horizontal 
weights */ \ - __m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS)); \ - __m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS); \ - /* horizontal interpolation */ \ - __m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \ - __m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \ - __m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \ - __m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \ - lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \ - _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \ - hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \ - _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \ - } \ + /* horizontal interpolation */ \ + __m64 p = _mm_unpacklo_pi16 (lo, hi); \ + __m64 q = _mm_unpackhi_pi16 (lo, hi); \ + vx += unit_x; \ + lo = _mm_madd_pi16 (p, mm_wh); \ + hi = _mm_madd_pi16 (q, mm_wh); \ mm_x = _mm_add_pi16 (mm_x, mm_ux); \ /* shift and pack the result */ \ hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ @@ -3866,7 +3954,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) while (w && (((uintptr_t)dst) & 15)) { - *dst++ = *(src++) << 24; + *dst++ = (uint32_t)*(src++) << 24; w--; } @@ -3893,7 +3981,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) while (w) { - *dst++ = *(src++) << 24; + *dst++ = (uint32_t)*(src++) << 24; w--; } @@ -3901,52 +3989,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, - { PIXMAN_a8, mmx_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ 
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t mmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; static const pixman_fast_path_t mmx_fast_paths[] = { @@ -4024,6 +4083,16 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), SIMPLE_BILINEAR_FAST_PATH 
(SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ), @@ -4076,7 +4145,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) imp->blt = mmx_blt; imp->fill = mmx_fill; - imp->src_iter_init = mmx_src_iter_init; + imp->iter_info = mmx_iters; return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-noop.c b/gfx/cairo/libpixman/src/pixman-noop.c index e39996d9df..e59890492f 100644 --- a/gfx/cairo/libpixman/src/pixman-noop.c +++ b/gfx/cairo/libpixman/src/pixman-noop.c @@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp, return; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - static uint32_t * noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) { @@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) return result; } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; +static void +noop_init_solid_narrow (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_solid_wide (pixman_iter_t *iter, + const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) - - if (!image) - { - iter->get_scanline = get_scanline_null; - } - else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - 
else if (image->common.extended_format_code == PIXMAN_solid && - (iter->image->type == SOLID || - (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) - { - if (iter->iter_flags & ITER_NARROW) - { - uint32_t *buffer = iter->buffer; - uint32_t *end = buffer + iter->width; - uint32_t color; - - if (image->type == SOLID) - color = image->solid.color_32; - else - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - else - { - argb_t *buffer = (argb_t *)iter->buffer; - argb_t *end = buffer + iter->width; - argb_t color; - - if (image->type == SOLID) - color = image->solid.color_float; - else - color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 && - (iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS && - iter->x >= 0 && iter->y >= 0 && - iter->x + iter->width <= image->bits.width && - iter->y + iter->height <= image->bits.height) - { - iter->buffer = - image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = noop_get_scanline; - } + if (iter->image->type == SOLID) + color = image->solid.color_float; else - { - return FALSE; - } + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - return TRUE; + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - uint32_t image_flags = iter->image_flags; - uint32_t iter_flags = iter->iter_flags; - - if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS && - (iter_flags & ITER_NARROW) == ITER_NARROW && - ((image->common.extended_format_code == PIXMAN_a8r8g8b8) || - (image->common.extended_format_code == 
PIXMAN_x8r8g8b8 && - (iter_flags & (ITER_LOCALIZED_ALPHA))))) - { - iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->write_back = dest_write_back_direct; - - return TRUE; - } - else - { - return FALSE; - } + + iter->buffer = + image->bits.bits + iter->y * image->bits.rowstride + iter->x; } +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static const pixman_iter_info_t noop_iters[] = +{ + /* Source iters */ + { PIXMAN_any, + 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, + NULL, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, + noop_init_solid_narrow, + _pixman_iter_get_scanline_noop, + NULL, + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, + noop_init_solid_wide, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_a8r8g8b8, + FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, + ITER_NARROW | ITER_SRC, + noop_init_direct_buffer, + noop_get_scanline, + NULL + }, + /* Dest iters */ + { PIXMAN_a8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_x8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_null }, +}; + static const pixman_fast_path_t noop_fast_paths[] = { { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, @@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); - imp->src_iter_init = noop_src_iter_init; - imp->dest_iter_init = noop_dest_iter_init; + 
imp->iter_info = noop_iters; return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-private.h b/gfx/cairo/libpixman/src/pixman-private.h index 2313e65583..a0459dac19 100644 --- a/gfx/cairo/libpixman/src/pixman-private.h +++ b/gfx/cairo/libpixman/src/pixman-private.h @@ -9,7 +9,13 @@ #ifndef MOZILLA_VERSION #error "Need mozilla headers" #endif +#ifdef MOZ_GFX_OPTIMIZE_MOBILE +#define LOW_QUALITY_INTERPOLATION +#define LOWER_QUALITY_INTERPOLATION +#define BILINEAR_INTERPOLATION_BITS 4 +#else #define BILINEAR_INTERPOLATION_BITS 7 +#endif #define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) /* @@ -31,6 +37,7 @@ #include <stdio.h> #include <string.h> #include <stddef.h> +#include <float.h> #include "pixman-compiler.h" @@ -58,7 +65,7 @@ struct argb_t float b; }; -typedef void (*fetch_scanline_t) (pixman_image_t *image, +typedef void (*fetch_scanline_t) (bits_image_t *image, int x, int y, int width, @@ -182,7 +189,9 @@ struct bits_image uint32_t * free_me; int rowstride; /* in number of uint32_t's */ - fetch_scanline_t fetch_scanline_16; + pixman_dither_t dither; + uint32_t dither_offset_y; + uint32_t dither_offset_x; fetch_scanline_t fetch_scanline_32; fetch_pixel_32_t fetch_pixel_32; @@ -192,8 +201,6 @@ struct bits_image fetch_pixel_float_t fetch_pixel_float; store_scanline_t store_scanline_float; - store_scanline_t store_scanline_16; - /* Used for indirect access to the bits */ pixman_read_memory_func_t read_func; pixman_write_memory_func_t write_func; @@ -214,10 +221,12 @@ union pixman_image typedef struct pixman_iter_t pixman_iter_t; typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); +typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter); typedef enum { - ITER_NARROW = (1 << 0), + ITER_NARROW = (1 << 0), + ITER_WIDE = (1 << 1), /* "Localized alpha" is when the alpha channel is used only to compute * the alpha value of the destination. 
This means that the computation @@ -234,16 +243,15 @@ typedef enum * we can treat it as if it were ARGB, which means in some cases we can * avoid copying it to a temporary buffer. */ - ITER_LOCALIZED_ALPHA = (1 << 1), - ITER_IGNORE_ALPHA = (1 << 2), - ITER_IGNORE_RGB = (1 << 3), - - /* With the addition of ITER_16 we now have two flags that to represent - * 3 pipelines. This means that there can be an invalid state when - * both ITER_NARROW and ITER_16 are set. In this case - * ITER_16 overrides NARROW and we should use the 16 bit pipeline. - * Note: ITER_16 still has a 32 bit mask, which is a bit weird. */ - ITER_16 = (1 << 4) + ITER_LOCALIZED_ALPHA = (1 << 2), + ITER_IGNORE_ALPHA = (1 << 3), + ITER_IGNORE_RGB = (1 << 4), + + /* These indicate whether the iterator is for a source + * or a destination image + */ + ITER_SRC = (1 << 5), + ITER_DEST = (1 << 6) } iter_flags_t; struct pixman_iter_t @@ -260,6 +268,7 @@ struct pixman_iter_t /* These function pointers are initialized by the implementation */ pixman_iter_get_scanline_t get_scanline; pixman_iter_write_back_t write_back; + pixman_iter_fini_t fini; /* These fields are scratch data that implementations can use */ void * data; @@ -267,6 +276,19 @@ struct pixman_iter_t int stride; }; +typedef struct pixman_iter_info_t pixman_iter_info_t; +typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, + const pixman_iter_info_t *info); +struct pixman_iter_info_t +{ + pixman_format_code_t format; + uint32_t image_flags; + iter_flags_t iter_flags; + pixman_iter_initializer_t initializer; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +}; + void _pixman_bits_image_setup_accessors (bits_image_t *image); @@ -331,13 +353,12 @@ _pixman_image_validate (pixman_image_t *image); */ typedef struct { - uint32_t left_ag; - uint32_t left_rb; - uint32_t right_ag; - uint32_t right_rb; - pixman_fixed_t left_x; - pixman_fixed_t right_x; - pixman_fixed_t stepper; + float a_s, a_b; + float r_s, 
r_b; + float g_s, g_b; + float b_s, b_b; + pixman_fixed_48_16_t left_x; + pixman_fixed_48_16_t right_x; pixman_gradient_stop_t *stops; int num_stops; @@ -355,9 +376,38 @@ void _pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, pixman_fixed_48_16_t pos); -uint32_t -_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, - pixman_fixed_48_16_t x); +typedef void (*pixman_gradient_walker_write_t) ( + pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer); + +void +_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer); + +void +_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer); + +typedef void (*pixman_gradient_walker_fill_t) ( + pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer, + uint32_t *end); + +void +_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer, + uint32_t *end); + +void +_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t x, + uint32_t *buffer, + uint32_t *end); /* * Edges @@ -467,10 +517,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int width, int height, uint32_t filler); -typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, - pixman_iter_t *iter); -void _pixman_setup_combiner_functions_16 (pixman_implementation_t *imp); void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); @@ -491,14 +538,11 @@ struct pixman_implementation_t pixman_implementation_t * toplevel; pixman_implementation_t * fallback; const pixman_fast_path_t * fast_paths; + const pixman_iter_info_t * iter_info; pixman_blt_func_t blt; pixman_fill_func_t fill; - pixman_iter_init_func_t src_iter_init; - pixman_iter_init_func_t 
dest_iter_init; - pixman_combine_32_func_t combine_16[PIXMAN_N_OPERATORS]; - pixman_combine_32_func_t combine_16_ca[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS]; @@ -530,8 +574,7 @@ pixman_combine_32_func_t _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, pixman_op_t op, pixman_bool_t component_alpha, - pixman_bool_t wide, - pixman_bool_t rgb16); + pixman_bool_t wide); pixman_bool_t _pixman_implementation_blt (pixman_implementation_t *imp, @@ -559,29 +602,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int height, uint32_t filler); -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); /* Specific implementations */ pixman_implementation_t * @@ -603,6 +634,11 @@ pixman_implementation_t * _pixman_implementation_create_sse2 (pixman_implementation_t *fallback); #endif +#ifdef USE_SSSE3 +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); +#endif + #ifdef USE_ARM_SIMD pixman_implementation_t * _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); @@ -664,6 +700,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, uint32_t * 
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + /* These "formats" all have depth 0, so they * will never clash with any real ones */ @@ -703,7 +742,6 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); #define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) #define FAST_PATH_BITS_IMAGE (1 << 25) #define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26) -#define FAST_PATH_16_FORMAT (1 << 27) #define FAST_PATH_PAD_REPEAT \ (FAST_PATH_NO_NONE_REPEAT | \ @@ -795,6 +833,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b); void * pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); +void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c); + pixman_bool_t _pixman_multiply_overflows_size (size_t a, size_t b); @@ -898,6 +939,8 @@ pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link) #define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? 
(high) : (v))) +#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) + /* Conversion between 8888 and 0565 */ static force_inline uint16_t @@ -1033,15 +1076,13 @@ float pixman_unorm_to_float (uint16_t u, int n_bits); #endif -#ifdef DEBUG - void _pixman_log_error (const char *function, const char *message); #define return_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return; \ @@ -1052,7 +1093,7 @@ _pixman_log_error (const char *function, const char *message); #define return_val_if_fail(expr, retval) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return (retval); \ @@ -1063,55 +1104,30 @@ _pixman_log_error (const char *function, const char *message); #define critical_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ } \ while (0) - -#else - -#define _pixman_log_error(f,m) do { } while (0) - -#define return_if_fail(expr) \ - do \ - { \ - if (!(expr)) \ - return; \ - } \ - while (0) - -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (!(expr)) \ - return (retval); \ - } \ - while (0) - -#define critical_if_fail(expr) \ - do \ - { \ - } \ - while (0) -#endif - /* * Matrix */ typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; +PIXMAN_EXPORT pixman_bool_t pixman_transform_point_31_16 (const pixman_transform_t *t, const pixman_vector_48_16_t *v, pixman_vector_48_16_t *result); +PIXMAN_EXPORT void pixman_transform_point_31_16_3d (const pixman_transform_t *t, const pixman_vector_48_16_t *v, pixman_vector_48_16_t *result); +PIXMAN_EXPORT void pixman_transform_point_31_16_affine (const pixman_transform_t *t, const pixman_vector_48_16_t *v, diff --git a/gfx/cairo/libpixman/src/pixman-radial-gradient.c b/gfx/cairo/libpixman/src/pixman-radial-gradient.c index 3d539b1c86..e8e99c98b9 
100644 --- a/gfx/cairo/libpixman/src/pixman-radial-gradient.c +++ b/gfx/cairo/libpixman/src/pixman-radial-gradient.c @@ -34,8 +34,6 @@ #include <math.h> #include "pixman-private.h" -#include "pixman-dither.h" - static inline pixman_fixed_32_32_t dot (pixman_fixed_48_16_t x1, pixman_fixed_48_16_t y1, @@ -68,15 +66,18 @@ fdot (double x1, return x1 * x2 + y1 * y2 + z1 * z2; } -static uint32_t -radial_compute_color (double a, - double b, - double c, - double inva, - double dr, - double mindr, - pixman_gradient_walker_t *walker, - pixman_repeat_t repeat) +static void +radial_write_color (double a, + double b, + double c, + double inva, + double dr, + double mindr, + pixman_gradient_walker_t *walker, + pixman_repeat_t repeat, + int Bpp, + pixman_gradient_walker_write_t write_pixel, + uint32_t *buffer) { /* * In this function error propagation can lead to bad results: @@ -101,21 +102,31 @@ radial_compute_color (double a, double t; if (b == 0) - return 0; + { + memset (buffer, 0, Bpp); + return; + } t = pixman_fixed_1 / 2 * c / b; if (repeat == PIXMAN_REPEAT_NONE) { if (0 <= t && t <= pixman_fixed_1) - return _pixman_gradient_walker_pixel (walker, t); + { + write_pixel (walker, t, buffer); + return; + } } else { if (t * dr >= mindr) - return _pixman_gradient_walker_pixel (walker, t); + { + write_pixel (walker, t, buffer); + return; + } } - return 0; + memset (buffer, 0, Bpp); + return; } discr = fdot (b, a, 0, b, -c, 0); @@ -141,24 +152,40 @@ radial_compute_color (double a, if (repeat == PIXMAN_REPEAT_NONE) { if (0 <= t0 && t0 <= pixman_fixed_1) - return _pixman_gradient_walker_pixel (walker, t0); + { + write_pixel (walker, t0, buffer); + return; + } else if (0 <= t1 && t1 <= pixman_fixed_1) - return _pixman_gradient_walker_pixel (walker, t1); + { + write_pixel (walker, t1, buffer); + return; + } } else { if (t0 * dr >= mindr) - return _pixman_gradient_walker_pixel (walker, t0); + { + write_pixel (walker, t0, buffer); + return; + } else if (t1 * dr >= mindr) - return 
_pixman_gradient_walker_pixel (walker, t1); + { + write_pixel (walker, t1, buffer); + return; + } } } - return 0; + memset (buffer, 0, Bpp); + return; } static uint32_t * -radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +radial_get_scanline (pixman_iter_t *iter, + const uint32_t *mask, + int Bpp, + pixman_gradient_walker_write_t write_pixel) { /* * Implementation of radial gradients following the PDF specification. @@ -249,7 +276,7 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) gradient_t *gradient = (gradient_t *)image; radial_gradient_t *radial = (radial_gradient_t *)image; - uint32_t *end = buffer + width; + uint32_t *end = buffer + width * (Bpp / 4); pixman_gradient_walker_t walker; pixman_vector_t v, unit; @@ -332,18 +359,21 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { if (!mask || *mask++) { - *buffer = radial_compute_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat); + radial_write_color (radial->a, b, c, + radial->inva, + radial->delta.radius, + radial->mindr, + &walker, + image->common.repeat, + Bpp, + write_pixel, + buffer); } b += db; c += dc; dc += ddc; - ++buffer; + buffer += (Bpp / 4); } } else @@ -377,20 +407,23 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) pdx, pdy, radial->c1.radius); /* / pixman_fixed_1 / pixman_fixed_1 */ - *buffer = radial_compute_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat); + radial_write_color (radial->a, b, c, + radial->inva, + radial->delta.radius, + radial->mindr, + &walker, + image->common.repeat, + Bpp, + write_pixel, + buffer); } else { - *buffer = 0; + memset (buffer, 0, Bpp); } } - ++buffer; + buffer += (Bpp / 4); v.vector[0] += unit.vector[0]; v.vector[1] += unit.vector[1]; @@ -403,286 +436,35 @@ radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) } static 
uint32_t * -radial_get_scanline_16 (pixman_iter_t *iter, const uint32_t *mask) +radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { - /* - * Implementation of radial gradients following the PDF specification. - * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference - * Manual (PDF 32000-1:2008 at the time of this writing). - * - * In the radial gradient problem we are given two circles (c₁,r₁) and - * (c₂,r₂) that define the gradient itself. - * - * Mathematically the gradient can be defined as the family of circles - * - * ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂) - * - * excluding those circles whose radius would be < 0. When a point - * belongs to more than one circle, the one with a bigger t is the only - * one that contributes to its color. When a point does not belong - * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0). - * Further limitations on the range of values for t are imposed when - * the gradient is not repeated, namely t must belong to [0,1]. - * - * The graphical result is the same as drawing the valid (radius > 0) - * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient - * is not repeated) using SOURCE operator composition. - * - * It looks like a cone pointing towards the viewer if the ending circle - * is smaller than the starting one, a cone pointing inside the page if - * the starting circle is the smaller one and like a cylinder if they - * have the same radius. - * - * What we actually do is, given the point whose color we are interested - * in, compute the t values for that point, solving for t in: - * - * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂ - * - * Let's rewrite it in a simpler way, by defining some auxiliary - * variables: - * - * cd = c₂ - c₁ - * pd = p - c₁ - * dr = r₂ - r₁ - * length(t·cd - pd) = r₁ + t·dr - * - * which actually means - * - * hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr - * - * or - * - * ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr. 
- * - * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes: - * - * (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)² - * - * where we can actually expand the squares and solve for t: - * - * t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² = - * = r₁² + 2·r₁·t·dr + t²·dr² - * - * (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t + - * (pdx² + pdy² - r₁²) = 0 - * - * A = cdx² + cdy² - dr² - * B = pdx·cdx + pdy·cdy + r₁·dr - * C = pdx² + pdy² - r₁² - * At² - 2Bt + C = 0 - * - * The solutions (unless the equation degenerates because of A = 0) are: - * - * t = (B ± ⎷(B² - A·C)) / A - * - * The solution we are going to prefer is the bigger one, unless the - * radius associated to it is negative (or it falls outside the valid t - * range). - * - * Additional observations (useful for optimizations): - * A does not depend on p - * - * A < 0 <=> one of the two circles completely contains the other one - * <=> for every p, the radiuses associated with the two t solutions - * have opposite sign - */ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint16_t *buffer = iter->buffer; - pixman_bool_t toggle = ((x ^ y) & 1); - - gradient_t *gradient = (gradient_t *)image; - radial_gradient_t *radial = (radial_gradient_t *)image; - uint16_t *end = buffer + width; - pixman_gradient_walker_t walker; - pixman_vector_t v, unit; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return iter->buffer; - - unit.vector[0] = image->common.transform->matrix[0][0]; - unit.vector[1] = image->common.transform->matrix[1][0]; - unit.vector[2] = image->common.transform->matrix[2][0]; - } - 
else - { - unit.vector[0] = pixman_fixed_1; - unit.vector[1] = 0; - unit.vector[2] = 0; - } - - if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1) - { - /* - * Given: - * - * t = (B ± ⎷(B² - A·C)) / A - * - * where - * - * A = cdx² + cdy² - dr² - * B = pdx·cdx + pdy·cdy + r₁·dr - * C = pdx² + pdy² - r₁² - * det = B² - A·C - * - * Since we have an affine transformation, we know that (pdx, pdy) - * increase linearly with each pixel, - * - * pdx = pdx₀ + n·ux, - * pdy = pdy₀ + n·uy, - * - * we can then express B, C and det through multiple differentiation. - */ - pixman_fixed_32_32_t b, db, c, dc, ddc; - - /* warning: this computation may overflow */ - v.vector[0] -= radial->c1.x; - v.vector[1] -= radial->c1.y; - - /* - * B and C are computed and updated exactly. - * If fdot was used instead of dot, in the worst case it would - * lose 11 bits of precision in each of the multiplication and - * summing up would zero out all the bit that were preserved, - * thus making the result 0 instead of the correct one. 
- * This would mean a worst case of unbound relative error or - * about 2^10 absolute error - */ - b = dot (v.vector[0], v.vector[1], radial->c1.radius, - radial->delta.x, radial->delta.y, radial->delta.radius); - db = dot (unit.vector[0], unit.vector[1], 0, - radial->delta.x, radial->delta.y, 0); - - c = dot (v.vector[0], v.vector[1], - -((pixman_fixed_48_16_t) radial->c1.radius), - v.vector[0], v.vector[1], radial->c1.radius); - dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0], - 2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1], - 0, - unit.vector[0], unit.vector[1], 0); - ddc = 2 * dot (unit.vector[0], unit.vector[1], 0, - unit.vector[0], unit.vector[1], 0); - - while (buffer < end) - { - if (!mask || *mask++) - { - *buffer = dither_8888_to_0565( - radial_compute_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat), - toggle); - } - - toggle ^= 1; - b += db; - c += dc; - dc += ddc; - ++buffer; - } - } - else - { - /* projective */ - /* Warning: - * error propagation guarantees are much looser than in the affine case - */ - while (buffer < end) - { - if (!mask || *mask++) - { - if (v.vector[2] != 0) - { - double pdx, pdy, invv2, b, c; - - invv2 = 1. 
* pixman_fixed_1 / v.vector[2]; - - pdx = v.vector[0] * invv2 - radial->c1.x; - /* / pixman_fixed_1 */ - - pdy = v.vector[1] * invv2 - radial->c1.y; - /* / pixman_fixed_1 */ - - b = fdot (pdx, pdy, radial->c1.radius, - radial->delta.x, radial->delta.y, - radial->delta.radius); - /* / pixman_fixed_1 / pixman_fixed_1 */ - - c = fdot (pdx, pdy, -radial->c1.radius, - pdx, pdy, radial->c1.radius); - /* / pixman_fixed_1 / pixman_fixed_1 */ - - *buffer = dither_8888_to_0565 ( - radial_compute_color (radial->a, b, c, - radial->inva, - radial->delta.radius, - radial->mindr, - &walker, - image->common.repeat), - toggle); - } - else - { - *buffer = 0; - } - } - - ++buffer; - toggle ^= 1; - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - - iter->y++; - return iter->buffer; + return radial_get_scanline (iter, mask, 4, + _pixman_gradient_walker_write_narrow); } + static uint32_t * radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { - uint32_t *buffer = radial_get_scanline_narrow (iter, NULL); - - pixman_expand_to_float ( - (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); - - return buffer; + return radial_get_scanline (iter, NULL, 16, + _pixman_gradient_walker_write_wide); } void _pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) { - if (iter->iter_flags & ITER_16) - iter->get_scanline = radial_get_scanline_16; - else if (iter->iter_flags & ITER_NARROW) + if (iter->iter_flags & ITER_NARROW) iter->get_scanline = radial_get_scanline_narrow; else iter->get_scanline = radial_get_scanline_wide; } - PIXMAN_EXPORT pixman_image_t * pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner, - const pixman_point_fixed_t * outer, - pixman_fixed_t inner_radius, - pixman_fixed_t outer_radius, - const pixman_gradient_stop_t *stops, - int n_stops) + const pixman_point_fixed_t * outer, + pixman_fixed_t inner_radius, + pixman_fixed_t outer_radius, + const 
pixman_gradient_stop_t *stops, + int n_stops) { pixman_image_t *image; radial_gradient_t *radial; diff --git a/gfx/cairo/libpixman/src/pixman-region.c b/gfx/cairo/libpixman/src/pixman-region.c index 7f2e29b15b..59bc9c7971 100644 --- a/gfx/cairo/libpixman/src/pixman-region.c +++ b/gfx/cairo/libpixman/src/pixman-region.c @@ -298,13 +298,6 @@ PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2) box_type_t *rects1; box_type_t *rects2; - /* - * If the region is empty the extents are undefined so we need to check - * for empty before comparing the extents. - */ - if (PIXREGION_NIL (reg1) && PIXREGION_NIL(reg2)) - return TRUE; - if (reg1->extents.x1 != reg2->extents.x1) return FALSE; @@ -1341,15 +1334,6 @@ PREFIX(_intersect_rect) (region_type_t *dest, region.extents.x2 = x + width; region.extents.y2 = y + height; - if (!GOOD_RECT (&region.extents)) - { - if (BAD_RECT (&region.extents)) - _pixman_log_error (FUNC, "Invalid rectangle passed"); - FREE_DATA (dest); - PREFIX (_init) (dest); - return TRUE; - } - return PREFIX(_intersect) (dest, source, &region); } @@ -1874,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region, else if (r2->x1 <= x1) { /* - * Subtrahend preceeds minuend: nuke left edge of minuend. + * Subtrahend precedes minuend: nuke left edge of minuend. 
*/ x1 = r2->x2; if (x1 >= r1->x2) @@ -1998,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d, } /* Add those rectangles in region 1 that aren't in region 2, - do yucky substraction for overlaps, and + do yucky subtraction for overlaps, and just throw away rectangles in region 2 that aren't in region 1 */ if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; @@ -2058,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ } /* Add those rectangles in region 1 that aren't in region 2, - * do yucky substraction for overlaps, and + * do yucky subtraction for overlaps, and * just throw away rectangles in region 2 that aren't in region 1 */ inv_reg.extents = *inv_rect; diff --git a/gfx/cairo/libpixman/src/pixman-solid-fill.c b/gfx/cairo/libpixman/src/pixman-solid-fill.c index 5f9fef6306..4694ebc700 100644 --- a/gfx/cairo/libpixman/src/pixman-solid-fill.c +++ b/gfx/cairo/libpixman/src/pixman-solid-fill.c @@ -30,10 +30,10 @@ static uint32_t color_to_uint32 (const pixman_color_t *color) { return - (color->alpha >> 8 << 24) | - (color->red >> 8 << 16) | - (color->green & 0xff00) | - (color->blue >> 8); + ((unsigned int) color->alpha >> 8 << 24) | + ((unsigned int) color->red >> 8 << 16) | + ((unsigned int) color->green & 0xff00) | + ((unsigned int) color->blue >> 8); } static argb_t diff --git a/gfx/cairo/libpixman/src/pixman-sse2.c b/gfx/cairo/libpixman/src/pixman-sse2.c index e4e668d389..ce4e75f247 100644 --- a/gfx/cairo/libpixman/src/pixman-sse2.c +++ b/gfx/cairo/libpixman/src/pixman-sse2.c @@ -30,6 +30,9 @@ #include <config.h> #endif +/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */ +#define PSHUFD_IS_FAST 0 + #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ #include <emmintrin.h> /* for SSE2 intrinsics */ #include "pixman-private.h" @@ -515,7 +518,8 @@ core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) static force_inline uint32_t combine1 (const 
uint32_t *ps, const uint32_t *pm) { - uint32_t s = *ps; + uint32_t s; + memcpy(&s, ps, sizeof(uint32_t)); if (pm) { @@ -3198,7 +3202,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; - uint32_t m, d; + uint32_t d; __m128i xmm_src, xmm_alpha, xmm_def; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; @@ -3253,7 +3257,8 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - m = *((uint32_t*)mask); + uint32_t m; + memcpy(&m, mask, sizeof(uint32_t)); if (srca == 0xff && m == 0xffffffff) { @@ -3330,8 +3335,8 @@ sse2_fill (pixman_implementation_t *imp, if (bpp == 8) { - uint8_t b; - uint16_t w; + uint32_t b; + uint32_t w; stride = stride * (int) sizeof (uint32_t) / 1; byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); @@ -3473,7 +3478,6 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; - uint32_t m; __m128i xmm_src, xmm_def; __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; @@ -3525,7 +3529,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - m = *((uint32_t*)mask); + uint32_t m; + memcpy(&m, mask, sizeof(uint32_t)); if (srca == 0xff && m == 0xffffffff) { @@ -3591,7 +3596,6 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; - uint32_t m; __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; __m128i xmm_src, xmm_alpha; @@ -3623,7 +3627,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, while (w && (uintptr_t)dst & 15) { - m = *mask++; + uint8_t m = *mask++; if (m) { @@ -3643,11 +3647,13 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, while (w >= 8) { + uint32_t m; + xmm_dst = load_128_aligned ((__m128i*) dst); unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); - m = *((uint32_t*)mask); + memcpy(&m, mask, sizeof(uint32_t)); mask += 4; if 
(m) @@ -3667,7 +3673,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, &xmm_dst0, &xmm_dst1); } - m = *((uint32_t*)mask); + memcpy(&m, mask, sizeof(uint32_t)); mask += 4; if (m) @@ -3696,7 +3702,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, while (w) { - m = *mask++; + uint8_t m = *mask++; if (m) { @@ -4058,7 +4064,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp, uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; - uint32_t d, m; + uint32_t d; uint32_t src; int32_t w; @@ -4085,7 +4091,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp, while (w && ((uintptr_t)dst & 15)) { - m = (uint32_t) *mask++; + uint8_t m = *mask++; d = (uint32_t) *dst; *dst++ = (uint8_t) pack_1x128_32 ( @@ -4122,7 +4128,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp, while (w) { - m = (uint32_t) *mask++; + uint8_t m = *mask++; d = (uint32_t) *dst; *dst++ = (uint8_t) pack_1x128_32 ( @@ -4299,7 +4305,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp, int dst_stride, mask_stride; int32_t w; uint32_t src; - uint32_t m, d; + uint32_t d; __m128i xmm_alpha; __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; @@ -4324,7 +4330,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp, while (w && ((uintptr_t)dst & 15)) { - m = (uint32_t) *mask++; + uint8_t m = *mask++; d = (uint32_t) *dst; *dst++ = (uint8_t) pack_1x128_32 ( @@ -4360,7 +4366,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp, while (w) { - m = (uint32_t) *mask++; + uint8_t m = (uint32_t) *mask++; d = (uint32_t) *dst; *dst++ = (uint8_t) pack_1x128_32 ( @@ -4558,7 +4564,7 @@ sse2_composite_add_n_8888 (pixman_implementation_t *imp, dst = dst_line; dst_line += dst_stride; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { d = *dst; *dst++ = @@ -4617,7 +4623,7 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && 
((uintptr_t)dst & 15)) { uint8_t m = *mask++; if (m) @@ -4633,7 +4639,9 @@ sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - uint32_t m = *(uint32_t*)mask; + uint32_t m; + memcpy(&m, mask, sizeof(uint32_t)); + if (m) { __m128i xmm_mask_lo, xmm_mask_hi; @@ -4740,7 +4748,7 @@ sse2_blt (pixman_implementation_t *imp, while (w >= 2 && ((uintptr_t)d & 3)) { - *(uint16_t *)d = *(uint16_t *)s; + memmove(d, s, 2); w -= 2; s += 2; d += 2; @@ -4748,7 +4756,7 @@ sse2_blt (pixman_implementation_t *imp, while (w >= 4 && ((uintptr_t)d & 15)) { - *(uint32_t *)d = *(uint32_t *)s; + memmove(d, s, 4); w -= 4; s += 4; @@ -4785,7 +4793,7 @@ sse2_blt (pixman_implementation_t *imp, while (w >= 4) { - *(uint32_t *)d = *(uint32_t *)s; + memmove(d, s, 4); w -= 4; s += 4; @@ -4794,7 +4802,7 @@ sse2_blt (pixman_implementation_t *imp, if (w >= 2) { - *(uint16_t *)d = *(uint16_t *)s; + memmove(d, s, 2); w -= 2; s += 2; d += 2; @@ -4826,7 +4834,6 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, uint32_t *src, *src_line, s; uint32_t *dst, *dst_line, d; uint8_t *mask, *mask_line; - uint32_t m; int src_stride, mask_stride, dst_stride; int32_t w; __m128i ms; @@ -4855,8 +4862,8 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, while (w && (uintptr_t)dst & 15) { + uint8_t m = *mask++; s = 0xff000000 | *src++; - m = (uint32_t) *mask++; d = *dst; ms = unpack_32_1x128 (s); @@ -4874,7 +4881,8 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - m = *(uint32_t*) mask; + uint32_t m; + memcpy(&m, mask, sizeof(uint32_t)); xmm_src = _mm_or_si128 ( load_128_unaligned ((__m128i*)src), mask_ff000000); @@ -4910,7 +4918,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, while (w) { - m = (uint32_t) *mask++; + uint8_t m = *mask++; if (m) { @@ -4951,7 +4959,6 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, uint32_t *src, *src_line, s; uint32_t *dst, *dst_line, d; uint8_t *mask, *mask_line; - 
uint32_t m; int src_stride, mask_stride, dst_stride; int32_t w; @@ -4980,9 +4987,9 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, while (w && (uintptr_t)dst & 15) { uint32_t sa; + uint8_t m = *mask++; s = *src++; - m = (uint32_t) *mask++; d = *dst; sa = s >> 24; @@ -5013,7 +5020,8 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - m = *(uint32_t *) mask; + uint32_t m; + memcpy(&m, mask, sizeof(uint32_t)); if (m) { @@ -5052,9 +5060,9 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, while (w) { uint32_t sa; + uint8_t m = *mask++; s = *src++; - m = (uint32_t) *mask++; d = *dst; sa = s >> 24; @@ -5554,69 +5562,134 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) -#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1) +#if PSHUFD_IS_FAST + +/***********************************************************************************/ -#define BILINEAR_DECLARE_VARIABLES \ +# define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\ - const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ - const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\ - const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ - const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ - unit_x, unit_x, unit_x, unit_x); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ const 
__m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) + __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \ + vx + unit_x * 2, -(vx + 1) - unit_x * 2, \ + vx + unit_x * 1, -(vx + 1) - unit_x * 1, \ + vx + unit_x * 0, -(vx + 1) - unit_x * 0); \ + __m128i xmm_wh_state; -#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \ do { \ - __m128i xmm_wh, xmm_lo, xmm_hi, a; \ + int phase = phase_; \ + __m128i xmm_wh, xmm_a, xmm_b; \ /* fetch 2x2 pixel block into sse2 registers */ \ - __m128i tltr = _mm_loadl_epi64 ( \ - (__m128i *)&src_top[pixman_fixed_to_int (vx)]); \ - __m128i blbr = _mm_loadl_epi64 ( \ - (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \ + __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ vx += unit_x; \ /* vertical interpolation */ \ - a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \ - xmm_wt), \ - _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \ - xmm_wb)); \ - if (BILINEAR_INTERPOLATION_BITS < 8) \ - { \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ - a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \ - } \ - else \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + if (phase <= 0) \ { \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ - xmm_x = _mm_add_epi16 
(xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ - xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ - a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ - _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \ + phase = 0; \ } \ - /* shift and pack the result */ \ - a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \ - a = _mm_packs_epi32 (a, a); \ - a = _mm_packus_epi16 (a, a); \ - pix = _mm_cvtsi128_si32 (a); \ + xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \ + phase, phase)); \ + /* horizontal interpolation */ \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ + xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ } while (0) +#else /************************************************************************/ + +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \ +do { \ + __m128i xmm_wh, xmm_a, xmm_b; \ + /* fetch 2x2 pixel block into sse2 registers */ \ + __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = 
_mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ + (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \ + vx += unit_x; \ + /* vertical interpolation */ \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ + /* horizontal interpolation */ \ + xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ +} while (0) + +/***********************************************************************************/ + +#endif + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); \ +do { \ + __m128i xmm_pix; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \ + xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \ + xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \ + pix = _mm_cvtsi128_si32 (xmm_pix); \ +} while(0) + +#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); \ +do { \ + __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \ + xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \ + xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \ + pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \ +} while(0) + #define BILINEAR_SKIP_ONE_PIXEL() \ do { \ vx += unit_x; \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ } while(0) +#define BILINEAR_SKIP_FOUR_PIXELS() \ +do { \ + vx += unit_x * 4; \ + xmm_x = _mm_add_epi16 (xmm_x, 
xmm_ux4); \ +} while(0) + +/***********************************************************************************/ + static force_inline void scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, const uint32_t * mask, @@ -5625,24 +5698,28 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; - while ((w -= 4) >= 0) + while (w && ((uintptr_t)dst & 15)) { BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); *dst++ = pix1; - *dst++ = pix2; - *dst++ = pix3; - *dst++ = pix4; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, xmm_src); + dst += 4; } if (w & 2) @@ -5661,23 +5738,20 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, } -/* Add extra NULL argument to the existing bilinear fast paths to indicate - * that we don't need two-pass processing */ - FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_8888_8888_SRC, uint32_t, uint32_t, uint32_t, COVER, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_8888_8888_SRC, uint32_t, uint32_t, uint32_t, PAD, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC, - scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_8888_8888_SRC, uint32_t, uint32_t, uint32_t, NONE, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, - 
scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_8888_8888_SRC, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_NONE) @@ -5697,7 +5771,7 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, intptr_t vx = vx_; intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; while (w && ((uintptr_t)dst & 15)) { @@ -5708,14 +5782,9 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, while ((w -= 4) >= 0) { __m128i xmm_src; - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); - - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); - _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); - dst += 4; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); + dst += 4; } if (w & 2) @@ -5734,23 +5803,18 @@ scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, } FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_x888_8888_SRC, uint32_t, uint32_t, uint32_t, COVER, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_x888_8888_SRC, uint32_t, uint32_t, uint32_t, PAD, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, - scaled_bilinear_scanline_sse2_x888_8888_SRC, NULL, + scaled_bilinear_scanline_sse2_x888_8888_SRC, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_NONE) -#if 0 -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL, - uint32_t, uint32_t, uint32_t, - PAD, FLAG_NONE) -#endif + static force_inline void scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, const uint32_t * mask, @@ -5759,13 +5823,15 @@ 
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; while (w && ((uintptr_t)dst & 15)) { @@ -5787,12 +5853,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; __m128i xmm_alpha_hi, xmm_alpha_lo; - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); - - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); if (!is_zero (xmm_src)) { @@ -5835,56 +5896,22 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, } FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, uint32_t, COVER, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, uint32_t, PAD, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, uint32_t, NONE, FLAG_NONE) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_NONE) - -/* An example of SSE2 two-stage bilinear_over_8888_0565 fast path, which is implemented - as scaled_bilinear_scanline_sse2_8888_8888_SRC + op_bilinear_over_8888_0565 */ - -void 
op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width) -{ - /* Note: this is not really fast and should be based on 8 pixel loop from sse2_composite_over_8888_0565 */ - while (--width >= 0) - { - *dst = composite_over_8888_0565pixel (*src, *dst); - src++; - dst++; - } -} - -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_cover_OVER, - scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - COVER, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - PAD, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_none_OVER, - scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - NONE, FLAG_NONE) -FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_normal_OVER, - scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565, - uint32_t, uint32_t, uint16_t, - NORMAL, FLAG_NONE) - -/*****************************/ - static force_inline void scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, const uint8_t * mask, @@ -5893,20 +5920,20 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; - uint32_t m; + uint32_t pix1, pix2; while (w && ((uintptr_t)dst & 15)) { uint32_t sa; - - m = (uint32_t) *mask++; + uint8_t m = *mask++; if (m) { @@ -5942,20 +5969,17 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, while (w >= 4) { + uint32_t m; + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - m = 
*(uint32_t*)mask; + memcpy(&m, mask, sizeof(uint32_t)); if (m) { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); - - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); if (m == 0xffffffff && is_opaque (xmm_src)) { @@ -5982,10 +6006,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, } else { - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_FOUR_PIXELS (); } w -= 4; @@ -5996,8 +6017,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, while (w) { uint32_t sa; - - m = (uint32_t) *mask++; + uint8_t m = *mask++; if (m) { @@ -6033,19 +6053,19 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, } FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, uint32_t, uint8_t, uint32_t, COVER, FLAG_HAVE_NON_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, uint32_t, uint8_t, uint32_t, PAD, FLAG_HAVE_NON_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, uint32_t, uint8_t, uint32_t, NONE, FLAG_HAVE_NON_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, uint32_t, uint8_t, uint32_t, NORMAL, FLAG_HAVE_NON_SOLID_MASK) @@ -6057,13 +6077,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + 
pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1; __m128i xmm_mask; if (zero_src || (*mask >> 24) == 0) @@ -6093,19 +6115,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, while (w >= 4) { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - if (pix1 | pix2 | pix3 | pix4) + if (!is_zero (xmm_src)) { - __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); - xmm_dst = load_128_aligned ((__m128i*)dst); unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); @@ -6148,19 +6166,19 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, } FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, COVER, FLAG_HAVE_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, PAD, FLAG_HAVE_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NONE, FLAG_HAVE_SOLID_MASK) FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, - scaled_bilinear_scanline_sse2_8888_n_8888_OVER, NULL, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_HAVE_SOLID_MASK) @@ -6260,31 +6278,15 @@ static const pixman_fast_path_t 
sse2_fast_paths[] = PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8), PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8), - SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), - SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), - 
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), @@ -6315,11 +6317,6 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), - /* and here the needed entries are added to the fast path table */ - - SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, sse2_8888_0565), - SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, b5g6r5, sse2_8888_0565), - { PIXMAN_OP_NONE }, }; @@ -6417,7 +6414,7 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) while (w && (((uintptr_t)dst) & 15)) { - *dst++ = *(src++) << 24; + *dst++ = (uint32_t)(*(src++)) << 24; w--; } @@ -6444,59 +6441,30 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) while (w) { - *dst++ = *(src++) << 24; + *dst++ = (uint32_t)(*(src++)) << 24; w--; } return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, - { PIXMAN_a8, sse2_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - 
(iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t sse2_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL + }, + { PIXMAN_null }, +}; #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) @@ -6554,7 +6522,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) imp->blt = sse2_blt; imp->fill = sse2_fill; - imp->src_iter_init = sse2_src_iter_init; + imp->iter_info = sse2_iters; return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-ssse3.c b/gfx/cairo/libpixman/src/pixman-ssse3.c new file mode 100644 index 0000000000..680d6b95a0 --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-ssse3.c @@ -0,0 +1,351 @@ +/* + * Copyright © 2013 Soren Sandmann Pedersen + * Copyright © 2013 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann (soren.sandmann@gmail.com) + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +#include "pixman-private.h" +#include "pixman-inlines.h" + +typedef struct +{ + int y; + uint64_t * buffer; +} line_t; + +typedef struct +{ + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; + +static void +ssse3_fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ + uint32_t *bits = image->bits + y * image->rowstride; + __m128i vx = _mm_set_epi16 ( + - (x + 1), x, - (x + 1), x, + - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); + __m128i vux = _mm_set_epi16 ( + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); + __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); + __m128i *b = (__m128i *)line->buffer; + __m128i vrl0, vrl1; + + while ((n -= 2) >= 0) + { + __m128i vw, vr, s; + + vrl1 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x + ux))); + /* vrl1: R1, L1 */ + + final_pixel: + vrl0 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x))); + /* vrl0: R0, L0 */ + + /* The weights are based on vx which is a vector of + * + * - (x + 1), x, - (x + 1), x, + * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux + * + * so the 16 bit weights end up like this: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + * + * and after shifting and packing, we get these bytes: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * + * which means the first and the second input pixel + * have to be interleaved like this: + * + * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + * + * before maddubsw can be used. 
+ */ + + vw = _mm_add_epi16 ( + vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + + vw = _mm_packus_epi16 (vw, vw); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + vx = _mm_add_epi16 (vx, vux); + + x += 2 * ux; + + vr = _mm_unpacklo_epi16 (vrl1, vrl0); + /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ + + s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); + /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ + + vr = _mm_unpackhi_epi8 (vr, s); + /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + */ + + vr = _mm_maddubs_epi16 (vr, vw); + + /* When the weight is 0, the inverse weight is + * 128 which can't be represented in a signed byte. + * As a result maddubsw computes the following: + * + * r = l * -128 + r * 0 + * + * rather than the desired + * + * r = l * 128 + r * 0 + * + * We fix this by taking the absolute value of the + * result. 
+ */ + vr = _mm_abs_epi16 (vr); + + /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ + _mm_store_si128 (b++, vr); + } + + if (n == -1) + { + vrl1 = _mm_setzero_si128(); + goto final_pixel; + } + + line->y = y; +} + +static uint32_t * +ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + __m128i vw; + int i; + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS); + + vw = _mm_set_epi16 ( + dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y); + + for (i = 0; i + 3 < iter->width; i += 4) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2)); + __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2)); + __m128i r0, r1, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + r1 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot1, top1), vw); + tmp = _mm_cmplt_epi16 (bot1, top1); + tmp = _mm_and_si128 (tmp, vw); + r1 = _mm_sub_epi16 (r1, tmp); + r1 = _mm_add_epi16 (r1, top1); + r1 
= _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS); + r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1)); + /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */ + + p = _mm_packus_epi16 (r0, r1); + + _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p); + } + + while (i < iter->width) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i r0, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + p = _mm_packus_epi16 (r0, r0); + + if (iter->width - i == 1) + { + *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p); + i++; + } + else + { + _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p); + i += 2; + } + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64); + if (!info) + goto fail; + + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; + +#define ALIGN(addr) \ + ((void *)((((uintptr_t)(addr)) + 15) & (~15))) + + /* It is safe to 
set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = ALIGN (&(info->data[0])); + info->lines[1].y = -1; + info->lines[1].buffer = ALIGN (info->lines[0].buffer + width); + + iter->get_scanline = ssse3_fetch_bilinear_cover; + iter->fini = ssse3_bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static const pixman_iter_info_t ssse3_iters[] = +{ + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + ITER_NARROW | ITER_SRC, + ssse3_bilinear_cover_iter_init, + NULL, NULL + }, + + { PIXMAN_null }, +}; + +static const pixman_fast_path_t ssse3_fast_paths[] = +{ + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, ssse3_fast_paths); + + imp->iter_info = ssse3_iters; + + return imp; +} diff --git a/gfx/cairo/libpixman/src/pixman-utils.c b/gfx/cairo/libpixman/src/pixman-utils.c index b2ffb8ca2c..2c2dddd64c 100644 --- a/gfx/cairo/libpixman/src/pixman-utils.c +++ b/gfx/cairo/libpixman/src/pixman-utils.c @@ -27,7 +27,6 @@ #endif #include <stdio.h> #include <stdlib.h> -#include <limits.h> #include "pixman-private.h" @@ -50,6 +49,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b) } void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c) +{ + if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c) + return NULL; + + return 
malloc (a * b + c); +} + +void * pixman_malloc_ab (unsigned int a, unsigned int b) { @@ -198,7 +206,7 @@ pixman_contract_from_float (uint32_t *dst, for (i = 0; i < width; ++i) { - uint8_t a, r, g, b; + uint32_t a, r, g, b; a = float_to_unorm (src[i].a, 8); r = float_to_unorm (src[i].r, 8); @@ -215,6 +223,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; + iter->stride = s; +} + #define N_TMP_BOXES (16) pixman_bool_t @@ -293,8 +312,6 @@ _pixman_internal_only_get_implementation (void) return get_implementation (); } -#ifdef DEBUG - void _pixman_log_error (const char *function, const char *message) { @@ -311,5 +328,3 @@ _pixman_log_error (const char *function, const char *message) n_messages++; } } - -#endif diff --git a/gfx/cairo/libpixman/src/pixman-version.h b/gfx/cairo/libpixman/src/pixman-version.h index fac4225b07..8b0e774271 100644 --- a/gfx/cairo/libpixman/src/pixman-version.h +++ b/gfx/cairo/libpixman/src/pixman-version.h @@ -32,10 +32,10 @@ #endif #define PIXMAN_VERSION_MAJOR 0 -#define PIXMAN_VERSION_MINOR 27 -#define PIXMAN_VERSION_MICRO 1 +#define PIXMAN_VERSION_MINOR 40 +#define PIXMAN_VERSION_MICRO 0 -#define PIXMAN_VERSION_STRING "0.27.1" +#define PIXMAN_VERSION_STRING "0.40.0" #define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ ((major) * 10000) \ @@ -47,4 +47,8 @@ PIXMAN_VERSION_MINOR, \ PIXMAN_VERSION_MICRO) +#ifndef PIXMAN_API +# define PIXMAN_API +#endif + #endif /* PIXMAN_VERSION_H__ */ diff --git a/gfx/cairo/libpixman/src/pixman-version.h.in b/gfx/cairo/libpixman/src/pixman-version.h.in new file mode 100644 index 0000000000..64778a595c --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-version.h.in 
@@ -0,0 +1,54 @@ +/* + * Copyright © 2008 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Carl D. 
Worth <cworth@cworth.org> + */ + +#ifndef PIXMAN_VERSION_H__ +#define PIXMAN_VERSION_H__ + +#ifndef PIXMAN_H__ +# error pixman-version.h should only be included by pixman.h +#endif + +#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ +#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ +#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ + +#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" + +#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ + ((major) * 10000) \ + + ((minor) * 100) \ + + ((micro) * 1)) + +#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ + PIXMAN_VERSION_MAJOR, \ + PIXMAN_VERSION_MINOR, \ + PIXMAN_VERSION_MICRO) + +#ifndef PIXMAN_API +# define PIXMAN_API +#endif + +#endif /* PIXMAN_VERSION_H__ */ diff --git a/gfx/cairo/libpixman/src/pixman-vmx.c b/gfx/cairo/libpixman/src/pixman-vmx.c index 6868704a87..52de37e69e 100644 --- a/gfx/cairo/libpixman/src/pixman-vmx.c +++ b/gfx/cairo/libpixman/src/pixman-vmx.c @@ -25,20 +25,46 @@ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" #include <altivec.h> #define AVV(x...) 
{x} +static vector unsigned int mask_ff000000; +static vector unsigned int mask_red; +static vector unsigned int mask_green; +static vector unsigned int mask_blue; +static vector unsigned int mask_565_fix_rb; +static vector unsigned int mask_565_fix_g; + static force_inline vector unsigned int splat_alpha (vector unsigned int pix) { +#ifdef WORDS_BIGENDIAN return vec_perm (pix, pix, (vector unsigned char)AVV ( 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); +#else + return vec_perm (pix, pix, + (vector unsigned char)AVV ( + 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, + 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F)); +#endif +} + +static force_inline vector unsigned int +splat_pixel (vector unsigned int pix) +{ + return vec_perm (pix, pix, + (vector unsigned char)AVV ( + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03)); } static force_inline vector unsigned int @@ -48,12 +74,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a) /* unpack to short */ hi = (vector unsigned short) +#ifdef WORDS_BIGENDIAN vec_mergeh ((vector unsigned char)AVV (0), (vector unsigned char)p); +#else + vec_mergeh ((vector unsigned char) p, + (vector unsigned char) AVV (0)); +#endif mod = (vector unsigned short) +#ifdef WORDS_BIGENDIAN vec_mergeh ((vector unsigned char)AVV (0), (vector unsigned char)a); +#else + vec_mergeh ((vector unsigned char) a, + (vector unsigned char) AVV (0)); +#endif hi = vec_mladd (hi, mod, (vector unsigned short) AVV (0x0080, 0x0080, 0x0080, 0x0080, @@ -65,11 +101,22 @@ pix_multiply (vector unsigned int p, vector unsigned int a) /* unpack to short */ lo = (vector unsigned short) +#ifdef WORDS_BIGENDIAN vec_mergel ((vector unsigned char)AVV (0), (vector unsigned char)p); +#else + vec_mergel ((vector unsigned char) p, + (vector unsigned char) AVV (0)); +#endif + mod = (vector unsigned short) +#ifdef WORDS_BIGENDIAN vec_mergel ((vector unsigned 
char)AVV (0), (vector unsigned char)a); +#else + vec_mergel ((vector unsigned char) a, + (vector unsigned char) AVV (0)); +#endif lo = vec_mladd (lo, mod, (vector unsigned short) AVV (0x0080, 0x0080, 0x0080, 0x0080, @@ -127,60 +174,316 @@ over (vector unsigned int src, over (pix_multiply (src, mask), \ pix_multiply (srca, mask), dest) +#ifdef WORDS_BIGENDIAN #define COMPUTE_SHIFT_MASK(source) \ source ## _mask = vec_lvsl (0, source); #define COMPUTE_SHIFT_MASKS(dest, source) \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ - store_mask = vec_lvsr (0, dest); + source ## _mask = vec_lvsl (0, source); #define COMPUTE_SHIFT_MASKC(dest, source, mask) \ mask ## _mask = vec_lvsl (0, mask); \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ - store_mask = vec_lvsr (0, dest); - -/* notice you have to declare temp vars... - * Note: tmp3 and tmp4 must remain untouched! - */ + source ## _mask = vec_lvsl (0, source); -#define LOAD_VECTORS(dest, source) \ +#define LOAD_VECTOR(source) \ +do \ +{ \ + vector unsigned char tmp1, tmp2; \ tmp1 = (typeof(tmp1))vec_ld (0, source); \ tmp2 = (typeof(tmp2))vec_ld (15, source); \ - tmp3 = (typeof(tmp3))vec_ld (0, dest); \ - v ## source = (typeof(v ## source)) \ + v ## source = (typeof(v ## source)) \ vec_perm (tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld (15, dest); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm (tmp3, tmp4, dest ## _mask); +} while (0) + +#define LOAD_VECTORS(dest, source) \ +do \ +{ \ + LOAD_VECTOR(source); \ + v ## dest = (typeof(v ## dest))vec_ld (0, dest); \ +} while (0) #define LOAD_VECTORSC(dest, source, mask) \ - tmp1 = (typeof(tmp1))vec_ld (0, source); \ - tmp2 = (typeof(tmp2))vec_ld (15, source); \ - tmp3 = (typeof(tmp3))vec_ld (0, dest); \ - v ## source = (typeof(v ## source)) \ - vec_perm (tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld (15, dest); \ - tmp1 = (typeof(tmp1))vec_ld (0, mask); \ - v ## dest = 
(typeof(v ## dest)) \ - vec_perm (tmp3, tmp4, dest ## _mask); \ - tmp2 = (typeof(tmp2))vec_ld (15, mask); \ - v ## mask = (typeof(v ## mask)) \ - vec_perm (tmp1, tmp2, mask ## _mask); +do \ +{ \ + LOAD_VECTORS(dest, source); \ + LOAD_VECTOR(mask); \ +} while (0) + +#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask +#define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask + +#else + +/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op. + * They are defined that way because little endian altivec can do unaligned + * reads natively and have no need for constructing the permutation pattern + * variables. + */ +#define COMPUTE_SHIFT_MASK(source) + +#define COMPUTE_SHIFT_MASKS(dest, source) + +#define COMPUTE_SHIFT_MASKC(dest, source, mask) + +# define LOAD_VECTOR(source) \ + v ## source = (typeof(v ## source))vec_xl(0, source); + +# define LOAD_VECTORS(dest, source) \ + LOAD_VECTOR(source); \ + LOAD_VECTOR(dest); \ + +# define LOAD_VECTORSC(dest, source, mask) \ + LOAD_VECTORS(dest, source); \ + LOAD_VECTOR(mask); \ + +#define DECLARE_SRC_MASK_VAR +#define DECLARE_MASK_MASK_VAR + +#endif /* WORDS_BIGENDIAN */ #define LOAD_VECTORSM(dest, source, mask) \ - LOAD_VECTORSC (dest, source, mask) \ + LOAD_VECTORSC (dest, source, mask); \ v ## source = pix_multiply (v ## source, \ splat_alpha (v ## mask)); #define STORE_VECTOR(dest) \ - edges = vec_perm (tmp4, tmp3, dest ## _mask); \ - tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ - tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ - vec_st ((vector unsigned int) tmp3, 15, dest); \ - vec_st ((vector unsigned int) tmp1, 0, dest); + vec_st ((vector unsigned int) v ## dest, 0, dest); + +/* load 4 pixels from a 16-byte boundary aligned address */ +static force_inline vector unsigned int +load_128_aligned (const uint32_t* src) +{ + return *((vector unsigned int *) src); +} + +/* load 4 pixels from a unaligned address */ +static force_inline vector unsigned 
int +load_128_unaligned (const uint32_t* src) +{ + vector unsigned int vsrc; + DECLARE_SRC_MASK_VAR; + + COMPUTE_SHIFT_MASK (src); + LOAD_VECTOR (src); + + return vsrc; +} + +/* save 4 pixels on a 16-byte boundary aligned address */ +static force_inline void +save_128_aligned (uint32_t* data, + vector unsigned int vdata) +{ + STORE_VECTOR(data) +} + +static force_inline vector unsigned int +create_mask_1x32_128 (const uint32_t *src) +{ + vector unsigned int vsrc; + DECLARE_SRC_MASK_VAR; + + COMPUTE_SHIFT_MASK (src); + LOAD_VECTOR (src); + return vec_splat(vsrc, 0); +} + +static force_inline vector unsigned int +create_mask_32_128 (uint32_t mask) +{ + return create_mask_1x32_128(&mask); +} + +static force_inline vector unsigned int +unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned char lo; + + /* unpack to short */ + lo = (vector unsigned char) +#ifdef WORDS_BIGENDIAN + vec_mergel ((vector unsigned char) data2, + (vector unsigned char) data1); +#else + vec_mergel ((vector unsigned char) data1, + (vector unsigned char) data2); +#endif + + return (vector unsigned int) lo; +} + +static force_inline vector unsigned int +unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned char hi; + + /* unpack to short */ + hi = (vector unsigned char) +#ifdef WORDS_BIGENDIAN + vec_mergeh ((vector unsigned char) data2, + (vector unsigned char) data1); +#else + vec_mergeh ((vector unsigned char) data1, + (vector unsigned char) data2); +#endif + + return (vector unsigned int) hi; +} + +static force_inline vector unsigned int +unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned short lo; + + /* unpack to char */ + lo = (vector unsigned short) +#ifdef WORDS_BIGENDIAN + vec_mergel ((vector unsigned short) data2, + (vector unsigned short) data1); +#else + vec_mergel ((vector unsigned short) data1, + (vector unsigned short) data2); +#endif + + return (vector unsigned 
int) lo; +} + +static force_inline vector unsigned int +unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned short hi; + + /* unpack to char */ + hi = (vector unsigned short) +#ifdef WORDS_BIGENDIAN + vec_mergeh ((vector unsigned short) data2, + (vector unsigned short) data1); +#else + vec_mergeh ((vector unsigned short) data1, + (vector unsigned short) data2); +#endif + + return (vector unsigned int) hi; +} + +static force_inline void +unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2, + vector unsigned int* data_lo, vector unsigned int* data_hi) +{ + *data_lo = unpacklo_128_16x8(data1, data2); + *data_hi = unpackhi_128_16x8(data1, data2); +} + +static force_inline void +unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2, + vector unsigned int* data_lo, vector unsigned int* data_hi) +{ + *data_lo = unpacklo_128_8x16(data1, data2); + *data_hi = unpackhi_128_8x16(data1, data2); +} + +static force_inline vector unsigned int +unpack_565_to_8888 (vector unsigned int lo) +{ + vector unsigned int r, g, b, rb, t; + + r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red); + g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green); + b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue); + + rb = vec_or (r, b); + t = vec_and (rb, mask_565_fix_rb); + t = vec_sr (t, create_mask_32_128(5)); + rb = vec_or (rb, t); + + t = vec_and (g, mask_565_fix_g); + t = vec_sr (t, create_mask_32_128(6)); + g = vec_or (g, t); + + return vec_or (rb, g); +} + +static force_inline int +is_opaque (vector unsigned int x) +{ + uint32_t cmp_result; + vector bool int ffs = vec_cmpeq(x, x); + + cmp_result = vec_all_eq(x, ffs); + + return (cmp_result & 0x8888) == 0x8888; +} + +static force_inline int +is_zero (vector unsigned int x) +{ + uint32_t cmp_result; + + cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); + + return cmp_result == 0xffff; +} + +static force_inline int +is_transparent (vector 
unsigned int x) +{ + uint32_t cmp_result; + + cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); + return (cmp_result & 0x8888) == 0x8888; +} + +static force_inline uint32_t +core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst) +{ + uint32_t a; + + a = ALPHA_8(src); + + if (a == 0xff) + { + return src; + } + else if (src) + { + UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src); + } + + return dst; +} + +static force_inline uint32_t +combine1 (const uint32_t *ps, const uint32_t *pm) +{ + uint32_t s = *ps; + + if (pm) + UN8x4_MUL_UN8(s, ALPHA_8(*pm)); + + return s; +} + +static force_inline vector unsigned int +combine4 (const uint32_t* ps, const uint32_t* pm) +{ + vector unsigned int src, msk; + + if (pm) + { + msk = load_128_unaligned(pm); + + if (is_transparent(msk)) + return (vector unsigned int) AVV(0); + } + + src = load_128_unaligned(ps); + + if (pm) + src = pix_multiply(src, msk); + + return src; +} static void vmx_combine_over_u_no_mask (uint32_t * dest, @@ -189,8 +492,19 @@ vmx_combine_over_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -228,8 +542,24 @@ vmx_combine_over_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia; + + UN8x4_MUL_UN8 (s, m); + + ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ 
-284,8 +614,18 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -322,8 +662,22 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -377,8 +731,17 @@ vmx_combine_in_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 (*dest); + + UN8x4_MUL_UN8 (s, a); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -413,8 +776,21 @@ vmx_combine_in_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (*dest); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -466,8 +842,18 @@ vmx_combine_in_reverse_u_no_mask 
(uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -503,8 +889,22 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -557,8 +957,18 @@ vmx_combine_out_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -594,8 +1004,21 @@ vmx_combine_out_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -647,8 +1070,18 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, 
tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (~(*src++)); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -685,8 +1118,22 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -739,8 +1186,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -779,8 +1238,26 @@ vmx_combine_atop_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -838,8 +1315,20 @@ vmx_combine_atop_reverse_u_no_mask 
(uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -878,8 +1367,26 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_a = ALPHA_8 (s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -937,8 +1444,20 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -977,8 +1496,26 @@ vmx_combine_xor_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t 
src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1036,8 +1573,18 @@ vmx_combine_add_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ @@ -1072,8 +1619,21 @@ vmx_combine_add_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8 (s, m); + UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1128,8 +1688,19 @@ vmx_combine_src_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + + UN8x4_MUL_UN8x4 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1168,8 +1739,23 @@ vmx_combine_over_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; 
+ uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1212,8 +1798,22 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ida = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1255,8 +1855,21 @@ vmx_combine_in_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t da = ALPHA_8 (*dest); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1297,8 +1910,21 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1340,8 +1966,22 @@ vmx_combine_out_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned 
char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1384,8 +2024,22 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1428,8 +2082,24 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask, vsrca; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1479,8 +2149,24 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = 
ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1527,8 +2213,24 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1575,8 +2277,21 @@ vmx_combine_add_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + DECLARE_SRC_MASK_VAR; + DECLARE_MASK_MASK_VAR; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1607,16 +2322,809 @@ vmx_combine_add_ca (pixman_implementation_t *imp, } } +static void +vmx_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line; + int dst_stride, mask_stride; + int32_t w; + uint32_t m, d, s, ia; + + vector unsigned int vsrc, valpha, vmask, vdst; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = ALPHA_8(src); + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + 
PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + vsrc = (vector unsigned int) {src, src, src, src}; + valpha = splat_alpha(vsrc); + + while (height--) + { + const uint8_t *pm = mask_line; + dst = dst_line; + dst_line += dst_stride; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = src; + m = *pm++; + + if (m) + { + d = *dst; + UN8x4_MUL_UN8 (s, m); + ia = ALPHA_8 (~s); + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *dst = d; + } + + w--; + dst++; + } + + while (w >= 4) + { + m = *((uint32_t*)pm); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned(dst, vsrc); + } + else if (m) + { + vmask = splat_pixel((vector unsigned int) {m, m, m, m}); + + /* dst is 16-byte aligned */ + vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst)); + + save_128_aligned(dst, vdst); + } + + w -= 4; + dst += 4; + pm += 4; + } + + while (w) + { + s = src; + m = *pm++; + + if (m) + { + d = *dst; + UN8x4_MUL_UN8 (s, m); + ia = ALPHA_8 (~s); + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *dst = d; + } + + w--; + dst++; + } + } + +} + +static pixman_bool_t +vmx_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t byte_width; + uint8_t *byte_line; + + vector unsigned int vfiller; + + if (bpp == 8) + { + uint8_t b; + uint16_t w; + + stride = stride * (int) sizeof (uint32_t) / 1; + byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + byte_width = width; + stride *= 1; + + b = filler & 0xff; + w = (b << 8) | b; + filler = (w << 16) | w; + } + else if (bpp == 16) + { + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; + + filler = (filler & 0xffff) * 0x00010001; + } + else if (bpp == 32) + { + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + 
x); + byte_width = 4 * width; + stride *= 4; + } + else + { + return FALSE; + } + + vfiller = create_mask_1x32_128(&filler); + + while (height--) + { + int w; + uint8_t *d = byte_line; + byte_line += stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + while (w >= 128) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + vec_st(vfiller, 0, (uint32_t *) d + 8); + vec_st(vfiller, 0, (uint32_t *) d + 12); + vec_st(vfiller, 0, (uint32_t *) d + 16); + vec_st(vfiller, 0, (uint32_t *) d + 20); + vec_st(vfiller, 0, (uint32_t *) d + 24); + vec_st(vfiller, 0, (uint32_t *) d + 28); + + d += 128; + w -= 128; + } + + if (w >= 64) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + vec_st(vfiller, 0, (uint32_t *) d + 8); + vec_st(vfiller, 0, (uint32_t *) d + 12); + + d += 64; + w -= 64; + } + + if (w >= 32) + { + vec_st(vfiller, 0, (uint32_t *) d); + vec_st(vfiller, 0, (uint32_t *) d + 4); + + d += 32; + w -= 32; + } + + if (w >= 16) + { + vec_st(vfiller, 0, (uint32_t *) d); + + d += 16; + w -= 16; + } + + while (w >= 4) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + if (w >= 1) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + } + + return TRUE; +} + +static void +vmx_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int32_t w; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 
1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + *dst++ = *src++ | 0xff000000; + w--; + } + + while (w >= 16) + { + vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4; + + vmx_src1 = load_128_unaligned (src); + vmx_src2 = load_128_unaligned (src + 4); + vmx_src3 = load_128_unaligned (src + 8); + vmx_src4 = load_128_unaligned (src + 12); + + save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000)); + save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000)); + save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000)); + save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000)); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *src++ | 0xff000000; + w--; + } + } +} + +static void +vmx_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t src, ia; + int i, w, dst_stride; + vector unsigned int vdst, vsrc, via; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + vsrc = (vector unsigned int){src, src, src, src}; + via = negate (splat_alpha (vsrc)); + ia = ALPHA_8 (~src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + uint32_t d = *dst; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src); + *dst++ = d; + w--; + } + + for (i = w / 4; i > 0; i--) + { + vdst = pix_multiply (load_128_aligned (dst), via); + save_128_aligned (dst, pix_add (vsrc, vdst)); + dst += 4; + } + + for (i = w % 4; --i >= 0;) + { + uint32_t d = dst[i]; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src); + dst[i] = d; + } + } +} + +static void +vmx_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) 
+{ + PIXMAN_COMPOSITE_ARGS (info); + int dst_stride, src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + dst = dst_line; + src = src_line; + + while (height--) + { + vmx_combine_over_u (imp, op, dst, src, NULL, width); + + dst += dst_stride; + src += src_stride; + } +} + +static void +vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, ia; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + vector unsigned int vsrc, valpha, vmask, vdest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + vsrc = (vector unsigned int) {src, src, src, src}; + valpha = splat_alpha(vsrc); + ia = ALPHA_8 (src); + + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + uint32_t s; + + dst_line += dst_stride; + mask_line += mask_stride; + + while (w && (uintptr_t)pd & 15) + { + s = src; + m = *pm++; + + if (m) + { + d = *pd; + UN8x4_MUL_UN8x4 (s, m); + UN8x4_MUL_UN8 (m, ia); + m = ~m; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s); + *pd = d; + } + + pd++; + w--; + } + + while (w >= 4) + { + /* pm is NOT necessarily 16-byte aligned */ + vmask = load_128_unaligned (pm); + + pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0)); + + /* if all bits in mask are zero, pack_cmp is not 0 */ + if (pack_cmp == 0) + { + /* pd is 16-byte aligned */ + vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd)); + + save_128_aligned(pd, vdest); + 
} + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = src; + m = *pm++; + + if (m) + { + d = *pd; + UN8x4_MUL_UN8x4 (s, m); + UN8x4_MUL_UN8 (m, ia); + m = ~m; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, m, s); + *pd = d; + } + + pd++; + w--; + } + } +} + +static void +vmx_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Small head */ + while (w && (uintptr_t)dst & 3) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + + vmx_combine_add_u (imp, op, + (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); + + /* Small tail */ + dst += w & 0xfffc; + src += w & 0xfffc; + + w &= 3; + + while (w) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + } +} + +static void +vmx_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + vmx_combine_add_u (imp, op, dst, src, NULL, width); + } +} + +static force_inline void +scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd, + const uint32_t* ps, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t src_width_fixed, + pixman_bool_t 
fully_transparent_src) +{ + uint32_t s, d; + const uint32_t* pm = NULL; + + vector unsigned int vsrc, vdst; + + if (fully_transparent_src) + return; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps + pixman_fixed_to_int (vx), pm); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + *pd++ = core_combine_over_u_pixel_vmx (s, d); + if (pm) + pm++; + w--; + } + + while (w >= 4) + { + vector unsigned int tmp; + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp2 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp3 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp4 = *(ps + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + tmp[0] = tmp1; + tmp[1] = tmp2; + tmp[2] = tmp3; + tmp[3] = tmp4; + + vsrc = combine4 ((const uint32_t *) &tmp, pm); + + if (is_opaque (vsrc)) + { + save_128_aligned (pd, vsrc); + } + else if (!is_zero (vsrc)) + { + vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd)); + + save_128_aligned (pd, vdst); + } + + w -= 4; + pd += 4; + if (pm) + pm += 4; + } + + while (w) + { + d = *pd; + s = combine1 (ps + pixman_fixed_to_int (vx), pm); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + *pd++ = core_combine_over_u_pixel_vmx (s, d); + if (pm) + pm++; + + w--; + } +} + +FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER, + scaled_nearest_scanline_vmx_8888_8888_OVER, + uint32_t, uint32_t, COVER) +FAST_NEAREST_MAINLOOP (vmx_8888_8888_none_OVER, + scaled_nearest_scanline_vmx_8888_8888_OVER, + uint32_t, uint32_t, NONE) +FAST_NEAREST_MAINLOOP (vmx_8888_8888_pad_OVER, + scaled_nearest_scanline_vmx_8888_8888_OVER, + uint32_t, uint32_t, PAD) +FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER, + scaled_nearest_scanline_vmx_8888_8888_OVER, + uint32_t, 
uint32_t, NORMAL) + static const pixman_fast_path_t vmx_fast_paths[] = { + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, vmx_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, vmx_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, vmx_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, vmx_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, vmx_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, vmx_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, vmx_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, vmx_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, vmx_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, vmx_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, vmx_composite_over_n_8888_8888_ca), + + /* PIXMAN_OP_ADD */ + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888), + + /* PIXMAN_OP_SRC */ + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, vmx_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, vmx_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, vmx_8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, 
a8b8g8r8, a8b8g8r8, vmx_8888_8888), + { PIXMAN_OP_NONE }, }; +static uint32_t * +vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + vector unsigned int ff000000 = mask_ff000000; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 4) + { + save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000)); + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + return iter->buffer; +} + +static uint32_t * +vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint8_t *src = iter->bits; + vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6; + + iter->bits += iter->stride; + + while (w && (((uintptr_t)dst) & 15)) + { + *dst++ = *(src++) << 24; + w--; + } + + while (w >= 16) + { + vmx0 = load_128_unaligned((uint32_t *) src); + + unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2); + unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4); + unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6); + + save_128_aligned(dst, vmx6); + save_128_aligned((dst + 4), vmx5); + save_128_aligned((dst + 8), vmx4); + save_128_aligned((dst + 12), vmx3); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *(src++) << 24; + w--; + } + + return iter->buffer; +} + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t vmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; + pixman_implementation_t * 
_pixman_implementation_create_vmx (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths); + /* VMX constants */ + mask_ff000000 = create_mask_32_128 (0xff000000); + mask_red = create_mask_32_128 (0x00f80000); + mask_green = create_mask_32_128 (0x0000fc00); + mask_blue = create_mask_32_128 (0x000000f8); + mask_565_fix_rb = create_mask_32_128 (0x00e000e0); + mask_565_fix_g = create_mask_32_128 (0x0000c000); + /* Set up function pointers */ imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; @@ -1643,5 +3151,9 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + imp->fill = vmx_fill; + + imp->iter_info = vmx_iters; + return imp; } diff --git a/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h b/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h deleted file mode 100644 index 378019cf27..0000000000 --- a/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h +++ /dev/null @@ -1,263 +0,0 @@ -#ifndef MMX_X64_H_INCLUDED -#define MMX_X64_H_INCLUDED - -/* Implementation of x64 MMX substitition functions, before - * pixman is reimplemented not to use __m64 type on Visual C++ - * - * Copyright (C)2009 by George Yohng - * Released in public domain. 
- */ - -#include <intrin.h> - -#define M64C(a) (*(const __m64 *)(&a)) -#define M64U(a) (*(const unsigned long long *)(&a)) - -__inline __m64 -_m_from_int (int a) -{ - long long i64 = a; - - return M64C (i64); -} - -__inline __m64 -_mm_setzero_si64 () -{ - long long i64 = 0; - - return M64C (i64); -} - -__inline __m64 -_mm_set_pi32 (int i1, int i0) -{ - unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32); - - return M64C (i64); -} - -__inline void -_m_empty () -{ -} - -__inline __m64 -_mm_set1_pi16 (short w) -{ - unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL; - - return M64C (i64); -} - -__inline int -_m_to_int (__m64 m) -{ - return m.m64_i32[0]; -} - -__inline __m64 -_mm_movepi64_pi64 (__m128i a) -{ - return M64C (a.m128i_i64[0]); -} - -__inline __m64 -_m_pand (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) & M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_por (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) | M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_pxor (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) ^ M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned short d[4] = - { - (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16) - }; - - return M64C (d[0]); -} - -__inline __m64 -_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned short d[4] = - { - (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])), - (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])), - (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])), - (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3])) - }; - - return M64C (d[0]); -} - -__inline __m64 
-_m_pmullw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = - ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48); - - return M64C (x); -} - -__inline __m64 -_m_paddusb (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) + - (M64U (b) & 0x00FF00FF00FF00FFULL); - - unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) + - ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL); - - x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; - y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; - - x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8); - - return M64C (x); -} - -__inline __m64 -_m_paddusw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) + - (M64U (b) & 0x0000FFFF0000FFFFULL); - - unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) + - ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL); - - x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; - y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; - - x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16); - - return M64C (x); -} - -__inline __m64 -_m_pshufw (__m64 a, int n) /* unoptimized */ -{ - unsigned short d[4] = - { - a.m64_u16[n & 3], - a.m64_u16[(n >> 2) & 3], - a.m64_u16[(n >> 4) & 3], - a.m64_u16[(n >> 6) & 3] - }; - - return M64C (d[0]); -} - -__inline unsigned char -sat16 (unsigned short d) -{ - if (d > 0xFF) return 0xFF; - else return d & 0xFF; -} - -__inline __m64 -_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - sat16 (m1.m64_u16[0]), - sat16 (m1.m64_u16[1]), - sat16 (m1.m64_u16[2]), - sat16 (m1.m64_u16[3]), - sat16 
(m2.m64_u16[0]), - sat16 (m2.m64_u16[1]), - sat16 (m2.m64_u16[2]), - sat16 (m2.m64_u16[3]) - }; - - return M64C (d[0]); -} - -__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - m1.m64_u8[0], - m2.m64_u8[0], - m1.m64_u8[1], - m2.m64_u8[1], - m1.m64_u8[2], - m2.m64_u8[2], - m1.m64_u8[3], - m2.m64_u8[3], - }; - - return M64C (d[0]); -} - -__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - m1.m64_u8[4], - m2.m64_u8[4], - m1.m64_u8[5], - m2.m64_u8[5], - m1.m64_u8[6], - m2.m64_u8[6], - m1.m64_u8[7], - m2.m64_u8[7], - }; - - return M64C (d[0]); -} - -__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */ -{ - unsigned short d[4] = - { - a.m64_u16[0] >> n, - a.m64_u16[1] >> n, - a.m64_u16[2] >> n, - a.m64_u16[3] >> n - }; - - return M64C (d[0]); -} - -__inline __m64 _m_psrlqi (__m64 m, int n) -{ - unsigned long long x = M64U (m) >> n; - - return M64C (x); -} - -__inline __m64 _m_psllqi (__m64 m, int n) -{ - unsigned long long x = M64U (m) << n; - - return M64C (x); -} - -#endif /* MMX_X64_H_INCLUDED */ diff --git a/gfx/cairo/libpixman/src/pixman-x86.c b/gfx/cairo/libpixman/src/pixman-x86.c index feea23e790..2c702e5c3b 100644 --- a/gfx/cairo/libpixman/src/pixman-x86.c +++ b/gfx/cairo/libpixman/src/pixman-x86.c @@ -25,7 +25,7 @@ #include "pixman-private.h" -#if defined(USE_X86_MMX) || defined (USE_SSE2) +#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) /* The CPU detection code needs to be in a file not compiled with * "-mmmx -msse", as gcc would generate CMOV instructions otherwise @@ -39,7 +39,8 @@ typedef enum X86_MMX_EXTENSIONS = (1 << 1), X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, X86_SSE2 = (1 << 3), - X86_CMOV = (1 << 4) + X86_CMOV = (1 << 4), + X86_SSSE3 = (1 << 5) } cpu_features_t; #ifdef HAVE_GETISAX @@ -64,6 +65,8 @@ detect_cpu_features (void) features |= X86_SSE; if (result & AV_386_SSE2) features |= X86_SSE2; + if (result & AV_386_SSSE3) + 
features |= X86_SSSE3; } return features; @@ -171,6 +174,8 @@ detect_cpu_features (void) features |= X86_SSE; if (d & (1 << 26)) features |= X86_SSE2; + if (c & (1 << 9)) + features |= X86_SSSE3; /* Check for AMD specific features */ if ((features & X86_MMX) && !(features & X86_SSE)) @@ -186,6 +191,7 @@ detect_cpu_features (void) memcpy (vendor + 8, &c, 4); if (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "HygonGenuine") == 0 || strcmp (vendor, "Geode by NSC") == 0) { pixman_cpuid (0x80000000, &a, &b, &c, &d); @@ -226,6 +232,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp) { #define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) #define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) +#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3) #ifdef USE_X86_MMX if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) @@ -237,5 +244,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp) imp = _pixman_implementation_create_sse2 (imp); #endif +#ifdef USE_SSSE3 + if (!_pixman_disabled ("ssse3") && have_feature (SSSE3_BITS)) + imp = _pixman_implementation_create_ssse3 (imp); +#endif + return imp; } diff --git a/gfx/cairo/libpixman/src/pixman.c b/gfx/cairo/libpixman/src/pixman.c index 184f0c4e6a..c09b528083 100644 --- a/gfx/cairo/libpixman/src/pixman.c +++ b/gfx/cairo/libpixman/src/pixman.c @@ -325,18 +325,20 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, return TRUE; } -typedef struct +typedef struct box_48_16 box_48_16_t; + +struct box_48_16 { - pixman_fixed_48_16_t x1; - pixman_fixed_48_16_t y1; - pixman_fixed_48_16_t x2; - pixman_fixed_48_16_t y2; -} box_48_16_t; + pixman_fixed_48_16_t x1; + pixman_fixed_48_16_t y1; + pixman_fixed_48_16_t x2; + pixman_fixed_48_16_t y2; +}; static pixman_bool_t -compute_transformed_extents (pixman_transform_t *transform, +compute_transformed_extents (pixman_transform_t *transform, const pixman_box32_t *extents, - box_48_16_t *transformed) + box_48_16_t *transformed) { 
pixman_fixed_48_16_t tx1, ty1, tx2, ty2; pixman_fixed_t x1, y1, x2, y2; @@ -495,21 +497,12 @@ analyze_extent (pixman_image_t *image, if (!compute_transformed_extents (transform, extents, &transformed)) return FALSE; - /* Expand the source area by a tiny bit so account of different rounding that - * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from - * 0.5 so this won't cause the area computed to be overly pessimistic. - */ - transformed.x1 -= 8 * pixman_fixed_e; - transformed.y1 -= 8 * pixman_fixed_e; - transformed.x2 += 8 * pixman_fixed_e; - transformed.y2 += 8 * pixman_fixed_e; - if (image->common.type == BITS) { - if (pixman_fixed_to_int (transformed.x1) >= 0 && - pixman_fixed_to_int (transformed.y1) >= 0 && - pixman_fixed_to_int (transformed.x2) < image->bits.width && - pixman_fixed_to_int (transformed.y2) < image->bits.height) + if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0 && + pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0 && + pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width && + pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height) { *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; } @@ -605,7 +598,7 @@ pixman_image_composite32 (pixman_op_t op, else { mask_format = PIXMAN_null; - info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; } dest_format = dest->common.extended_format_code; @@ -784,6 +777,11 @@ color_to_pixel (const pixman_color_t *color, { uint32_t c = color_to_uint32 (color); + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA_FLOAT) + { + return FALSE; + } + if (!(format == PIXMAN_a8r8g8b8 || format == PIXMAN_x8r8g8b8 || format == PIXMAN_a8b8g8r8 || diff --git a/gfx/cairo/libpixman/src/pixman.h b/gfx/cairo/libpixman/src/pixman.h index 893adc50e0..66bc9abb1d 100644 --- a/gfx/cairo/libpixman/src/pixman.h +++ b/gfx/cairo/libpixman/src/pixman.h @@ -73,7 +73,6 @@ SOFTWARE. 
#include "cairo/pixman-rename.h" #endif - #include <pixman-version.h> #ifdef __cplusplus @@ -94,6 +93,18 @@ PIXMAN_BEGIN_DECLS #if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc) # include <inttypes.h> +/* VS 2010 (_MSC_VER 1600) has stdint.h */ +#elif defined (_MSC_VER) && _MSC_VER < 1600 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif defined (_AIX) +# include <sys/inttypes.h> #else # include <stdint.h> #endif @@ -120,7 +131,7 @@ typedef pixman_fixed_16_16_t pixman_fixed_t; #define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e) #define pixman_fixed_minus_1 (pixman_int_to_fixed(-1)) #define pixman_fixed_to_int(f) ((int) ((f) >> 16)) -#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16)) +#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((uint32_t) (i) << 16)) #define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1) #define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0)) #define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e) @@ -177,42 +188,73 @@ struct pixman_transform struct pixman_box16; typedef union pixman_image pixman_image_t; +PIXMAN_API void pixman_transform_init_identity (struct pixman_transform *matrix); + +PIXMAN_API pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, struct pixman_vector *vector); + +PIXMAN_API pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, struct pixman_vector *vector); + +PIXMAN_API pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst, const struct pixman_transform *l, const struct pixman_transform *r); + +PIXMAN_API void pixman_transform_init_scale (struct pixman_transform *t, pixman_fixed_t 
sx, pixman_fixed_t sy); + +PIXMAN_API pixman_bool_t pixman_transform_scale (struct pixman_transform *forward, struct pixman_transform *reverse, pixman_fixed_t sx, pixman_fixed_t sy); + +PIXMAN_API void pixman_transform_init_rotate (struct pixman_transform *t, pixman_fixed_t cos, pixman_fixed_t sin); + +PIXMAN_API pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward, struct pixman_transform *reverse, pixman_fixed_t c, pixman_fixed_t s); + +PIXMAN_API void pixman_transform_init_translate (struct pixman_transform *t, pixman_fixed_t tx, pixman_fixed_t ty); + +PIXMAN_API pixman_bool_t pixman_transform_translate (struct pixman_transform *forward, struct pixman_transform *reverse, pixman_fixed_t tx, pixman_fixed_t ty); + +PIXMAN_API pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix, struct pixman_box16 *b); + +PIXMAN_API pixman_bool_t pixman_transform_invert (struct pixman_transform *dst, const struct pixman_transform *src); + +PIXMAN_API pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t); + +PIXMAN_API pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t); + +PIXMAN_API pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t); + +PIXMAN_API pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a, const struct pixman_transform *b); @@ -232,42 +274,70 @@ struct pixman_f_transform double m[3][3]; }; + +PIXMAN_API pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t, const struct pixman_f_transform *ft); + +PIXMAN_API void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, const struct pixman_transform *t); + +PIXMAN_API pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst, const struct pixman_f_transform *src); + +PIXMAN_API pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t, struct pixman_f_vector *v); + +PIXMAN_API void 
pixman_f_transform_point_3d (const struct pixman_f_transform *t, struct pixman_f_vector *v); + +PIXMAN_API void pixman_f_transform_multiply (struct pixman_f_transform *dst, const struct pixman_f_transform *l, const struct pixman_f_transform *r); + +PIXMAN_API void pixman_f_transform_init_scale (struct pixman_f_transform *t, double sx, double sy); + +PIXMAN_API pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward, struct pixman_f_transform *reverse, double sx, double sy); + +PIXMAN_API void pixman_f_transform_init_rotate (struct pixman_f_transform *t, double cos, double sin); + +PIXMAN_API pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward, struct pixman_f_transform *reverse, double c, double s); + +PIXMAN_API void pixman_f_transform_init_translate (struct pixman_f_transform *t, double tx, double ty); + +PIXMAN_API pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward, struct pixman_f_transform *reverse, double tx, double ty); + +PIXMAN_API pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t, struct pixman_box16 *b); + +PIXMAN_API void pixman_f_transform_init_identity (struct pixman_f_transform *t); typedef enum @@ -280,6 +350,16 @@ typedef enum typedef enum { + PIXMAN_DITHER_NONE, + PIXMAN_DITHER_FAST, + PIXMAN_DITHER_GOOD, + PIXMAN_DITHER_BEST, + PIXMAN_DITHER_ORDERED_BAYER_8, + PIXMAN_DITHER_ORDERED_BLUE_NOISE_64, +} pixman_dither_t; + +typedef enum +{ PIXMAN_FILTER_FAST, PIXMAN_FILTER_GOOD, PIXMAN_FILTER_BEST, @@ -416,73 +496,120 @@ typedef enum /* This function exists only to make it possible to preserve * the X ABI - it should go away at first opportunity. 
*/ +PIXMAN_API void pixman_region_set_static_pointers (pixman_box16_t *empty_box, pixman_region16_data_t *empty_data, pixman_region16_data_t *broken_data); /* creation/destruction */ +PIXMAN_API void pixman_region_init (pixman_region16_t *region); + +PIXMAN_API void pixman_region_init_rect (pixman_region16_t *region, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region_init_rects (pixman_region16_t *region, const pixman_box16_t *boxes, int count); + +PIXMAN_API void pixman_region_init_with_extents (pixman_region16_t *region, pixman_box16_t *extents); + +PIXMAN_API void pixman_region_init_from_image (pixman_region16_t *region, pixman_image_t *image); + +PIXMAN_API void pixman_region_fini (pixman_region16_t *region); /* manipulation */ +PIXMAN_API void pixman_region_translate (pixman_region16_t *region, int x, int y); + +PIXMAN_API pixman_bool_t pixman_region_copy (pixman_region16_t *dest, pixman_region16_t *source); + +PIXMAN_API pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg, pixman_region16_t *reg1, pixman_region16_t *reg2); + +PIXMAN_API pixman_bool_t pixman_region_union (pixman_region16_t *new_reg, pixman_region16_t *reg1, pixman_region16_t *reg2); + +PIXMAN_API pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest, pixman_region16_t *source, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest, pixman_region16_t *source, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d, pixman_region16_t *reg_m, pixman_region16_t *reg_s); + +PIXMAN_API pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg, pixman_region16_t *reg1, pixman_box16_t *inv_rect); + +PIXMAN_API pixman_bool_t pixman_region_contains_point (pixman_region16_t *region, int x, int y, pixman_box16_t *box); + +PIXMAN_API pixman_region_overlap_t 
pixman_region_contains_rectangle (pixman_region16_t *region, pixman_box16_t *prect); + +PIXMAN_API pixman_bool_t pixman_region_not_empty (pixman_region16_t *region); + +PIXMAN_API pixman_box16_t * pixman_region_extents (pixman_region16_t *region); + +PIXMAN_API int pixman_region_n_rects (pixman_region16_t *region); + +PIXMAN_API pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region, int *n_rects); + +PIXMAN_API pixman_bool_t pixman_region_equal (pixman_region16_t *region1, pixman_region16_t *region2); + +PIXMAN_API pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region); + +PIXMAN_API void pixman_region_reset (pixman_region16_t *region, pixman_box16_t *box); + +PIXMAN_API void pixman_region_clear (pixman_region16_t *region); /* * 32 bit regions @@ -516,72 +643,119 @@ struct pixman_region32 }; /* creation/destruction */ +PIXMAN_API void pixman_region32_init (pixman_region32_t *region); + +PIXMAN_API void pixman_region32_init_rect (pixman_region32_t *region, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region, const pixman_box32_t *boxes, int count); + +PIXMAN_API void pixman_region32_init_with_extents (pixman_region32_t *region, pixman_box32_t *extents); + +PIXMAN_API void pixman_region32_init_from_image (pixman_region32_t *region, pixman_image_t *image); + +PIXMAN_API void pixman_region32_fini (pixman_region32_t *region); /* manipulation */ +PIXMAN_API void pixman_region32_translate (pixman_region32_t *region, int x, int y); + +PIXMAN_API pixman_bool_t pixman_region32_copy (pixman_region32_t *dest, pixman_region32_t *source); + +PIXMAN_API pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg, pixman_region32_t *reg1, pixman_region32_t *reg2); + +PIXMAN_API pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg, pixman_region32_t *reg1, pixman_region32_t *reg2); + +PIXMAN_API pixman_bool_t pixman_region32_intersect_rect 
(pixman_region32_t *dest, pixman_region32_t *source, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest, pixman_region32_t *source, int x, int y, unsigned int width, unsigned int height); + +PIXMAN_API pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d, pixman_region32_t *reg_m, pixman_region32_t *reg_s); + +PIXMAN_API pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg, pixman_region32_t *reg1, pixman_box32_t *inv_rect); + +PIXMAN_API pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region, int x, int y, pixman_box32_t *box); + +PIXMAN_API pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region, pixman_box32_t *prect); + +PIXMAN_API pixman_bool_t pixman_region32_not_empty (pixman_region32_t *region); + +PIXMAN_API pixman_box32_t * pixman_region32_extents (pixman_region32_t *region); + +PIXMAN_API int pixman_region32_n_rects (pixman_region32_t *region); + +PIXMAN_API pixman_box32_t * pixman_region32_rectangles (pixman_region32_t *region, int *n_rects); + +PIXMAN_API pixman_bool_t pixman_region32_equal (pixman_region32_t *region1, pixman_region32_t *region2); + +PIXMAN_API pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region); + +PIXMAN_API void pixman_region32_reset (pixman_region32_t *region, pixman_box32_t *box); + +PIXMAN_API void pixman_region32_clear (pixman_region32_t *region); /* Copy / Fill / Misc */ +PIXMAN_API pixman_bool_t pixman_blt (uint32_t *src_bits, uint32_t *dst_bits, int src_stride, @@ -594,6 +768,8 @@ pixman_bool_t pixman_blt (uint32_t *src_bits, int dest_y, int width, int height); + +PIXMAN_API pixman_bool_t pixman_fill (uint32_t *bits, int stride, int bpp, @@ -603,7 +779,11 @@ pixman_bool_t pixman_fill (uint32_t *bits, int height, uint32_t _xor); + +PIXMAN_API int pixman_version (void); + +PIXMAN_API const char* pixman_version_string (void); /* @@ -647,12 +827,24 @@ struct 
pixman_indexed ((g) << 4) | \ ((b))) -#define PIXMAN_FORMAT_BPP(f) (((f) >> 24) ) -#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0xff) -#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f) -#define PIXMAN_FORMAT_R(f) (((f) >> 8) & 0x0f) -#define PIXMAN_FORMAT_G(f) (((f) >> 4) & 0x0f) -#define PIXMAN_FORMAT_B(f) (((f) ) & 0x0f) +#define PIXMAN_FORMAT_BYTE(bpp,type,a,r,g,b) \ + (((bpp >> 3) << 24) | \ + (3 << 22) | ((type) << 16) | \ + ((a >> 3) << 12) | \ + ((r >> 3) << 8) | \ + ((g >> 3) << 4) | \ + ((b >> 3))) + +#define PIXMAN_FORMAT_RESHIFT(val, ofs, num) \ + (((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3)) + +#define PIXMAN_FORMAT_BPP(f) PIXMAN_FORMAT_RESHIFT(f, 24, 8) +#define PIXMAN_FORMAT_SHIFT(f) ((uint32_t)((f >> 22) & 3)) +#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0x3f) +#define PIXMAN_FORMAT_A(f) PIXMAN_FORMAT_RESHIFT(f, 12, 4) +#define PIXMAN_FORMAT_R(f) PIXMAN_FORMAT_RESHIFT(f, 8, 4) +#define PIXMAN_FORMAT_G(f) PIXMAN_FORMAT_RESHIFT(f, 4, 4) +#define PIXMAN_FORMAT_B(f) PIXMAN_FORMAT_RESHIFT(f, 0, 4) #define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff) #define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff) #define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \ @@ -671,15 +863,22 @@ struct pixman_indexed #define PIXMAN_TYPE_BGRA 8 #define PIXMAN_TYPE_RGBA 9 #define PIXMAN_TYPE_ARGB_SRGB 10 +#define PIXMAN_TYPE_RGBA_FLOAT 11 #define PIXMAN_FORMAT_COLOR(f) \ (PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \ - PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA) + PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA || \ + PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA_FLOAT) -/* 32bpp formats */ typedef enum { +/* 128bpp formats */ + PIXMAN_rgba_float = PIXMAN_FORMAT_BYTE(128,PIXMAN_TYPE_RGBA_FLOAT,32,32,32,32), +/* 96bpp formats */ + PIXMAN_rgb_float = PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32), + +/* 32bpp formats */ PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), 
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), @@ -750,30 +949,44 @@ typedef enum { } pixman_format_code_t; /* Querying supported format values. */ +PIXMAN_API pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format); + +PIXMAN_API pixman_bool_t pixman_format_supported_source (pixman_format_code_t format); /* Constructors */ +PIXMAN_API pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color); + +PIXMAN_API pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1, const pixman_point_fixed_t *p2, const pixman_gradient_stop_t *stops, int n_stops); + +PIXMAN_API pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner, const pixman_point_fixed_t *outer, pixman_fixed_t inner_radius, pixman_fixed_t outer_radius, const pixman_gradient_stop_t *stops, int n_stops); + +PIXMAN_API pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center, pixman_fixed_t angle, const pixman_gradient_stop_t *stops, int n_stops); + +PIXMAN_API pixman_image_t *pixman_image_create_bits (pixman_format_code_t format, int width, int height, uint32_t *bits, int rowstride_bytes); + +PIXMAN_API pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format, int width, int height, @@ -781,48 +994,99 @@ pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t forma int rowstride_bytes); /* Destructor */ +PIXMAN_API pixman_image_t *pixman_image_ref (pixman_image_t *image); + +PIXMAN_API pixman_bool_t pixman_image_unref (pixman_image_t *image); + +PIXMAN_API void pixman_image_set_destroy_function (pixman_image_t *image, pixman_image_destroy_func_t function, void *data); + +PIXMAN_API void * pixman_image_get_destroy_data (pixman_image_t *image); /* Set properties */ +PIXMAN_API pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image, pixman_region16_t *region); + 
+PIXMAN_API pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image, pixman_region32_t *region); + +PIXMAN_API void pixman_image_set_has_client_clip (pixman_image_t *image, pixman_bool_t clien_clip); + +PIXMAN_API pixman_bool_t pixman_image_set_transform (pixman_image_t *image, const pixman_transform_t *transform); + +PIXMAN_API void pixman_image_set_repeat (pixman_image_t *image, pixman_repeat_t repeat); + +PIXMAN_API +void pixman_image_set_dither (pixman_image_t *image, + pixman_dither_t dither); + +PIXMAN_API +void pixman_image_set_dither_offset (pixman_image_t *image, + int offset_x, + int offset_y); + +PIXMAN_API pixman_bool_t pixman_image_set_filter (pixman_image_t *image, pixman_filter_t filter, const pixman_fixed_t *filter_params, int n_filter_params); + +PIXMAN_API void pixman_image_set_source_clipping (pixman_image_t *image, pixman_bool_t source_clipping); + +PIXMAN_API void pixman_image_set_alpha_map (pixman_image_t *image, pixman_image_t *alpha_map, int16_t x, int16_t y); + +PIXMAN_API void pixman_image_set_component_alpha (pixman_image_t *image, pixman_bool_t component_alpha); + +PIXMAN_API pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image); + +PIXMAN_API void pixman_image_set_accessors (pixman_image_t *image, pixman_read_memory_func_t read_func, pixman_write_memory_func_t write_func); + +PIXMAN_API void pixman_image_set_indexed (pixman_image_t *image, const pixman_indexed_t *indexed); + +PIXMAN_API uint32_t *pixman_image_get_data (pixman_image_t *image); + +PIXMAN_API int pixman_image_get_width (pixman_image_t *image); + +PIXMAN_API int pixman_image_get_height (pixman_image_t *image); + +PIXMAN_API int pixman_image_get_stride (pixman_image_t *image); /* in bytes */ + +PIXMAN_API int pixman_image_get_depth (pixman_image_t *image); + +PIXMAN_API pixman_format_code_t pixman_image_get_format (pixman_image_t *image); typedef enum @@ -840,6 +1104,7 @@ typedef enum /* Create the parameter list for a SEPARABLE_CONVOLUTION 
filter * with the given kernels and scale parameters. */ +PIXMAN_API pixman_fixed_t * pixman_filter_create_separable_convolution (int *n_values, pixman_fixed_t scale_x, @@ -851,11 +1116,15 @@ pixman_filter_create_separable_convolution (int *n_values, int subsample_bits_x, int subsample_bits_y); + +PIXMAN_API pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op, pixman_image_t *image, const pixman_color_t *color, int n_rects, const pixman_rectangle16_t *rects); + +PIXMAN_API pixman_bool_t pixman_image_fill_boxes (pixman_op_t op, pixman_image_t *dest, const pixman_color_t *color, @@ -863,6 +1132,7 @@ pixman_bool_t pixman_image_fill_boxes (pixman_op_t const pixman_box32_t *boxes); /* Composite */ +PIXMAN_API pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region, pixman_image_t *src_image, pixman_image_t *mask_image, @@ -875,6 +1145,8 @@ pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region, int16_t dest_y, uint16_t width, uint16_t height); + +PIXMAN_API void pixman_image_composite (pixman_op_t op, pixman_image_t *src, pixman_image_t *mask, @@ -887,6 +1159,8 @@ void pixman_image_composite (pixman_op_t op, int16_t dest_y, uint16_t width, uint16_t height); + +PIXMAN_API void pixman_image_composite32 (pixman_op_t op, pixman_image_t *src, pixman_image_t *mask, @@ -918,6 +1192,7 @@ void pixman_image_composite32 (pixman_op_t op, * Since 0.21.2, pixman doesn't do these workarounds anymore, so now this * function is a no-op. 
*/ +PIXMAN_API void pixman_disable_out_of_bounds_workaround (void); /* @@ -930,29 +1205,48 @@ typedef struct const void *glyph; } pixman_glyph_t; +PIXMAN_API pixman_glyph_cache_t *pixman_glyph_cache_create (void); + +PIXMAN_API void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache); + +PIXMAN_API void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache); + +PIXMAN_API void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache); + +PIXMAN_API const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, void *font_key, void *glyph_key); + +PIXMAN_API const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, void *font_key, void *glyph_key, int origin_x, int origin_y, pixman_image_t *glyph_image); + +PIXMAN_API void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, void *font_key, void *glyph_key); + +PIXMAN_API void pixman_glyph_get_extents (pixman_glyph_cache_t *cache, int n_glyphs, pixman_glyph_t *glyphs, pixman_box32_t *extents); + +PIXMAN_API pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, int n_glyphs, const pixman_glyph_t *glyphs); + +PIXMAN_API void pixman_composite_glyphs (pixman_op_t op, pixman_image_t *src, pixman_image_t *dest, @@ -968,6 +1262,8 @@ void pixman_composite_glyphs (pixman_op_t op, pixman_glyph_cache_t *cache, int n_glyphs, const pixman_glyph_t *glyphs); + +PIXMAN_API void pixman_composite_glyphs_no_mask (pixman_op_t op, pixman_image_t *src, pixman_image_t *dest, @@ -1023,7 +1319,7 @@ struct pixman_triangle #define pixman_trapezoid_valid(t) \ ((t)->left.p1.y != (t)->left.p2.y && \ (t)->right.p1.y != (t)->right.p2.y && \ - (int) ((t)->bottom - (t)->top) > 0) + ((t)->bottom > (t)->top)) struct pixman_span_fix { @@ -1035,12 +1331,19 @@ struct pixman_trap pixman_span_fix_t top, bot; }; +PIXMAN_API pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y, int bpp); + +PIXMAN_API pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y, int bpp); + +PIXMAN_API void pixman_edge_step 
(pixman_edge_t *e, int n); + +PIXMAN_API void pixman_edge_init (pixman_edge_t *e, int bpp, pixman_fixed_t y_start, @@ -1048,31 +1351,43 @@ void pixman_edge_init (pixman_edge_t *e, pixman_fixed_t y_top, pixman_fixed_t x_bot, pixman_fixed_t y_bot); + +PIXMAN_API void pixman_line_fixed_edge_init (pixman_edge_t *e, int bpp, pixman_fixed_t y, const pixman_line_fixed_t *line, int x_off, int y_off); + +PIXMAN_API void pixman_rasterize_edges (pixman_image_t *image, pixman_edge_t *l, pixman_edge_t *r, pixman_fixed_t t, pixman_fixed_t b); + +PIXMAN_API void pixman_add_traps (pixman_image_t *image, int16_t x_off, int16_t y_off, int ntrap, const pixman_trap_t *traps); + +PIXMAN_API void pixman_add_trapezoids (pixman_image_t *image, int16_t x_off, int y_off, int ntraps, const pixman_trapezoid_t *traps); + +PIXMAN_API void pixman_rasterize_trapezoid (pixman_image_t *image, const pixman_trapezoid_t *trap, int x_off, int y_off); + +PIXMAN_API void pixman_composite_trapezoids (pixman_op_t op, pixman_image_t * src, pixman_image_t * dst, @@ -1083,6 +1398,8 @@ void pixman_composite_trapezoids (pixman_op_t op, int y_dst, int n_traps, const pixman_trapezoid_t * traps); + +PIXMAN_API void pixman_composite_triangles (pixman_op_t op, pixman_image_t * src, pixman_image_t * dst, @@ -1093,6 +1410,8 @@ void pixman_composite_triangles (pixman_op_t op, int y_dst, int n_tris, const pixman_triangle_t * tris); + +PIXMAN_API void pixman_add_triangles (pixman_image_t *image, int32_t x_off, int32_t y_off, diff --git a/gfx/cairo/libpixman/src/refactor b/gfx/cairo/libpixman/src/refactor deleted file mode 100644 index 52fceab175..0000000000 --- a/gfx/cairo/libpixman/src/refactor +++ /dev/null @@ -1,478 +0,0 @@ -Roadmap - -- Move all the fetchers etc. into pixman-image to make pixman-compose.c - less intimidating. - - DONE - -- Make combiners for unified alpha take a mask argument. That way - we won't need two separate paths for unified vs component in the - general compositing code. 
- - DONE, except that the Altivec code needs to be updated. Luca is - looking into that. - -- Delete separate 'unified alpha' path - - DONE - -- Split images into their own files - - DONE - -- Split the gradient walker code out into its own file - - DONE - -- Add scanline getters per image - - DONE - -- Generic 64 bit fetcher - - DONE - -- Split fast path tables into their respective architecture dependent - files. - -See "Render Algorithm" below for rationale - -Images will eventually have these virtual functions: - - get_scanline() - get_scanline_wide() - get_pixel() - get_pixel_wide() - get_untransformed_pixel() - get_untransformed_pixel_wide() - get_unfiltered_pixel() - get_unfiltered_pixel_wide() - - store_scanline() - store_scanline_wide() - -1. - -Initially we will just have get_scanline() and get_scanline_wide(); -these will be based on the ones in pixman-compose. Hopefully this will -reduce the complexity in pixman_composite_rect_general(). - -Note that there is access considerations - the compose function is -being compiled twice. - - -2. - -Split image types into their own source files. Export noop virtual -reinit() call. Call this whenever a property of the image changes. - - -3. - -Split the get_scanline() call into smaller functions that are -initialized by the reinit() call. - -The Render Algorithm: - (first repeat, then filter, then transform, then clip) - -Starting from a destination pixel (x, y), do - - 1 x = x - xDst + xSrc - y = y - yDst + ySrc - - 2 reject pixel that is outside the clip - - This treats clipping as something that happens after - transformation, which I think is correct for client clips. For - hierarchy clips it is wrong, but who really cares? Without - GraphicsExposes hierarchy clips are basically irrelevant. Yes, - you could imagine cases where the pixels of a subwindow of a - redirected, transformed window should be treated as - transparent. 
I don't really care - - Basically, I think the render spec should say that pixels that - are unavailable due to the hierarchy have undefined content, - and that GraphicsExposes are not generated. Ie., basically - that using non-redirected windows as sources is fail. This is - at least consistent with the current implementation and we can - update the spec later if someone makes it work. - - The implication for render is that it should stop passing the - hierarchy clip to pixman. In pixman, if a source image has a - clip it should be used in computing the composite region and - nowhere else, regardless of what "has_client_clip" says. The - default should be for there to not be any clip. - - I would really like to get rid of the client clip as well for - source images, but unfortunately there is at least one - application in the wild that uses them. - - 3 Transform pixel: (x, y) = T(x, y) - - 4 Call p = GetUntransformedPixel (x, y) - - 5 If the image has an alpha map, then - - Call GetUntransformedPixel (x, y) on the alpha map - - add resulting alpha channel to p - - return p - - Where GetUntransformedPixel is: - - 6 switch (filter) - { - case NEAREST: - return GetUnfilteredPixel (x, y); - break; - - case BILINEAR: - return GetUnfilteredPixel (...) // 4 times - break; - - case CONVOLUTION: - return GetUnfilteredPixel (...) // as many times as necessary. - break; - } - - Where GetUnfilteredPixel (x, y) is - - 7 switch (repeat) - { - case REPEAT_NORMAL: - case REPEAT_PAD: - case REPEAT_REFLECT: - // adjust x, y as appropriate - break; - - case REPEAT_NONE: - if (x, y) is outside image bounds - return 0; - break; - } - - return GetRawPixel(x, y) - - Where GetRawPixel (x, y) is - - 8 Compute the pixel in question, depending on image type. - -For gradients, repeat has a totally different meaning, so -UnfilteredPixel() and RawPixel() must be the same function so that -gradients can do their own repeat algorithm.
- -So, the GetRawPixel - - for bits must deal with repeats - for gradients must deal with repeats (differently) - for solids, should ignore repeats. - - for polygons, when we add them, either ignore repeats or do - something similar to bits (in which case, we may want an extra - layer of indirection to modify the coordinates). - -It is then possible to build things like "get scanline" or "get tile" on -top of this. In the simplest case, just repeatedly calling GetPixel() -would work, but specialized get_scanline()s or get_tile()s could be -plugged in for common cases. - -By not plugging anything in for images with access functions, we only -have to compile the pixel functions twice, not the scanline functions. - -And we can get rid of fetchers for the bizarre formats that no one -uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth -considering a generic format based pixel fetcher for these edge cases. - -Since the actual routines depend on the image attributes, the images -must be notified when those change and update their function pointers -appropriately. So there should probably be a virtual function called -(* reinit) or something like that. - -There will also be wide fetchers for both pixels and lines. The line -fetcher will just call the wide pixel fetcher. The wide pixel fetcher -will just call expand, except for 10 bit formats. - -Rendering pipeline: - -Drawable: - 0. if (picture has alpha map) - 0.1. Position alpha map according to the alpha_x/alpha_y - 0.2. Where the two drawables intersect, the alpha channel - Replace the alpha channel of source with the one - from the alpha map. Replacement only takes place - in the intersection of the two drawables' geometries. - 1. Repeat the drawable according to the repeat attribute - 2. Reconstruct a continuous image according to the filter - 3. Transform according to the transform attribute - 4. Position image such that src_x, src_y is over dst_x, dst_y - 5. Sample once per destination pixel - 6. Clip. 
If a pixel is not within the source clip, then no - compositing takes place at that pixel. (Ie., it's *not* - treated as 0). - - Sampling a drawable: - - - If the channel does not have an alpha channel, the pixels in it - are treated as opaque. - - Note on reconstruction: - - - The top left pixel has coordinates (0.5, 0.5) and pixels are - spaced 1 apart. - -Gradient: - 1. Unless gradient type is conical, repeat the underlying (0, 1) - gradient according to the repeat attribute - 2. Integrate the gradient across the plane according to type. - 3. Transform according to transform attribute - 4. Position gradient - 5. Sample once per destination pixel. - 6. Clip - -Solid Fill: - 1. Repeat has no effect - 2. Image is already continuous and defined for the entire plane - 3. Transform has no effect - 4. Positioning has no effect - 5. Sample once per destination pixel. - 6. Clip - -Polygon: - 1. Repeat has no effect - 2. Image is already continuous and defined on the whole plane - 3. Transform according to transform attribute - 4. Position image - 5. Supersample 15x17 per destination pixel. - 6. Clip - -Possibly interesting additions: - - More general transformations, such as warping, or general - shading. - - - Shader image where a function is called to generate the - pixel (ie., uploading assembly code). - - - Resampling kernels - - In principle the polygon image uses a 15x17 box filter for - resampling. If we allow general resampling filters, then we - get all the various antialiasing types for free. - - Bilinear downsampling looks terrible and could be much - improved by a resampling filter. NEAREST reconstruction - combined with a box resampling filter is what GdkPixbuf - does, I believe. - - Useful for high frequency gradients as well. - - (Note that the difference between a reconstruction and a - resampling filter is mainly where in the pipeline they - occur. High quality resampling should use a correctly - oriented kernel so it should happen after transformation. 
- - An implementation can transform the resampling kernel and - convolve it with the reconstruction if it so desires, but it - will need to deal with the fact that the resampling kernel - will not necessarily be pixel aligned. - - "Output kernels" - - One could imagine doing the resampling after compositing, - ie., for each destination pixel sample each source image 16 - times, then composite those subpixels individually, then - finally apply a kernel. - - However, this is effectively the same as full screen - antialiasing, which is a simpler way to think about it. So - resampling kernels may make sense for individual images, but - not as a post-compositing step. - - Fullscreen AA is inefficient without chained compositing - though. Consider an (image scaled up to oversample size IN - some polygon) scaled down to screen size. With the current - implementation, there will be a huge temporary. With chained - compositing, the whole thing ends up being equivalent to the - output kernel from above. - - - Color space conversion - - The complete model here is that each surface has a color - space associated with it and that the compositing operation - also has one associated with it. Note also that gradients - should have associated colorspaces. - - - Dithering - - If people dither something that is already dithered, it will - look terrible, but don't do that, then. (Dithering happens - after resampling if at all - what is the relationship - with color spaces? Presumably dithering should happen in linear - intensity space). - - - Floating point surfaces, 16, 32 and possibly 64 bit per - channel. - - Maybe crack: - - - Glyph polygons - - If glyphs could be given as polygons, they could be - positioned and rasterized more accurately. The glyph - structure would need subpixel positioning though. - - - Luminance vs. coverage for the alpha channel - - Whether the alpha channel should be interpreted as luminance - modulation or as coverage (intensity modulation).
This is a - bit of a departure from the rendering model though. It could - also be considered whether it should be possible to have - both channels in the same drawable. - - - Alternative for component alpha - - - Set component-alpha on the output image. - - - This means each of the components is sampled - independently and composited in the corresponding - channel only. - - - Have 3 x oversampled mask - - - Scale it down by 3 horizontally, with [ 1/3, 1/3, 1/3 ] - resampling filter. - - Is this equivalent to just using a component alpha mask? - - Incompatible changes: - - - Gradients could be specified with premultiplied colors. (You - can use a mask to get things like gradients from solid red to - transparent red.) - -Refactoring pixman - -The pixman code is not particularly nice to put it mildly. Among the -issues are - -- inconsistent naming style (fb vs Fb, camelCase vs - underscore_naming). Sometimes there is even inconsistency *within* - one name. - - fetchProc32 ACCESS(pixman_fetchProcForPicture32) - - may be one of the ugliest names ever created. - - coding style: - use the one from cairo except that pixman uses this brace style: - - while (blah) - { - } - - Format do while like this: - - do - { - - } - while (...); - -- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex - -- switch case logic in pixman-access.c - - Instead it would be better to just store function pointers in the - image objects themselves, - - get_pixel() - get_scanline() - -- Much of the scanline fetching code is for formats that no one - ever uses. a2r2g2b2 anyone? - - It would probably be worthwhile having a generic fetcher for any - pixman format whatsoever. - -- Code related to particular image types should be split into individual - files.
- - pixman-bits-image.c - pixman-linear-gradient-image.c - pixman-radial-gradient-image.c - pixman-solid-image.c - -- Fast path code should be split into files based on architecture: - - pixman-mmx-fastpath.c - pixman-sse2-fastpath.c - pixman-c-fastpath.c - - etc. - - Each of these files should then export a fastpath table, which would - be declared in pixman-private.h. This should allow us to get rid - of the pixman-mmx.h files. - - The fast path table should describe each fast path. Ie., there should - be bitfields indicating what things the fast path can handle, rather than - like now where it is only allowed to take one format per src/mask/dest. Ie., - - { - FAST_a8r8g8b8 | FAST_x8r8g8b8, - FAST_null, - FAST_x8r8g8b8, - FAST_repeat_normal | FAST_repeat_none, - the_fast_path - } - -There should then be *one* file that implements pixman_image_composite(). -This should do this: - - optimize_operator(); - - convert 1x1 repeat to solid (actually this should be done at - image creation time). - - is there a useful fastpath? - -There should be a file called pixman-cpu.c that contains all the -architecture specific stuff to detect what CPU features we have. - -Issues that must be kept in mind: - - - we need accessor code to be preserved - - - maybe there should be a "store_scanline" too? - - Is this sufficient? - - We should preserve the optimization where the - compositing happens directly in the destination - whenever possible. - - - It should be possible to create GPU samplers from the - images. - -The "horizontal" classification should be a bit in the image, the -"vertical" classification should just happen inside the gradient -file. Note though that - - (a) these will change if the transformation/repeat changes. - - (b) at the moment the optimization for linear gradients - takes the source rectangle into account. Presumably - this is to also optimize the case where the gradient - is close enough to horizontal? - -Who is responsible for repeats?
In principle it should be the scanline -fetch. Right now NORMAL repeats are handled by walk_composite_region() -while other repeats are handled by the scanline code. - - -(Random note on filtering: do you filter before or after -transformation? Hardware is going to filter after transformation; -this is also what pixman does currently). It's not completely clear -what filtering *after* transformation means. One thing that might look -good would be to do *supersampling*, ie., compute multiple subpixels -per destination pixel, then average them together. diff --git a/gfx/cairo/libpixman/src/solaris-hwcap.mapfile b/gfx/cairo/libpixman/src/solaris-hwcap.mapfile new file mode 100644 index 0000000000..87efce1e34 --- /dev/null +++ b/gfx/cairo/libpixman/src/solaris-hwcap.mapfile @@ -0,0 +1,30 @@ +############################################################################### +# +# Copyright 2009, Oracle and/or its affiliates. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +############################################################################### +# +# Override the linker's detection of CMOV/MMX/SSE instructions so this +# library isn't flagged as only usable on CPUs with those ISAs, since it +# checks at runtime for availability before calling them + +hwcap_1 = V0x0 FPU OVERRIDE; |