Commit 3f59dfc9 by wangguotao

1.更新libyuv版本 2.支持输出sdi hd数据

parent e90327f6
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
<ClCompile> <ClCompile>
<AdditionalIncludeDirectories>.\ThirdParty\stb_image\;.\ThirdParty\rabbitmq\include;.\ThirdParty\ffmpeg-master-latest-win64-gpl-shared\include;.\ThirdParty\libyuv\include;.\ThirdParty\OpenCV\include;.\ThirdParty\NewTek\include;.\ThirdParty\BlackmagicDesign\include;.\include;%(AdditionalIncludeDirectories);$(Qt_INCLUDEPATH_)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>.\ThirdParty\stb_image\;.\ThirdParty\rabbitmq\include;.\ThirdParty\ffmpeg-master-latest-win64-gpl-shared\include;.\ThirdParty\libyuv\include;.\ThirdParty\OpenCV\include;.\ThirdParty\NewTek\include;.\ThirdParty\BlackmagicDesign\include;.\include;%(AdditionalIncludeDirectories);$(Qt_INCLUDEPATH_)</AdditionalIncludeDirectories>
<ShowIncludes>false</ShowIncludes> <ShowIncludes>false</ShowIncludes>
<PreprocessorDefinitions>WIN32;_WINSOCKAPI_;HAVE_CONFIG_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;_WINSOCKAPI_;HAVE_CONFIG_H;LIBYUV_USING_SHARED_LIBRARY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<OpenMPSupport>true</OpenMPSupport> <OpenMPSupport>true</OpenMPSupport>
</ClCompile> </ClCompile>
<Link> <Link>
...@@ -131,6 +131,30 @@ ...@@ -131,6 +131,30 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="include\BlackMagicDesign\DeckLinkDeviceDiscovery.h" /> <ClInclude Include="include\BlackMagicDesign\DeckLinkDeviceDiscovery.h" />
<QtMoc Include="include\Threads\VideoScaleThread.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\basic_types.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\compare.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\compare_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_from.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_from_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\cpu_id.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\loongson_intrinsics.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\macros_msa.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\mjpeg_decoder.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\planar_functions.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_rgb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_uv.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\version.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\video_common.h" />
<QtMoc Include="include\Threads\ConsumerMqThread.h" /> <QtMoc Include="include\Threads\ConsumerMqThread.h" />
<QtMoc Include="include\Threads\ProcessMaskThread.h" /> <QtMoc Include="include\Threads\ProcessMaskThread.h" />
<QtMoc Include="include\Threads\ReplayThread.h" /> <QtMoc Include="include\Threads\ReplayThread.h" />
...@@ -157,27 +181,6 @@ ...@@ -157,27 +181,6 @@
<ClInclude Include="include\Utils\SSEFunction.h" /> <ClInclude Include="include\Utils\SSEFunction.h" />
<ClInclude Include="include\Utils\yuv4k.h" /> <ClInclude Include="include\Utils\yuv4k.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv.h" /> <ClInclude Include="ThirdParty\libyuv\include\libyuv.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\basic_types.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\compare.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\compare_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_from.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\convert_from_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\cpu_id.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\macros_msa.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\mjpeg_decoder.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\planar_functions.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\rotate_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_argb.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_row.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_uv.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\version.h" />
<ClInclude Include="ThirdParty\libyuv\include\libyuv\video_common.h" />
<QtMoc Include="include\BlackMagicDesign\DeckLinkInputDevice.h" /> <QtMoc Include="include\BlackMagicDesign\DeckLinkInputDevice.h" />
<QtMoc Include="include\BlackMagicDesign\DeckLinkOutputDevice.h" /> <QtMoc Include="include\BlackMagicDesign\DeckLinkOutputDevice.h" />
<QtMoc Include="include\BlackMagicDesign\DeckLinkOutputPage.h" /> <QtMoc Include="include\BlackMagicDesign\DeckLinkOutputPage.h" />
...@@ -241,6 +244,7 @@ ...@@ -241,6 +244,7 @@
<ClCompile Include="src\Threads\DecodeMaskThread.cpp" /> <ClCompile Include="src\Threads\DecodeMaskThread.cpp" />
<ClCompile Include="src\Threads\ProcessMaskThread.cpp" /> <ClCompile Include="src\Threads\ProcessMaskThread.cpp" />
<ClCompile Include="src\Threads\ReplayThread.cpp" /> <ClCompile Include="src\Threads\ReplayThread.cpp" />
<ClCompile Include="src\Threads\VideoScaleThread.cpp" />
<ClCompile Include="src\Threads\ZoomThread.cpp" /> <ClCompile Include="src\Threads\ZoomThread.cpp" />
<ClCompile Include="src\TimePlus.cpp" /> <ClCompile Include="src\TimePlus.cpp" />
<ClCompile Include="src\NDI\NDIOutputThread.cpp" /> <ClCompile Include="src\NDI\NDIOutputThread.cpp" />
......
...@@ -140,6 +140,9 @@ ...@@ -140,6 +140,9 @@
<QtMoc Include="include\BlackMagicDesign\OpenFile.h"> <QtMoc Include="include\BlackMagicDesign\OpenFile.h">
<Filter>Header Files\BlackMagicDesign</Filter> <Filter>Header Files\BlackMagicDesign</Filter>
</QtMoc> </QtMoc>
<QtMoc Include="include\Threads\VideoScaleThread.h">
<Filter>Header Files\Threads</Filter>
</QtMoc>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="include\stdafx.h"> <ClInclude Include="include\stdafx.h">
...@@ -202,6 +205,57 @@ ...@@ -202,6 +205,57 @@
<ClInclude Include="ThirdParty\libyuv\include\libyuv.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv.h">
<Filter>ThirdParty\libuv\include</Filter> <Filter>ThirdParty\libuv\include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="include\Utils\AudioConvert.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Base64.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\VideoScale.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\MaskBuffer.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Algorithm.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Record\Record.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Record\RecordStore.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Record\RecordThread.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SafeMap.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SSEFunction.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Memory4k.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\yuv4k.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SampleDeque.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Settings.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\FastMemcpy_Avx.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\FastMemcpy.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Computer.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="ThirdParty\libyuv\include\libyuv\video_common.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\video_common.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
...@@ -214,6 +268,9 @@ ...@@ -214,6 +268,9 @@
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_row.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_row.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_rgb.h">
<Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude>
<ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_argb.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\scale_argb.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
...@@ -241,6 +298,9 @@ ...@@ -241,6 +298,9 @@
<ClInclude Include="ThirdParty\libyuv\include\libyuv\macros_msa.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\macros_msa.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="ThirdParty\libyuv\include\libyuv\loongson_intrinsics.h">
<Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude>
<ClInclude Include="ThirdParty\libyuv\include\libyuv\cpu_id.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\cpu_id.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
...@@ -265,57 +325,6 @@ ...@@ -265,57 +325,6 @@
<ClInclude Include="ThirdParty\libyuv\include\libyuv\basic_types.h"> <ClInclude Include="ThirdParty\libyuv\include\libyuv\basic_types.h">
<Filter>ThirdParty\libuv\include\include</Filter> <Filter>ThirdParty\libuv\include\include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="include\Utils\AudioConvert.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Base64.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\VideoScale.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\MaskBuffer.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Algorithm.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Record\Record.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Record\RecordStore.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Record\RecordThread.h">
<Filter>Header Files\Record</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SafeMap.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SSEFunction.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Memory4k.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\yuv4k.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\SampleDeque.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Settings.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\FastMemcpy_Avx.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\FastMemcpy.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
<ClInclude Include="include\Utils\Computer.h">
<Filter>Header Files\Utils</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<QtRcc Include="Form\MomentaMedia.qrc"> <QtRcc Include="Form\MomentaMedia.qrc">
...@@ -454,6 +463,9 @@ ...@@ -454,6 +463,9 @@
<ClCompile Include="src\BlackMagicDesign\OpenFile.cpp"> <ClCompile Include="src\BlackMagicDesign\OpenFile.cpp">
<Filter>Source Files\BlackMagicDesign</Filter> <Filter>Source Files\BlackMagicDesign</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="src\Threads\VideoScaleThread.cpp">
<Filter>Source Files\Threads</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<QtUic Include="Form\TimePlus.ui"> <QtUic Include="Form\TimePlus.ui">
......
...@@ -28,7 +28,10 @@ extern "C" { ...@@ -28,7 +28,10 @@ extern "C" {
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
...@@ -75,8 +78,16 @@ extern "C" { ...@@ -75,8 +78,16 @@ extern "C" {
// The following are available for Neon: // The following are available for Neon:
#if !defined(LIBYUV_DISABLE_NEON) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
#define HAS_HAMMINGDISTANCE_NEON #define HAS_HAMMINGDISTANCE_NEON
#define HAS_SUMSQUAREERROR_NEON
#endif
// The following are available for AArch64 Neon:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_HASHDJB2_NEON
#define HAS_HAMMINGDISTANCE_NEON_DOTPROD
#define HAS_SUMSQUAREERROR_NEON_DOTPROD
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
...@@ -99,6 +110,9 @@ uint32_t HammingDistance_AVX2(const uint8_t* src_a, ...@@ -99,6 +110,9 @@ uint32_t HammingDistance_AVX2(const uint8_t* src_a,
uint32_t HammingDistance_NEON(const uint8_t* src_a, uint32_t HammingDistance_NEON(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
uint32_t HammingDistance_NEON_DotProd(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_MSA(const uint8_t* src_a, uint32_t HammingDistance_MSA(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
...@@ -114,6 +128,9 @@ uint32_t SumSquareError_AVX2(const uint8_t* src_a, ...@@ -114,6 +128,9 @@ uint32_t SumSquareError_AVX2(const uint8_t* src_a,
uint32_t SumSquareError_NEON(const uint8_t* src_a, uint32_t SumSquareError_NEON(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
uint32_t SumSquareError_NEON_DotProd(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_MSA(const uint8_t* src_a, uint32_t SumSquareError_MSA(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count); int count);
...@@ -121,6 +138,7 @@ uint32_t SumSquareError_MSA(const uint8_t* src_a, ...@@ -121,6 +138,7 @@ uint32_t SumSquareError_MSA(const uint8_t* src_a,
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed); uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -367,6 +367,23 @@ int I212ToI422(const uint16_t* src_y, ...@@ -367,6 +367,23 @@ int I212ToI422(const uint16_t* src_y,
int width, int width,
int height); int height);
#define H212ToH420 I212ToI420
LIBYUV_API
int I212ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define H412ToH444 I412ToI444 #define H412ToH444 I412ToI444
LIBYUV_API LIBYUV_API
int I412ToI444(const uint16_t* src_y, int I412ToI444(const uint16_t* src_y,
...@@ -384,6 +401,23 @@ int I412ToI444(const uint16_t* src_y, ...@@ -384,6 +401,23 @@ int I412ToI444(const uint16_t* src_y,
int width, int width,
int height); int height);
#define H412ToH420 I412ToI420
LIBYUV_API
int I412ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
#define I412ToI012 I410ToI010 #define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010 #define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010 #define H412ToH012 I410ToI010
...@@ -751,6 +785,21 @@ int ARGBToI420(const uint8_t* src_argb, ...@@ -751,6 +785,21 @@ int ARGBToI420(const uint8_t* src_argb,
int width, int width,
int height); int height);
// Convert ARGB to I420 with Alpha
LIBYUV_API
int ARGBToI420Alpha(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_a,
int dst_stride_a,
int width,
int height);
// BGRA little endian (argb in memory) to I420. // BGRA little endian (argb in memory) to I420.
LIBYUV_API LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra, int BGRAToI420(const uint8_t* src_bgra,
......
...@@ -67,6 +67,8 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full ...@@ -67,6 +67,8 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ #define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I012ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I012ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ #define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ #define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
...@@ -1902,6 +1904,26 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, ...@@ -1902,6 +1904,26 @@ int NV21ToRGB24Matrix(const uint8_t* src_y,
int width, int width,
int height); int height);
// Convert YUY2 to ARGB with matrix.
LIBYUV_API
int YUY2ToARGBMatrix(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert UYVY to ARGB with matrix.
LIBYUV_API
int UYVYToARGBMatrix(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert Android420 to ARGB with matrix. // Convert Android420 to ARGB with matrix.
LIBYUV_API LIBYUV_API
int Android420ToARGBMatrix(const uint8_t* src_y, int Android420ToARGBMatrix(const uint8_t* src_y,
......
...@@ -21,40 +21,52 @@ extern "C" { ...@@ -21,40 +21,52 @@ extern "C" {
// Internal flag to indicate cpuid requires initialization. // Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1; static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors. // These flags are only valid on Arm processors.
static const int kCpuHasARM = 0x2; static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4; static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag. // Leave a gap to avoid setting kCpuHasX86.
static const int kCpuHasNeonDotProd = 0x10;
static const int kCpuHasNeonI8MM = 0x20;
static const int kCpuHasSVE = 0x40;
static const int kCpuHasSVE2 = 0x80;
static const int kCpuHasSME = 0x100;
// These flags are only valid on x86 processors. // These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10; static const int kCpuHasX86 = 0x8;
static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSE2 = 0x10;
static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSSE3 = 0x20;
static const int kCpuHasSSE41 = 0x80; static const int kCpuHasSSE41 = 0x40;
static const int kCpuHasSSE42 = 0x100; // unused at this time. static const int kCpuHasSSE42 = 0x80;
static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX = 0x100;
static const int kCpuHasAVX2 = 0x400; static const int kCpuHasAVX2 = 0x200;
static const int kCpuHasERMS = 0x800; static const int kCpuHasERMS = 0x400;
static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasFMA3 = 0x800;
static const int kCpuHasF16C = 0x2000; static const int kCpuHasF16C = 0x1000;
static const int kCpuHasGFNI = 0x4000; static const int kCpuHasAVX512BW = 0x2000;
static const int kCpuHasAVX512BW = 0x8000; static const int kCpuHasAVX512VL = 0x4000;
static const int kCpuHasAVX512VL = 0x10000; static const int kCpuHasAVX512VNNI = 0x8000;
static const int kCpuHasAVX512VNNI = 0x20000; static const int kCpuHasAVX512VBMI = 0x10000;
static const int kCpuHasAVX512VBMI = 0x40000; static const int kCpuHasAVX512VBMI2 = 0x20000;
static const int kCpuHasAVX512VBMI2 = 0x80000; static const int kCpuHasAVX512VBITALG = 0x40000;
static const int kCpuHasAVX512VBITALG = 0x100000; static const int kCpuHasAVX10 = 0x80000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x200000; static const int kCpuHasAVXVNNI = 0x100000;
static const int kCpuHasAVXVNNIINT8 = 0x200000;
static const int kCpuHasAMXINT8 = 0x400000;
// These flags are only valid on MIPS processors. // These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x400000; static const int kCpuHasMIPS = 0x800000;
static const int kCpuHasMSA = 0x800000; static const int kCpuHasMSA = 0x1000000;
// These flags are only valid on LOONGARCH processors. // These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x2000000; static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000; static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000; static const int kCpuHasLASX = 0x8000000;
// These flags are only valid on RISCV processors.
static const int kCpuHasRISCV = 0x10000000;
static const int kCpuHasRVV = 0x20000000;
static const int kCpuHasRVVZVFH = 0x40000000;
// Optional init function. TestCpuFlag does an auto-init. // Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags. // Returns cpu_info flags.
LIBYUV_API LIBYUV_API
...@@ -78,6 +90,19 @@ LIBYUV_API ...@@ -78,6 +90,19 @@ LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name); int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name); int MipsCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int RiscvCpuCaps(const char* cpuinfo_name);
#ifdef __aarch64__
#if __linux__
// On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}).
LIBYUV_API
int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2);
#else
LIBYUV_API
int AArch64CpuCaps();
#endif
#endif
// For testing, allow CPU flags to be disabled. // For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
({ \ ({ \
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \ uint32_t val_m; \
asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ asm("lw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \ : [val_m] "=r"(val_m) \
: [psrc_lw_m] "m"(*psrc_lw_m)); \ : [psrc_lw_m] "m"(*psrc_lw_m)); \
val_m; \ val_m; \
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
({ \ ({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \ uint64_t val_m = 0; \
asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ asm("ld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \ : [val_m] "=r"(val_m) \
: [psrc_ld_m] "m"(*psrc_ld_m)); \ : [psrc_ld_m] "m"(*psrc_ld_m)); \
val_m; \ val_m; \
...@@ -55,7 +55,7 @@ ...@@ -55,7 +55,7 @@
({ \ ({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \ uint32_t val_m = (val); \
asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ asm("sw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \ : [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \ : [val_m] "r"(val_m)); \
}) })
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
({ \ ({ \
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
uint64_t val_m = (val); \ uint64_t val_m = (val); \
asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ asm("sd %[val_m], %[pdst_sd_m] \n" \
: [pdst_sd_m] "=m"(*pdst_sd_m) \ : [pdst_sd_m] "=m"(*pdst_sd_m) \
: [val_m] "r"(val_m)); \ : [val_m] "r"(val_m)); \
}) })
...@@ -86,8 +86,7 @@ ...@@ -86,8 +86,7 @@
uint8_t* psrc_lw_m = (uint8_t*)(psrc); \ uint8_t* psrc_lw_m = (uint8_t*)(psrc); \
uint32_t val_lw_m; \ uint32_t val_lw_m; \
\ \
__asm__ volatile( \ asm("lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
"lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
"lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \ "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
\ \
: [val_lw_m] "=&r"(val_lw_m) \ : [val_lw_m] "=&r"(val_lw_m) \
...@@ -102,8 +101,7 @@ ...@@ -102,8 +101,7 @@
uint8_t* psrc_ld_m = (uint8_t*)(psrc); \ uint8_t* psrc_ld_m = (uint8_t*)(psrc); \
uint64_t val_ld_m = 0; \ uint64_t val_ld_m = 0; \
\ \
__asm__ volatile( \ asm("ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
"ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
"ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \ "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
\ \
: [val_ld_m] "=&r"(val_ld_m) \ : [val_ld_m] "=&r"(val_ld_m) \
...@@ -130,7 +128,7 @@ ...@@ -130,7 +128,7 @@
({ \ ({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \ uint32_t val_m = (val); \
asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ asm("usw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \ : [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \ : [val_m] "r"(val_m)); \
}) })
......
...@@ -30,7 +30,10 @@ extern "C" { ...@@ -30,7 +30,10 @@ extern "C" {
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
...@@ -827,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, ...@@ -827,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
int width, int width,
int height); int height);
typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
// Get function to Alpha Blend ARGB pixels and store to destination.
LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination. // Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate. // Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255. // Alpha of destination is set to 255.
......
...@@ -26,9 +26,20 @@ extern "C" { ...@@ -26,9 +26,20 @@ extern "C" {
#if defined(__native_client__) #if defined(__native_client__)
#define LIBYUV_DISABLE_NEON #define LIBYUV_DISABLE_NEON
#endif #endif
// clang >= 19.0.0 required for SME
#if !defined(LIBYUV_DISABLE_SME) && defined(__clang__) && defined(__aarch64__)
#if __clang_major__ < 19
#define LIBYUV_DISABLE_SME
#endif
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
...@@ -42,6 +53,8 @@ extern "C" { ...@@ -42,6 +53,8 @@ extern "C" {
// The following are available for GCC 32 or 64 bit: // The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSE4X4_32_SSE2
#define HAS_TRANSPOSE4X4_32_AVX2
#endif #endif
// The following are available for 64 bit GCC: // The following are available for 64 bit GCC:
...@@ -52,8 +65,18 @@ extern "C" { ...@@ -52,8 +65,18 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#if defined(__aarch64__)
#define HAS_TRANSPOSEWX16_NEON
#else
#define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEWX8_NEON
#endif
#define HAS_TRANSPOSEUVWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON
#define HAS_TRANSPOSE4X4_32_NEON
#endif
#if !defined(LIBYUV_DISABLE_SME) && defined(__aarch64__)
#define HAS_TRANSPOSEWXH_SME
#define HAS_TRANSPOSEUVWXH_SME
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
...@@ -88,6 +111,17 @@ void TransposeWx8_NEON(const uint8_t* src, ...@@ -88,6 +111,17 @@ void TransposeWx8_NEON(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
int width); int width);
void TransposeWx16_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWxH_SME(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
void TransposeWx8_SSSE3(const uint8_t* src, void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
...@@ -114,6 +148,11 @@ void TransposeWx8_Any_NEON(const uint8_t* src, ...@@ -114,6 +148,11 @@ void TransposeWx8_Any_NEON(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
int width); int width);
void TransposeWx16_Any_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Any_SSSE3(const uint8_t* src, void TransposeWx8_Any_SSSE3(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
...@@ -172,6 +211,14 @@ void TransposeUVWx8_NEON(const uint8_t* src, ...@@ -172,6 +211,14 @@ void TransposeUVWx8_NEON(const uint8_t* src,
uint8_t* dst_b, uint8_t* dst_b,
int dst_stride_b, int dst_stride_b,
int width); int width);
void TransposeUVWxH_SME(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
void TransposeUVWx16_MSA(const uint8_t* src, void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst_a, uint8_t* dst_a,
...@@ -240,14 +287,19 @@ void Transpose4x4_32_NEON(const uint8_t* src, ...@@ -240,14 +287,19 @@ void Transpose4x4_32_NEON(const uint8_t* src,
int dst_stride, int dst_stride,
int width); int width);
void Transpose4x4_32_C(const uint8_t* src, void Transpose4x4_32_SSE2(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
int width); int width);
// Transpose 32 bit values (ARGB) void Transpose4x4_32_AVX2(const uint8_t* src,
void Transpose8x8_32_NEON(const uint8_t* src, int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void Transpose4x4_32_C(const uint8_t* src,
int src_stride, int src_stride,
uint8_t* dst, uint8_t* dst,
int dst_stride, int dst_stride,
......
...@@ -31,7 +31,10 @@ extern "C" { ...@@ -31,7 +31,10 @@ extern "C" {
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
...@@ -161,7 +164,6 @@ extern "C" { ...@@ -161,7 +164,6 @@ extern "C" {
#define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_BLENDPLANEROW_SSSE3 #define HAS_BLENDPLANEROW_SSSE3
#define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
...@@ -171,9 +173,6 @@ extern "C" { ...@@ -171,9 +173,6 @@ extern "C" {
#define HAS_SOBELXROW_SSE2 #define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2 #define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2 #define HAS_SOBELYROW_SSE2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBATTENUATEROW_SSSE3
#endif
// The following functions fail on gcc/clang 32 bit with fpic and framepointer. // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works. // caveat: clangcl uses row_win.cc which works.
...@@ -241,11 +240,7 @@ extern "C" { ...@@ -241,11 +240,7 @@ extern "C" {
#define HAS_ARGBADDROW_AVX2 #define HAS_ARGBADDROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBATTENUATEROW_AVX2
#endif
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER) defined(_MSC_VER)
...@@ -285,14 +280,15 @@ extern "C" { ...@@ -285,14 +280,15 @@ extern "C" {
#define HAS_ABGRTOAR30ROW_SSSE3 #define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ABGRTOYJROW_SSSE3 #define HAS_ABGRTOYJROW_SSSE3
#define HAS_AR64TOARGBROW_SSSE3 #define HAS_AR64TOARGBROW_SSSE3
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBTOAB64ROW_SSSE3 #define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3 #define HAS_ARGBTOAR64ROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2 #define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
#define HAS_DETILEROW_16_SSE2 #define HAS_DETILEROW_16_SSE2
#define HAS_DETILEROW_16_AVX #define HAS_DETILEROW_SSE2
#define HAS_DETILESPLITUVROW_SSSE3 #define HAS_DETILESPLITUVROW_SSSE3
#define HAS_DETILETOYUY2_SSE2 #define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3 #define HAS_HALFMERGEUVROW_SSSE3
...@@ -345,13 +341,16 @@ extern "C" { ...@@ -345,13 +341,16 @@ extern "C" {
#define HAS_ABGRTOYJROW_AVX2 #define HAS_ABGRTOYJROW_AVX2
#define HAS_ABGRTOYROW_AVX2 #define HAS_ABGRTOYROW_AVX2
#define HAS_AR64TOARGBROW_AVX2 #define HAS_AR64TOARGBROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBTOAB64ROW_AVX2 #define HAS_ARGBTOAB64ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTOAR64ROW_AVX2 #define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2
#define HAS_DETILEROW_16_AVX
#define HAS_DIVIDEROW_16_AVX2 #define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2 #define HAS_HALFMERGEUVROW_AVX2
#define HAS_I210TOAR30ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2
...@@ -402,9 +401,8 @@ extern "C" { ...@@ -402,9 +401,8 @@ extern "C" {
// The following are available for AVX512 clang x86 platforms: // The following are available for AVX512 clang x86 platforms:
// TODO(fbarchard): Port to GCC and Visual C // TODO(fbarchard): Port to GCC and Visual C
// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 // TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789
// TODO(fbarchard): Port MERGEUV to assembly
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512) && !defined(_MSC_VER)) (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
#define HAS_ARGBTORGB24ROW_AVX512VBMI #define HAS_ARGBTORGB24ROW_AVX512VBMI
#define HAS_MERGEUVROW_AVX512BW #define HAS_MERGEUVROW_AVX512BW
#endif #endif
...@@ -555,6 +553,67 @@ extern "C" { ...@@ -555,6 +553,67 @@ extern "C" {
// The following are available on AArch64 platforms: // The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_ARGBTOAR30ROW_NEON
#define HAS_ABGRTOAR30ROW_NEON
#define HAS_I210ALPHATOARGBROW_NEON
#define HAS_I410ALPHATOARGBROW_NEON
#define HAS_I210TOARGBROW_NEON
#define HAS_I410TOARGBROW_NEON
#define HAS_I210TOAR30ROW_NEON
#define HAS_I410TOAR30ROW_NEON
#define HAS_I212TOARGBROW_NEON
#define HAS_I212TOAR30ROW_NEON
#define HAS_I422TOAR30ROW_NEON
#define HAS_P210TOAR30ROW_NEON
#define HAS_P210TOARGBROW_NEON
#define HAS_P410TOAR30ROW_NEON
#define HAS_P410TOARGBROW_NEON
#define HAS_ABGRTOYJROW_NEON_DOTPROD
#define HAS_ABGRTOYROW_NEON_DOTPROD
#define HAS_ARGBTOYJROW_NEON_DOTPROD
#define HAS_ARGBTOYROW_NEON_DOTPROD
#define HAS_BGRATOYROW_NEON_DOTPROD
#define HAS_RGBATOYJROW_NEON_DOTPROD
#define HAS_RGBATOYROW_NEON_DOTPROD
#define HAS_ARGBCOLORMATRIXROW_NEON_I8MM
#define HAS_ARGBTOUV444ROW_NEON_I8MM
#endif
// The following are available on AArch64 SVE platforms:
#if !defined(LIBYUV_DISABLE_SVE) && defined(__aarch64__)
#define HAS_ABGRTOUVJROW_SVE2
#define HAS_ABGRTOUVROW_SVE2
#define HAS_ARGB1555TOARGBROW_SVE2
#define HAS_ARGBTORAWROW_SVE2
#define HAS_ARGBTORGB24ROW_SVE2
#define HAS_ARGBTORGB565DITHERROW_SVE2
#define HAS_ARGBTORGB565ROW_SVE2
#define HAS_ARGBTOUVJROW_SVE2
#define HAS_ARGBTOUVROW_SVE2
#define HAS_AYUVTOUVROW_SVE2
#define HAS_AYUVTOVUROW_SVE2
#define HAS_BGRATOUVROW_SVE2
#define HAS_I400TOARGBROW_SVE2
#define HAS_I422ALPHATOARGBROW_SVE2
#define HAS_I422TOARGBROW_SVE2
#define HAS_I422TORGBAROW_SVE2
#define HAS_I444ALPHATOARGBROW_SVE2
#define HAS_I444TOARGBROW_SVE2
#define HAS_NV12TOARGBROW_SVE2
#define HAS_NV21TOARGBROW_SVE2
#define HAS_RAWTOARGBROW_SVE2
#define HAS_RAWTORGB24ROW_SVE2
#define HAS_RAWTORGBAROW_SVE2
#define HAS_RGB24TOARGBROW_SVE2
#define HAS_RGBATOUVROW_SVE2
#define HAS_UYVYTOARGBROW_SVE2
#define HAS_YUY2TOARGBROW_SVE2
#endif
// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_GAUSSCOL_F32_NEON #define HAS_GAUSSCOL_F32_NEON
#define HAS_GAUSSROW_F32_NEON #define HAS_GAUSSROW_F32_NEON
#define HAS_INTERPOLATEROW_16TO8_NEON #define HAS_INTERPOLATEROW_16TO8_NEON
...@@ -651,20 +710,47 @@ extern "C" { ...@@ -651,20 +710,47 @@ extern "C" {
#define HAS_ARGB1555TOUVROW_LSX #define HAS_ARGB1555TOUVROW_LSX
#define HAS_ARGB1555TOYROW_LSX #define HAS_ARGB1555TOYROW_LSX
#define HAS_ARGB4444TOARGBROW_LSX #define HAS_ARGB4444TOARGBROW_LSX
#define HAS_ARGBADDROW_LSX
#define HAS_ARGBATTENUATEROW_LSX
#define HAS_ARGBBLENDROW_LSX #define HAS_ARGBBLENDROW_LSX
#define HAS_ARGBCOLORMATRIXROW_LSX #define HAS_ARGBCOLORMATRIXROW_LSX
#define HAS_ARGBEXTRACTALPHAROW_LSX #define HAS_ARGBEXTRACTALPHAROW_LSX
#define HAS_ARGBGRAYROW_LSX
#define HAS_ARGBSEPIAROW_LSX
#define HAS_ARGBSHADEROW_LSX
#define HAS_ARGBSHUFFLEROW_LSX
#define HAS_ARGBSUBTRACTROW_LSX
#define HAS_ARGBQUANTIZEROW_LSX #define HAS_ARGBQUANTIZEROW_LSX
#define HAS_ARGBSETROW_LSX #define HAS_ARGBSETROW_LSX
#define HAS_ARGBTOARGB1555ROW_LSX
#define HAS_ARGBTOARGB4444ROW_LSX
#define HAS_ARGBTORAWROW_LSX
#define HAS_ARGBTORGB24ROW_LSX
#define HAS_ARGBTORGB565ROW_LSX
#define HAS_ARGBTORGB565DITHERROW_LSX
#define HAS_ARGBTOUVJROW_LSX #define HAS_ARGBTOUVJROW_LSX
#define HAS_ARGBTOUV444ROW_LSX
#define HAS_ARGBTOUVROW_LSX
#define HAS_ARGBTOYJROW_LSX #define HAS_ARGBTOYJROW_LSX
#define HAS_ARGBMIRRORROW_LSX
#define HAS_ARGBMULTIPLYROW_LSX
#define HAS_BGRATOUVROW_LSX #define HAS_BGRATOUVROW_LSX
#define HAS_BGRATOYROW_LSX #define HAS_BGRATOYROW_LSX
#define HAS_I400TOARGBROW_LSX #define HAS_I400TOARGBROW_LSX
#define HAS_I444TOARGBROW_LSX #define HAS_I444TOARGBROW_LSX
#define HAS_INTERPOLATEROW_LSX #define HAS_INTERPOLATEROW_LSX
#define HAS_I422ALPHATOARGBROW_LSX
#define HAS_I422TOARGB1555ROW_LSX
#define HAS_I422TOARGB4444ROW_LSX
#define HAS_I422TORGB24ROW_LSX
#define HAS_I422TORGB565ROW_LSX
#define HAS_I422TORGBAROW_LSX
#define HAS_I422TOUYVYROW_LSX
#define HAS_I422TOYUY2ROW_LSX
#define HAS_J400TOARGBROW_LSX #define HAS_J400TOARGBROW_LSX
#define HAS_MERGEUVROW_LSX #define HAS_MERGEUVROW_LSX
#define HAS_MIRRORROW_LSX
#define HAS_MIRRORUVROW_LSX
#define HAS_MIRRORSPLITUVROW_LSX #define HAS_MIRRORSPLITUVROW_LSX
#define HAS_NV12TOARGBROW_LSX #define HAS_NV12TOARGBROW_LSX
#define HAS_NV12TORGB565ROW_LSX #define HAS_NV12TORGB565ROW_LSX
...@@ -687,7 +773,13 @@ extern "C" { ...@@ -687,7 +773,13 @@ extern "C" {
#define HAS_SOBELXYROW_LSX #define HAS_SOBELXYROW_LSX
#define HAS_SPLITUVROW_LSX #define HAS_SPLITUVROW_LSX
#define HAS_UYVYTOARGBROW_LSX #define HAS_UYVYTOARGBROW_LSX
#define HAS_UYVYTOUV422ROW_LSX
#define HAS_UYVYTOUVROW_LSX
#define HAS_UYVYTOYROW_LSX
#define HAS_YUY2TOARGBROW_LSX #define HAS_YUY2TOARGBROW_LSX
#define HAS_YUY2TOUVROW_LSX
#define HAS_YUY2TOUV422ROW_LSX
#define HAS_YUY2TOYROW_LSX
#define HAS_ARGBTOYROW_LSX #define HAS_ARGBTOYROW_LSX
#define HAS_ABGRTOYJROW_LSX #define HAS_ABGRTOYJROW_LSX
#define HAS_RGBATOYJROW_LSX #define HAS_RGBATOYJROW_LSX
...@@ -695,6 +787,10 @@ extern "C" { ...@@ -695,6 +787,10 @@ extern "C" {
#define HAS_RAWTOYJROW_LSX #define HAS_RAWTOYJROW_LSX
#endif #endif
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_I422TOARGBROW_LSX
#endif
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx) #if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
#define HAS_ARGB1555TOARGBROW_LASX #define HAS_ARGB1555TOARGBROW_LASX
#define HAS_ARGB1555TOUVROW_LASX #define HAS_ARGB1555TOUVROW_LASX
...@@ -758,6 +854,74 @@ extern "C" { ...@@ -758,6 +854,74 @@ extern "C" {
#define HAS_RAWTOYJROW_LASX #define HAS_RAWTOYJROW_LASX
#endif #endif
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
#if __riscv_v_intrinsic > 11000
// Since v0.12, TUPLE_TYPE is introduced for segment load and store.
#define LIBYUV_RVV_HAS_TUPLE_TYPE
// Since v0.12, VXRM(fixed-point rounding mode) is included in arguments of
// fixed-point intrinsics.
#define LIBYUV_RVV_HAS_VXRM_ARG
#endif
#define HAS_COPYROW_RVV
#define HAS_AB64TOARGBROW_RVV
#define HAS_ABGRTOYJROW_RVV
#define HAS_ABGRTOYROW_RVV
#define HAS_AR64TOARGBROW_RVV
#define HAS_AR64TOAB64ROW_RVV
#define HAS_ARGBATTENUATEROW_RVV
#define HAS_ARGBBLENDROW_RVV
#define HAS_ARGBCOPYYTOALPHAROW_RVV
#define HAS_ARGBEXTRACTALPHAROW_RVV
#define HAS_ARGBTOAB64ROW_RVV
#define HAS_ARGBTOABGRROW_RVV
#define HAS_ARGBTOAR64ROW_RVV
#define HAS_ARGBTOBGRAROW_RVV
#define HAS_ARGBTORAWROW_RVV
#define HAS_ARGBTORGB24ROW_RVV
#define HAS_ARGBTORGBAROW_RVV
#define HAS_ARGBTOYJROW_RVV
#define HAS_ARGBTOYMATRIXROW_RVV
#define HAS_ARGBTOYROW_RVV
#define HAS_BGRATOYROW_RVV
#define HAS_BLENDPLANEROW_RVV
#define HAS_I400TOARGBROW_RVV
#define HAS_I422ALPHATOARGBROW_RVV
#define HAS_I422TOARGBROW_RVV
#define HAS_I422TORGB24ROW_RVV
#define HAS_I422TORGBAROW_RVV
#define HAS_I444ALPHATOARGBROW_RVV
#define HAS_I444TOARGBROW_RVV
#define HAS_I444TORGB24ROW_RVV
#define HAS_INTERPOLATEROW_RVV
#define HAS_J400TOARGBROW_RVV
#define HAS_MERGEARGBROW_RVV
#define HAS_MERGERGBROW_RVV
#define HAS_MERGEUVROW_RVV
#define HAS_MERGEXRGBROW_RVV
#define HAS_NV12TOARGBROW_RVV
#define HAS_NV12TORGB24ROW_RVV
#define HAS_NV21TOARGBROW_RVV
#define HAS_NV21TORGB24ROW_RVV
#define HAS_RAWTOARGBROW_RVV
#define HAS_RAWTORGB24ROW_RVV
#define HAS_RAWTORGBAROW_RVV
#define HAS_RAWTOYJROW_RVV
#define HAS_RAWTOYROW_RVV
#define HAS_RGB24TOARGBROW_RVV
#define HAS_RGB24TOYJROW_RVV
#define HAS_RGB24TOYROW_RVV
#define HAS_RGBATOARGBROW_RVV
#define HAS_RGBATOYJROW_RVV
#define HAS_RGBATOYMATRIXROW_RVV
#define HAS_RGBATOYROW_RVV
#define HAS_RGBTOYMATRIXROW_RVV
#define HAS_SPLITARGBROW_RVV
#define HAS_SPLITRGBROW_RVV
#define HAS_SPLITUVROW_RVV
#define HAS_SPLITXRGBROW_RVV
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2) #if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var #define SIMD_ALIGNED(var) __declspec(align(32)) var
...@@ -817,8 +981,8 @@ typedef uint32_t ulvec32[8]; ...@@ -817,8 +981,8 @@ typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32]; typedef uint8_t ulvec8[32];
#endif #endif
#if defined(__aarch64__) || defined(__arm__) #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// This struct is for ARM color conversion. // This struct is for ARM and RISC-V color conversion.
struct YuvConstants { struct YuvConstants {
uvec8 kUVCoeff; uvec8 kUVCoeff;
vec16 kRGBCoeffBias; vec16 kRGBCoeffBias;
...@@ -852,14 +1016,6 @@ struct YuvConstants { ...@@ -852,14 +1016,6 @@ struct YuvConstants {
free(var##_mem); \ free(var##_mem); \
var = NULL var = NULL
#define align_buffer_64_16(var, size) \
void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \
uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64_16(var) \
free(var##_mem); \
var = NULL
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP #define OMITFP
#else #else
...@@ -924,24 +1080,92 @@ struct YuvConstants { ...@@ -924,24 +1080,92 @@ struct YuvConstants {
IACA_UD_BYTES \ IACA_UD_BYTES \
} }
void I210AlphaToARGBRow_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410AlphaToARGBRow_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
const uint16_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_NEON(const uint8_t* src_y, void I444ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I444ToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToRGB24Row_NEON(const uint8_t* src_y, void I444ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_rgb24, uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I210ToARGBRow_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I210ToAR30Row_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I212ToARGBRow_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I212ToAR30Row_NEON(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_NEON(const uint8_t* src_y, void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToAR30Row_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_NEON(const uint8_t* src_y, void I444AlphaToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -949,6 +1173,13 @@ void I444AlphaToARGBRow_NEON(const uint8_t* src_y, ...@@ -949,6 +1173,13 @@ void I444AlphaToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I444AlphaToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_NEON(const uint8_t* src_y, void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -956,12 +1187,25 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y, ...@@ -956,12 +1187,25 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422AlphaToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_NEON(const uint8_t* src_y, void I422ToRGBARow_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_rgba, uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGBARow_SVE2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_NEON(const uint8_t* src_y, void I422ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -991,6 +1235,11 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y, ...@@ -991,6 +1235,11 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void NV12ToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_NEON(const uint8_t* src_y, void NV12ToRGB565Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv, const uint8_t* src_uv,
uint8_t* dst_rgb565, uint8_t* dst_rgb565,
...@@ -1001,6 +1250,11 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y, ...@@ -1001,6 +1250,11 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void NV21ToARGBRow_SVE2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB24Row_NEON(const uint8_t* src_y, void NV12ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv, const uint8_t* src_uv,
uint8_t* dst_rgb24, uint8_t* dst_rgb24,
...@@ -1019,10 +1273,62 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, ...@@ -1019,10 +1273,62 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void YUY2ToARGBRow_SVE2(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void UYVYToARGBRow_SVE2(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_MSA(const uint8_t* src_y, void I444ToARGBRow_MSA(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1042,6 +1348,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y, ...@@ -1042,6 +1348,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGBRow_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_LASX(const uint8_t* src_y, void I422ToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1054,6 +1366,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y, ...@@ -1054,6 +1366,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGBARow_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_LASX(const uint8_t* src_y, void I422ToRGBARow_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1067,6 +1385,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y, ...@@ -1067,6 +1385,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_LASX(const uint8_t* src_y, void I422AlphaToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1080,6 +1405,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y, ...@@ -1080,6 +1405,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGB24Row_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_LASX(const uint8_t* src_y, void I422ToRGB24Row_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1092,6 +1423,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y, ...@@ -1092,6 +1423,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y,
uint8_t* dst_rgb565, uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGB565Row_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_LASX(const uint8_t* src_y, void I422ToRGB565Row_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1104,6 +1441,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y, ...@@ -1104,6 +1441,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb4444, uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGB4444Row_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_LASX(const uint8_t* src_y, void I422ToARGB4444Row_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1116,6 +1459,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y, ...@@ -1116,6 +1459,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb1555, uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGB1555Row_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_LASX(const uint8_t* src_y, void I422ToARGB1555Row_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -1184,6 +1533,26 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy, ...@@ -1184,6 +1533,26 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void NV12ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
...@@ -1213,6 +1582,22 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); ...@@ -1213,6 +1582,22 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width); void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width); void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width); void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_NEON_DotProd(const uint8_t* src_argb,
uint8_t* dst_y,
int width);
void ARGBToYJRow_NEON_DotProd(const uint8_t* src_argb,
uint8_t* dst_yj,
int width);
void ABGRToYJRow_NEON_DotProd(const uint8_t* src_abgr,
uint8_t* dst_yj,
int width);
void RGBAToYJRow_NEON_DotProd(const uint8_t* src_rgba,
uint8_t* dst_yj,
int width);
void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void ABGRToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
...@@ -1227,11 +1612,20 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb, ...@@ -1227,11 +1612,20 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_NEON(const uint8_t* src_argb, void ARGBToUVRow_NEON(const uint8_t* src_argb,
int src_stride_argb, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: same parameter list as ARGBToUVRow_NEON - a const source
// pointer with an int stride, two mutable destinations and an int width.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGBToUVRow_SVE2(
    const uint8_t* src_argb, int src_stride_argb,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUV444Row_MSA(const uint8_t* src_argb, void ARGBToUV444Row_MSA(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -1241,11 +1635,20 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb, ...@@ -1241,11 +1635,20 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer plus int stride, two mutable
// destinations (dst_u, dst_v) and an int width - the same shape as the
// ARGBToUVRow_LASX prototype.
// NOTE(review): _LSX presumably selects the LoongArch LSX build - confirm.
void ARGBToUVRow_LSX(
    const uint8_t* src_argb, int src_stride_argb,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUVRow_LASX(const uint8_t* src_argb, void ARGBToUVRow_LASX(const uint8_t* src_argb,
int src_stride_argb, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer, two mutable destinations and an
// int width; no stride parameter, unlike ARGBToUVRow_LSX.
// NOTE(review): _LSX presumably selects the LoongArch LSX build - confirm.
void ARGBToUV444Row_LSX(
    const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUV444Row_LASX(const uint8_t* src_argb, void ARGBToUV444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -1255,26 +1658,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, ...@@ -1255,26 +1658,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width (parameter list mirrors ARGBToUVJRow_NEON).
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGBToUVJRow_SVE2(
    const uint8_t* src_argb, int src_stride_argb,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ABGRToUVJRow_NEON(const uint8_t* src_abgr, void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr, int src_stride_abgr,
uint8_t* dst_uj, uint8_t* dst_uj,
uint8_t* dst_vj, uint8_t* dst_vj,
int width); int width);
// Prototype only: const ABGR-named source pointer with int stride, two
// mutable destinations (dst_uj, dst_vj) and an int width; names follow the
// ABGRToUVJRow_NEON prototype.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ABGRToUVJRow_SVE2(
    const uint8_t* src_abgr, int src_stride_abgr,
    uint8_t* dst_uj, uint8_t* dst_vj, int width);
void BGRAToUVRow_NEON(const uint8_t* src_bgra, void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra, int src_stride_bgra,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const BGRA-named source pointer with int stride, two
// mutable destinations and an int width (same list as BGRAToUVRow_NEON).
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void BGRAToUVRow_SVE2(
    const uint8_t* src_bgra, int src_stride_bgra,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ABGRToUVRow_NEON(const uint8_t* src_abgr, void ABGRToUVRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr, int src_stride_abgr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const ABGR-named source pointer with int stride, two
// mutable destinations and an int width (same list as ABGRToUVRow_NEON).
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ABGRToUVRow_SVE2(
    const uint8_t* src_abgr, int src_stride_abgr,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void RGBAToUVRow_NEON(const uint8_t* src_rgba, void RGBAToUVRow_NEON(const uint8_t* src_rgba,
int src_stride_rgba, int src_stride_rgba,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const RGBA-named source pointer with int stride, two
// mutable destinations and an int width (same list as RGBAToUVRow_NEON).
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void RGBAToUVRow_SVE2(
    const uint8_t* src_rgba, int src_stride_rgba,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -1423,6 +1851,15 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, ...@@ -1423,6 +1851,15 @@ void RAWToUVRow_LASX(const uint8_t* src_raw,
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
// _NEON_DotProd prototypes for the BGRA/ABGR/RGBA -> Y row functions.
// Declarations only; each takes a pointer-to-const source, a mutable byte
// destination and an int width, matching the plain *_NEON prototypes.
// NOTE(review): the suffix presumably selects an Arm NEON dot-product
// build - confirm against the implementation file.
void BGRAToYRow_NEON_DotProd(
    const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_NEON_DotProd(
    const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_NEON_DotProd(
    const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
...@@ -1434,6 +1871,13 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, ...@@ -1434,6 +1871,13 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_y, uint8_t* dst_y,
int width); int width);
// _RVV prototypes for the BGRA/ABGR/RGBA/RGB24/RAW -> Y (and YJ) row
// functions. Declarations only; every one takes a pointer-to-const source,
// a mutable byte destination and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void BGRAToYRow_RVV(const uint8_t* src_bgra,
                    uint8_t* dst_y,
                    int width);
void ABGRToYRow_RVV(const uint8_t* src_abgr,
                    uint8_t* dst_y,
                    int width);
void RGBAToYRow_RVV(const uint8_t* src_rgba,
                    uint8_t* dst_y,
                    int width);
void RGB24ToYRow_RVV(const uint8_t* src_rgb24,
                     uint8_t* dst_y,
                     int width);
void RGB24ToYJRow_RVV(const uint8_t* src_rgb24,
                      uint8_t* dst_yj,
                      int width);
void RAWToYRow_RVV(const uint8_t* src_raw,
                   uint8_t* dst_y,
                   int width);
void RAWToYJRow_RVV(const uint8_t* src_raw,
                    uint8_t* dst_yj,
                    int width);
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
...@@ -1493,12 +1937,33 @@ void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); ...@@ -1493,12 +1937,33 @@ void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): "_Any_" presumably marks the wrapper that accepts widths the
// raw NEON_DotProd kernel cannot take directly - confirm.
void ARGBToYRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// ARGBToYJRow_NEON_DotProd - confirm.
void ARGBToYJRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// ABGRToYJRow_NEON_DotProd - confirm.
void ABGRToYJRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// RGBAToYJRow_NEON_DotProd - confirm.
void RGBAToYJRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// BGRAToYRow_NEON_DotProd - confirm.
void BGRAToYRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// ABGRToYRow_NEON_DotProd - confirm.
void ABGRToYRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: pointer-to-const source, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around
// RGBAToYRow_NEON_DotProd - confirm.
void RGBAToYRow_Any_NEON_DotProd(
    const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
...@@ -1658,11 +2123,20 @@ void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr, ...@@ -1658,11 +2123,20 @@ void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer, two mutable destinations and an
// int width.
// NOTE(review): presumably the "_Any_" wrapper around
// ARGBToUV444Row_NEON_I8MM - confirm.
void ARGBToUV444Row_Any_NEON_I8MM(
    const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr, void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBToUVRow_SVE2 -
// confirm.
void ARGBToUVRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr, void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -1672,11 +2146,20 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, ...@@ -1672,11 +2146,20 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride (named
// src_stride_ptr, as in the ARGBToUVRow_Any_LASX prototype), two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBToUVRow_LSX -
// confirm.
void ARGBToUVRow_Any_LSX(
    const uint8_t* src_ptr, int src_stride_ptr,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr, void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer, two mutable destinations and an
// int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBToUV444Row_LSX -
// confirm.
void ARGBToUV444Row_Any_LSX(
    const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width);
void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr, void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -1686,26 +2169,51 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, ...@@ -1686,26 +2169,51 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBToUVJRow_SVE2 -
// confirm.
void ARGBToUVJRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr, void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ABGRToUVJRow_SVE2 -
// confirm.
void ABGRToUVJRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around BGRAToUVRow_SVE2 -
// confirm.
void BGRAToUVRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr, void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ABGRToUVRow_SVE2 -
// confirm.
void ABGRToUVRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr, void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: const source pointer with int stride, two mutable
// destinations and an int width.
// NOTE(review): presumably the "_Any_" wrapper around RGBAToUVRow_SVE2 -
// confirm.
void RGBAToUVRow_Any_SVE2(
    const uint8_t* src_ptr, int src_stride,
    uint8_t* dst_u, uint8_t* dst_v, int width);
void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr, void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -1940,6 +2448,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); ...@@ -1940,6 +2448,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
// Prototype only: same parameter list as the other MirrorRow_* variants -
// const source pointer, mutable destination, int width.
// NOTE(review): _LSX presumably selects the LoongArch LSX build - confirm.
void MirrorRow_LSX(const uint8_t* src,
                   uint8_t* dst,
                   int width);
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
...@@ -1947,17 +2456,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); ...@@ -1947,17 +2456,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: const source pointer, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around MirrorRow_LSX -
// confirm.
void MirrorRow_Any_LSX(const uint8_t* src_ptr,
                       uint8_t* dst_ptr,
                       int width);
void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width);
// Prototype only: same parameter list as the other MirrorUVRow_* variants -
// const source pointer, mutable destination, int width.
// NOTE(review): _LSX presumably selects the LoongArch LSX build - confirm.
void MirrorUVRow_LSX(const uint8_t* src_uv,
                     uint8_t* dst_uv,
                     int width);
void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: const source pointer, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around MirrorUVRow_LSX -
// confirm.
void MirrorUVRow_Any_LSX(const uint8_t* src_ptr,
                         uint8_t* dst_ptr,
                         int width);
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorSplitUVRow_SSSE3(const uint8_t* src, void MirrorSplitUVRow_SSSE3(const uint8_t* src,
...@@ -1987,6 +2499,7 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); ...@@ -1987,6 +2499,7 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
// Prototype only: same parameter list as ARGBMirrorRow_MSA / _LASX - const
// source pointer, mutable destination, int width.
// NOTE(review): _LSX presumably selects the LoongArch LSX build - confirm.
void ARGBMirrorRow_LSX(const uint8_t* src,
                       uint8_t* dst,
                       int width);
void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
...@@ -1999,6 +2512,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, ...@@ -1999,6 +2512,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Prototype only: const source pointer, mutable destination, int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBMirrorRow_LSX -
// confirm.
void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr,
                           uint8_t* dst_ptr,
                           int width);
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -2041,6 +2555,10 @@ void SplitUVRow_LSX(const uint8_t* src_uv, ...@@ -2041,6 +2555,10 @@ void SplitUVRow_LSX(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
// Prototype only: one const source pointer (src_uv), two mutable
// destinations (dst_u, dst_v) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void SplitUVRow_RVV(
    const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -2202,6 +2720,10 @@ void MergeUVRow_LSX(const uint8_t* src_u, ...@@ -2202,6 +2720,10 @@ void MergeUVRow_LSX(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,
int width); int width);
// Prototype only: two const source pointers (src_u, src_v), one mutable
// destination (dst_uv) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void MergeUVRow_RVV(
    const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width);
void MergeUVRow_Any_SSE2(const uint8_t* y_buf, void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -2265,7 +2787,12 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb, ...@@ -2265,7 +2787,12 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
uint8_t* dst_g, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_b,
int width); int width);
void SplitRGBRow_NEON(const uint8_t* src_rgb, void SplitRGBRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
void SplitRGBRow_RVV(const uint8_t* src_rgb,
uint8_t* dst_r, uint8_t* dst_r,
uint8_t* dst_g, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_b,
...@@ -2296,6 +2823,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r, ...@@ -2296,6 +2823,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b, const uint8_t* src_b,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
// Prototype only: three const source pointers (src_r, src_g, src_b), one
// mutable destination (dst_rgb) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void MergeRGBRow_RVV(
    const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b,
    uint8_t* dst_rgb, int width);
void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -2330,6 +2862,12 @@ void MergeARGBRow_NEON(const uint8_t* src_r, ...@@ -2330,6 +2862,12 @@ void MergeARGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_a, const uint8_t* src_a,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
// Prototype only: four const source pointers (src_r, src_g, src_b, src_a),
// one mutable destination (dst_argb) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void MergeARGBRow_RVV(
    const uint8_t* src_r, const uint8_t* src_g,
    const uint8_t* src_b, const uint8_t* src_a,
    uint8_t* dst_argb, int width);
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf, void MergeARGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -2378,6 +2916,12 @@ void SplitARGBRow_NEON(const uint8_t* src_rgba, ...@@ -2378,6 +2916,12 @@ void SplitARGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_b, uint8_t* dst_b,
uint8_t* dst_a, uint8_t* dst_a,
int width); int width);
// Prototype only: one const source pointer (named src_rgba, as in the
// SplitARGBRow_NEON prototype), four mutable destinations (dst_r, dst_g,
// dst_b, dst_a) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void SplitARGBRow_RVV(
    const uint8_t* src_rgba,
    uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, uint8_t* dst_a,
    int width);
void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr, void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r, uint8_t* dst_r,
uint8_t* dst_g, uint8_t* dst_g,
...@@ -2422,6 +2966,11 @@ void MergeXRGBRow_NEON(const uint8_t* src_r, ...@@ -2422,6 +2966,11 @@ void MergeXRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b, const uint8_t* src_b,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
// Prototype only: three const source pointers (src_r, src_g, src_b), one
// mutable destination (dst_argb) and an int width; there is no alpha
// source, unlike MergeARGBRow_RVV.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void MergeXRGBRow_RVV(
    const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b,
    uint8_t* dst_argb, int width);
void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf, void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -2462,6 +3011,11 @@ void SplitXRGBRow_NEON(const uint8_t* src_rgba, ...@@ -2462,6 +3011,11 @@ void SplitXRGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_g, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_b,
int width); int width);
// Prototype only: one const source pointer (src_rgba), three mutable
// destinations (dst_r, dst_g, dst_b) and an int width; there is no alpha
// destination, unlike SplitARGBRow_RVV.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void SplitXRGBRow_RVV(
    const uint8_t* src_rgba,
    uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b,
    int width);
void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr, void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r, uint8_t* dst_r,
uint8_t* dst_g, uint8_t* dst_g,
...@@ -2805,6 +3359,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); ...@@ -2805,6 +3359,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
// Prototype only: const source pointer, mutable destination and an int
// named `count` (the CopyRow_MIPS and CopyRow_C prototypes use the same
// name; the unit is not stated in this header).
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void CopyRow_RVV(const uint8_t* src,
                 uint8_t* dst,
                 int count);
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count); void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
...@@ -2838,6 +3393,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, ...@@ -2838,6 +3393,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb,
uint8_t* dst_a, uint8_t* dst_a,
int width); int width);
// Prototype only: const source pointer (src_argb), mutable destination
// (dst_a) and an int width, matching the ARGBExtractAlphaRow_LSX prototype.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void ARGBExtractAlphaRow_RVV(
    const uint8_t* src_argb, uint8_t* dst_a, int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -2857,6 +3415,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, ...@@ -2857,6 +3415,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr,
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
// Prototype only: same parameter list as ARGBCopyYToAlphaRow_C / _SSE2 /
// _AVX2 - const source pointer, mutable destination, int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void ARGBCopyYToAlphaRow_RVV(const uint8_t* src,
                             uint8_t* dst,
                             int width);
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -2904,6 +3463,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb, ...@@ -2904,6 +3463,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const uint8_t* shuffler, const uint8_t* shuffler,
int width); int width);
// Prototype only: const source pointer, mutable destination, a second
// pointer-to-const argument named `shuffler`, and an int width - the same
// list as the ARGBShuffleRow_LASX prototype.
// NOTE(review): _LSX presumably selects the LoongArch LSX build; the layout
// of the data behind `shuffler` is not described here - confirm.
void ARGBShuffleRow_LSX(
    const uint8_t* src_argb, uint8_t* dst_argb,
    const uint8_t* shuffler, int width);
void ARGBShuffleRow_LASX(const uint8_t* src_argb, void ARGBShuffleRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const uint8_t* shuffler, const uint8_t* shuffler,
...@@ -2924,6 +3487,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, ...@@ -2924,6 +3487,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint8_t* param, const uint8_t* param,
int width); int width);
// Prototype only: const source pointer, mutable destination, a second
// pointer-to-const argument named `param`, and an int width.
// NOTE(review): presumably the "_Any_" wrapper around ARGBShuffleRow_LSX,
// with `param` forwarded as its shuffler - confirm.
void ARGBShuffleRow_Any_LSX(
    const uint8_t* src_ptr, uint8_t* dst_ptr,
    const uint8_t* param, int width);
void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr, void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint8_t* param, const uint8_t* param,
...@@ -2951,19 +3518,29 @@ void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, ...@@ -2951,19 +3518,29 @@ void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
// Prototype only: const source pointer (src_rgb24), mutable destination
// (dst_argb) and an int width, matching RGB24ToARGBRow_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void RGB24ToARGBRow_SVE2(
    const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
// Prototype only: const source pointer (src_rgb24), mutable destination
// (dst_argb) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
                        uint8_t* dst_argb,
                        int width);
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
// Prototype only: const source pointer (src_raw), mutable destination
// (dst_argb) and an int width, matching RAWToARGBRow_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void RAWToARGBRow_SVE2(const uint8_t* src_raw,
                       uint8_t* dst_argb,
                       int width);
void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
// Prototype only: const source pointer (src_raw), mutable destination
// (dst_rgba) and an int width, matching RAWToRGBARow_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void RAWToRGBARow_SVE2(const uint8_t* src_raw,
                       uint8_t* dst_rgba,
                       int width);
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
// _RVV prototypes for the RAW -> ARGB and RAW -> RGBA row functions.
// Declarations only; each takes a const source pointer, a mutable
// destination and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void RAWToARGBRow_RVV(const uint8_t* src_raw,
                      uint8_t* dst_argb,
                      int width);
void RAWToRGBARow_RVV(const uint8_t* src_raw,
                      uint8_t* dst_rgba,
                      int width);
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
// Prototype only: const source pointer (src_raw), mutable destination
// (dst_rgb24) and an int width, matching RAWToRGB24Row_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void RAWToRGB24Row_SVE2(const uint8_t* src_raw,
                        uint8_t* dst_rgb24,
                        int width);
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
// Prototype only: const source pointer (src_raw), mutable destination
// (dst_rgb24) and an int width.
// NOTE(review): _RVV presumably means the RISC-V Vector build - confirm.
void RAWToRGB24Row_RVV(const uint8_t* src_raw,
                       uint8_t* dst_rgb24,
                       int width);
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
...@@ -2979,6 +3556,9 @@ void RGB565ToARGBRow_LASX(const uint8_t* src_rgb565, ...@@ -2979,6 +3556,9 @@ void RGB565ToARGBRow_LASX(const uint8_t* src_rgb565,
void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
// Prototype only: const source pointer (src_argb1555), mutable destination
// (dst_argb) and an int width, matching ARGB1555ToARGBRow_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGB1555ToARGBRow_SVE2(
    const uint8_t* src_argb1555, uint8_t* dst_argb, int width);
void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
...@@ -3123,15 +3703,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); ...@@ -3123,15 +3703,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
const uint32_t dither4, uint32_t dither4,
int width); int width);
void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
const uint32_t dither4, uint32_t dither4,
int width); int width);
void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
const uint32_t dither4, uint32_t dither4,
int width); int width);
void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
...@@ -3147,10 +3727,17 @@ void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); ...@@ -3147,10 +3727,17 @@ void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb24, uint8_t* dst_rgb24,
int width); int width);
// Prototype only: const source pointer (src_argb), mutable destination
// (dst_rgb24) and an int width, matching ARGBToRGB24Row_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGBToRGB24Row_SVE2(
    const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width); void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width);
// Prototype only: const source pointer (src_argb), mutable destination
// (dst_raw) and an int width, matching ARGBToRAWRow_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGBToRAWRow_SVE2(const uint8_t* src_argb,
                       uint8_t* dst_raw,
                       int width);
void ARGBToRGB565Row_NEON(const uint8_t* src_argb, void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb565, uint8_t* dst_rgb565,
int width); int width);
// Prototype only: const source pointer (src_argb), mutable destination
// (dst_rgb565) and an int width, matching ARGBToRGB565Row_NEON.
// NOTE(review): _SVE2 presumably selects an Arm SVE2 build - confirm.
void ARGBToRGB565Row_SVE2(
    const uint8_t* src_argb, uint8_t* dst_rgb565, int width);
void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
uint8_t* dst_argb1555, uint8_t* dst_argb1555,
int width); int width);
...@@ -3159,7 +3746,11 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, ...@@ -3159,7 +3746,11 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
int width); int width);
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
const uint32_t dither4, uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_SVE2(const uint8_t* src_argb,
uint8_t* dst_rgb,
uint32_t dither4,
int width); int width);
void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
...@@ -3172,23 +3763,44 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, ...@@ -3172,23 +3763,44 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
int width); int width);
void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
const uint32_t dither4, uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb,
uint8_t* dst_rgb,
uint32_t dither4,
int width); int width);
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
const uint32_t dither4, uint32_t dither4,
int width); int width);
void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToARGB1555Row_LSX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
void ARGBToARGB1555Row_LASX(const uint8_t* src_argb, void ARGBToARGB1555Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToARGB4444Row_LSX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
...@@ -3202,6 +3814,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); ...@@ -3202,6 +3814,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void AR64ShuffleRow_C(const uint8_t* src_ar64, void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64, uint8_t* dst_ar64,
const uint8_t* shuffler, const uint8_t* shuffler,
...@@ -3226,6 +3840,12 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width); ...@@ -3226,6 +3840,12 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width); void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width); void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width); void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int width); int width);
...@@ -3268,6 +3888,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); ...@@ -3268,6 +3888,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -4189,6 +4810,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y, ...@@ -4189,6 +4810,10 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I400ToARGBRow_SVE2(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_MSA(const uint8_t* src_y, void I400ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
...@@ -4197,6 +4822,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y, ...@@ -4197,6 +4822,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I400ToARGBRow_RVV(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* param, const struct YuvConstants* param,
...@@ -4235,6 +4864,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0, ...@@ -4235,6 +4864,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBBlendRow_RVV(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBBlendRow_C(const uint8_t* src_argb, void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
...@@ -4261,6 +4894,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, ...@@ -4261,6 +4894,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void BlendPlaneRow_RVV(const uint8_t* src0,
const uint8_t* src1,
const uint8_t* alpha,
uint8_t* dst,
int width);
void BlendPlaneRow_C(const uint8_t* src0, void BlendPlaneRow_C(const uint8_t* src0,
const uint8_t* src1, const uint8_t* src1,
const uint8_t* alpha, const uint8_t* alpha,
...@@ -4305,10 +4943,18 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, ...@@ -4305,10 +4943,18 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBMultiplyRow_LSX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBMultiplyRow_LASX(const uint8_t* src_argb0, void ARGBMultiplyRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBMultiplyRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf, void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -4351,10 +4997,18 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf, ...@@ -4351,10 +4997,18 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBAddRow_LSX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBAddRow_LASX(const uint8_t* src_argb0, void ARGBAddRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBAddRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBAddRow_Any_LASX(const uint8_t* y_buf, void ARGBAddRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -4398,10 +5052,18 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, ...@@ -4398,10 +5052,18 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBSubtractRow_LSX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
void ARGBSubtractRow_LASX(const uint8_t* src_argb0, void ARGBSubtractRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBSubtractRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf, void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -4494,21 +5156,37 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, ...@@ -4494,21 +5156,37 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint32_t param, const uint32_t param,
int width); int width);
void ARGBToRGB565DitherRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr, void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const uint32_t param, const uint32_t param,
int width); int width);
void ARGBToRGB24Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr, void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBToRAWRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToRGB565Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr, void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBToARGB1555Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr, void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBToARGB4444Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr, void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -4531,6 +5209,42 @@ void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, ...@@ -4531,6 +5209,42 @@ void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I210ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I210ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I410ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I212ToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I212ToAR30Row_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I444AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, void I444AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4545,6 +5259,20 @@ void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, ...@@ -4545,6 +5259,20 @@ void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I410AlphaToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I210AlphaToARGBRow_Any_NEON(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
const uint16_t* a_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_NEON(const uint8_t* y_buf, void I422ToRGBARow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4575,6 +5303,12 @@ void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf, ...@@ -4575,6 +5303,12 @@ void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToAR30Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf, void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf, const uint8_t* uv_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -4612,6 +5346,14 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, ...@@ -4612,6 +5346,14 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void ARGBToAR30Row_NEON(const uint8_t* src, uint8_t* dst, int width);
void ABGRToAR30Row_NEON(const uint8_t* src, uint8_t* dst, int width);
void ABGRToAR30Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToAR30Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void P210ToARGBRow_NEON(const uint16_t* y_buf, void P210ToARGBRow_NEON(const uint16_t* y_buf,
const uint16_t* uv_buf, const uint16_t* uv_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
...@@ -4670,6 +5412,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, ...@@ -4670,6 +5412,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGBRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_LASX(const uint8_t* y_buf, void I422ToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4682,6 +5430,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, ...@@ -4682,6 +5430,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGBARow_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_LASX(const uint8_t* y_buf, void I422ToRGBARow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4695,6 +5449,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, ...@@ -4695,6 +5449,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
const uint8_t* a_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf, void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4708,6 +5469,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, ...@@ -4708,6 +5469,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4720,6 +5487,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, ...@@ -4720,6 +5487,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4732,6 +5505,12 @@ void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, ...@@ -4732,6 +5505,12 @@ void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4744,6 +5523,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, ...@@ -4744,6 +5523,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -4856,12 +5641,18 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, ...@@ -4856,12 +5641,18 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2, int src_stride_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, void YUY2ToUVRow_LASX(const uint8_t* src_yuy2,
int src_stride_yuy2, int src_stride_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -4871,6 +5662,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, ...@@ -4871,6 +5662,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -4932,12 +5727,18 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, ...@@ -4932,12 +5727,18 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -4947,6 +5748,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, ...@@ -4947,6 +5748,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr, void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -4992,12 +5797,18 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, ...@@ -4992,12 +5797,18 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_MSA(const uint8_t* src_uyvy, void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
int src_stride_uyvy, int src_stride_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUVRow_LSX(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUVRow_LASX(const uint8_t* src_uyvy, void UYVYToUVRow_LASX(const uint8_t* src_uyvy,
int src_stride_uyvy, int src_stride_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -5007,6 +5818,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, ...@@ -5007,6 +5818,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUV422Row_LSX(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -5053,12 +5868,18 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, ...@@ -5053,12 +5868,18 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUVRow_Any_LSX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr, int src_stride_ptr,
uint8_t* dst_u, uint8_t* dst_u,
...@@ -5068,6 +5889,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, ...@@ -5068,6 +5889,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void UYVYToUV422Row_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr, void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
...@@ -5093,19 +5918,35 @@ void AYUVToUVRow_NEON(const uint8_t* src_ayuv, ...@@ -5093,19 +5918,35 @@ void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv, int src_stride_ayuv,
uint8_t* dst_uv, uint8_t* dst_uv,
int width); int width);
void AYUVToUVRow_SVE2(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_NEON(const uint8_t* src_ayuv, void AYUVToVURow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv, int src_stride_ayuv,
uint8_t* dst_vu, uint8_t* dst_vu,
int width); int width);
void AYUVToVURow_SVE2(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width);
void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr, void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_vu, uint8_t* dst_vu,
int width); int width);
void AYUVToUVRow_Any_SVE2(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_vu,
int width);
void AYUVToVURow_Any_NEON(const uint8_t* src_ptr, void AYUVToVURow_Any_NEON(const uint8_t* src_ptr,
int src_stride, int src_stride,
uint8_t* dst_vu, uint8_t* dst_vu,
int width); int width);
void AYUVToVURow_Any_SVE2(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_vu,
int width);
void I422ToYUY2Row_C(const uint8_t* src_y, void I422ToYUY2Row_C(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
...@@ -5182,6 +6023,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y, ...@@ -5182,6 +6023,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width); int width);
void I422ToYUY2Row_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_yuy2,
int width);
void I422ToYUY2Row_LASX(const uint8_t* src_y, void I422ToYUY2Row_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -5192,6 +6038,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y, ...@@ -5192,6 +6038,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uyvy, uint8_t* dst_uyvy,
int width); int width);
void I422ToUYVYRow_LSX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uyvy,
int width);
void I422ToUYVYRow_LASX(const uint8_t* src_y, void I422ToUYVYRow_LASX(const uint8_t* src_y,
const uint8_t* src_u, const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
...@@ -5202,6 +6053,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, ...@@ -5202,6 +6053,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -5212,6 +6068,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, ...@@ -5212,6 +6068,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf, void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf, const uint8_t* u_buf,
const uint8_t* v_buf, const uint8_t* v_buf,
...@@ -5232,9 +6093,15 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, ...@@ -5232,9 +6093,15 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
void ARGBAttenuateRow_MSA(const uint8_t* src_argb, void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBAttenuateRow_LSX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
void ARGBAttenuateRow_LASX(const uint8_t* src_argb, void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width); int width);
void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -5247,6 +6114,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, ...@@ -5247,6 +6114,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
void ARGBAttenuateRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int width); int width);
...@@ -5273,12 +6143,14 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); ...@@ -5273,12 +6143,14 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width);
void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width);
void ARGBColorMatrixRow_C(const uint8_t* src_argb, void ARGBColorMatrixRow_C(const uint8_t* src_argb,
...@@ -5293,6 +6165,10 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, ...@@ -5293,6 +6165,10 @@ void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const int8_t* matrix_argb, const int8_t* matrix_argb,
int width); int width);
void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb,
uint8_t* dst_argb,
const int8_t* matrix_argb,
int width);
void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const int8_t* matrix_argb, const int8_t* matrix_argb,
...@@ -5358,6 +6234,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb, ...@@ -5358,6 +6234,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width, int width,
uint32_t value); uint32_t value);
void ARGBShadeRow_LSX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
uint32_t value);
void ARGBShadeRow_LASX(const uint8_t* src_argb, void ARGBShadeRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width, int width,
...@@ -5430,6 +6310,11 @@ void InterpolateRow_LSX(uint8_t* dst_ptr, ...@@ -5430,6 +6310,11 @@ void InterpolateRow_LSX(uint8_t* dst_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int width, int width,
int source_y_fraction); int source_y_fraction);
void InterpolateRow_RVV(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
void InterpolateRow_Any_NEON(uint8_t* dst_ptr, void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr, ptrdiff_t src_stride_ptr,
...@@ -5737,7 +6622,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, ...@@ -5737,7 +6622,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr,
float* dst_ptr, float* dst_ptr,
float param, float param,
int width); int width);
// Convert FP16 Half Floats to FP32 Floats
void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16
float* dst,
int width);
// Convert a column of FP16 Half Floats to a row of FP32 Floats
void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16
int src_stride, // stride in elements
float* dst,
int width);
// Convert FP32 Floats to FP16 Half Floats
void ConvertFP32ToFP16Row_NEON(const float* src,
uint16_t* dst, // fp16
int width);
void ARGBLumaColorTableRow_C(const uint8_t* src_argb, void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
int width, int width,
......
...@@ -27,8 +27,9 @@ typedef enum FilterMode { ...@@ -27,8 +27,9 @@ typedef enum FilterMode {
} FilterModeEnum; } FilterModeEnum;
// Scale a YUV plane. // Scale a YUV plane.
// Returns 0 if successful.
LIBYUV_API LIBYUV_API
void ScalePlane(const uint8_t* src, int ScalePlane(const uint8_t* src,
int src_stride, int src_stride,
int src_width, int src_width,
int src_height, int src_height,
...@@ -39,7 +40,7 @@ void ScalePlane(const uint8_t* src, ...@@ -39,7 +40,7 @@ void ScalePlane(const uint8_t* src,
enum FilterMode filtering); enum FilterMode filtering);
LIBYUV_API LIBYUV_API
void ScalePlane_16(const uint16_t* src, int ScalePlane_16(const uint16_t* src,
int src_stride, int src_stride,
int src_width, int src_width,
int src_height, int src_height,
...@@ -51,7 +52,7 @@ void ScalePlane_16(const uint16_t* src, ...@@ -51,7 +52,7 @@ void ScalePlane_16(const uint16_t* src,
// Sample is expected to be in the low 12 bits. // Sample is expected to be in the low 12 bits.
LIBYUV_API LIBYUV_API
void ScalePlane_12(const uint16_t* src, int ScalePlane_12(const uint16_t* src,
int src_stride, int src_stride,
int src_width, int src_width,
int src_height, int src_height,
......
...@@ -29,7 +29,10 @@ extern "C" { ...@@ -29,7 +29,10 @@ extern "C" {
#endif #endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature) #if defined(__has_feature)
#if __has_feature(memory_sanitizer) #if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86 #define LIBYUV_DISABLE_X86
#endif #endif
#endif #endif
...@@ -133,6 +136,8 @@ extern "C" { ...@@ -133,6 +136,8 @@ extern "C" {
#define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON #define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEUVROWDOWN2_NEON
#define HAS_SCALEUVROWDOWN2LINEAR_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON #define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON #define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2_LINEAR_NEON #define HAS_SCALEROWUP2_LINEAR_NEON
...@@ -173,6 +178,38 @@ extern "C" { ...@@ -173,6 +178,38 @@ extern "C" {
#define HAS_SCALEROWDOWN34_LSX #define HAS_SCALEROWDOWN34_LSX
#endif #endif
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
#define HAS_SCALEADDROW_RVV
// TODO: Test ScaleARGBRowDownEven_RVV and enable it
// #define HAS_SCALEARGBROWDOWNEVEN_RVV
#if defined(__riscv_zve64x)
#define HAS_SCALEUVROWDOWN4_RVV
#endif
#define HAS_SCALEUVROWDOWNEVEN_RVV
#define HAS_SCALEARGBROWDOWN2_RVV
#define HAS_SCALEARGBROWDOWN2BOX_RVV
#define HAS_SCALEARGBROWDOWN2LINEAR_RVV
#define HAS_SCALEARGBROWDOWNEVENBOX_RVV
#define HAS_SCALEROWDOWN2_RVV
#define HAS_SCALEROWDOWN2BOX_RVV
#define HAS_SCALEROWDOWN2LINEAR_RVV
#define HAS_SCALEROWDOWN34_0_BOX_RVV
#define HAS_SCALEROWDOWN34_1_BOX_RVV
#define HAS_SCALEROWDOWN34_RVV
#define HAS_SCALEROWDOWN38_2_BOX_RVV
#define HAS_SCALEROWDOWN38_3_BOX_RVV
#define HAS_SCALEROWDOWN38_RVV
#define HAS_SCALEROWDOWN4_RVV
#define HAS_SCALEROWDOWN4BOX_RVV
#define HAS_SCALEROWUP2_BILINEAR_RVV
#define HAS_SCALEROWUP2_LINEAR_RVV
#define HAS_SCALEUVROWDOWN2_RVV
#define HAS_SCALEUVROWDOWN2BOX_RVV
#define HAS_SCALEUVROWDOWN2LINEAR_RVV
#define HAS_SCALEUVROWUP2_BILINEAR_RVV
#define HAS_SCALEUVROWUP2_LINEAR_RVV
#endif
// Scale ARGB vertically with bilinear interpolation. // Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height, void ScalePlaneVertical(int src_height,
int dst_width, int dst_width,
...@@ -947,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ...@@ -947,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst, uint8_t* dst,
int dst_width); int dst_width);
void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_argb, uint8_t* dst_argb,
...@@ -1059,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb, ...@@ -1059,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
int src_stepx, int src_stepx,
uint8_t* dst_argb, uint8_t* dst_argb,
int dst_width); int dst_width);
void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int src_stepx, int src_stepx,
...@@ -1141,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, ...@@ -1141,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_uv, uint8_t* dst_uv,
int dst_width); int dst_width);
void ScaleUVRowDown2_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr, void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst_ptr, uint8_t* dst_ptr,
...@@ -1201,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr, ...@@ -1201,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr,
int src_stepx, int src_stepx,
uint8_t* dst_uv, uint8_t* dst_uv,
int dst_width); int dst_width);
void ScaleUVRowDown4_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr, void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int32_t src_stepx, int32_t src_stepx,
...@@ -1290,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr, ...@@ -1290,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
ptrdiff_t dst_stride, ptrdiff_t dst_stride,
int dst_width); int dst_width);
void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr, void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int dst_width); int dst_width);
...@@ -1742,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr, ...@@ -1742,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
int dst_width); int dst_width);
void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleRowDown2_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1860 #define LIBYUV_VERSION 1892
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_H_
#define INCLUDE_LIBYUV_H_
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#include "libyuv/scale_uv.h"
#include "libyuv/version.h"
#include "libyuv/video_common.h"
#endif // INCLUDE_LIBYUV_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
#include <stddef.h> // For size_t and NULL
// Fixed-width integer types used throughout the libyuv headers.
// The whole section is skipped when INT_TYPES_DEFINED or GG_LONGLONG is
// already defined, so an embedding project can supply its own definitions.
#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
#define INT_TYPES_DEFINED
// MSVC compilers with _MSC_VER < 1600 take the hand-written typedefs below
// instead of <stdint.h>.
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#include <sys/types.h> // for uintptr_t on x86
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned char uint8_t;
typedef signed char int8_t;
#else
#include <stdint.h> // for uintptr_t and C99 types
#endif // defined(_MSC_VER) && (_MSC_VER < 1600)
// Types are deprecated. Enable this macro for legacy types.
// With LIBYUV_LEGACY_TYPES defined, the short names (uint8, int32, ...) are
// provided as aliases of the corresponding <stdint.h>-style names.
#ifdef LIBYUV_LEGACY_TYPES
typedef uint64_t uint64;
typedef int64_t int64;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint8_t uint8;
typedef int8_t int8;
#endif // LIBYUV_LEGACY_TYPES
#endif // INT_TYPES_DEFINED
// LIBYUV_API decorates every public declaration. It is only defined here when
// the build has not already provided a definition.
//  - _WIN32 / __CYGWIN__: dllexport when LIBYUV_BUILDING_SHARED_LIBRARY is
//    defined, dllimport when LIBYUV_USING_SHARED_LIBRARY is defined, and empty
//    otherwise. A client of the libyuv DLL therefore has to define
//    LIBYUV_USING_SHARED_LIBRARY to get the dllimport decoration.
//  - GCC >= 4 on non-Apple targets with either shared-library macro defined:
//    default symbol visibility.
//  - Anything else: empty.
#if !defined(LIBYUV_API)
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllexport)
#elif defined(LIBYUV_USING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllimport)
#else
#define LIBYUV_API
#endif // LIBYUV_BUILDING_SHARED_LIBRARY
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
defined(LIBYUV_USING_SHARED_LIBRARY))
#define LIBYUV_API __attribute__((visibility("default")))
#else
#define LIBYUV_API
#endif // __GNUC__
#endif // LIBYUV_API
// TODO(fbarchard): Remove bool macros.
// Boolean stand-ins: LIBYUV_BOOL expands to int, with LIBYUV_FALSE and
// LIBYUV_TRUE expanding to 0 and 1.
#define LIBYUV_BOOL int
#define LIBYUV_FALSE 0
#define LIBYUV_TRUE 1
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_H_
#define INCLUDE_LIBYUV_COMPARE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Compute a hash for specified memory. Seed of 5381 recommended.
//   src:   start of the memory to hash.
//   count: amount of memory to hash (presumably in bytes — TODO confirm).
//   seed:  starting hash value.
// Returns the 32-bit hash.
LIBYUV_API
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);
// Hamming Distance
// Compares the two buffers src_a and src_b over count elements and returns
// the distance as a 64-bit value.
// NOTE(review): presumably count is in bytes and the distance is the number of
// differing bits — confirm against the implementation.
LIBYUV_API
uint64_t ComputeHammingDistance(const uint8_t* src_a,
const uint8_t* src_b,
int count);
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
//   argb:          start of the image data.
//   stride_argb:   row stride of the image (presumably in bytes — TODO confirm).
//   width, height: image dimensions.
LIBYUV_API
uint32_t ARGBDetect(const uint8_t* argb,
int stride_argb,
int width,
int height);
// Sum Square Error - used to compute Mean Square Error or PSNR.
// Operates on the two flat buffers src_a and src_b over count elements and
// returns the sum as a 64-bit value.
// NOTE(review): presumably count is a byte count — confirm.
LIBYUV_API
uint64_t ComputeSumSquareError(const uint8_t* src_a,
const uint8_t* src_b,
int count);
// Plane variant of ComputeSumSquareError: each input is described by a
// pointer plus a row stride (stride_a / stride_b), and the compared region is
// width x height.
// NOTE(review): stride units are not stated in this header; presumably bytes.
LIBYUV_API
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
int stride_a,
const uint8_t* src_b,
int stride_b,
int width,
int height);
// NOTE(review): kMaxPsnr is presumably the PSNR value reported when the error
// is zero (identical inputs) — confirm in the implementation.
static const int kMaxPsnr = 128;
// Converts a sum of squared errors (sse) accumulated over count samples into
// a PSNR value returned as a double.
LIBYUV_API
double SumSquareErrorToPsnr(uint64_t sse, uint64_t count);
// PSNR between two single planes. Each plane is given as a pointer plus a row
// stride; width and height give the compared region. Returns the PSNR as a
// double.
// NOTE(review): stride units are presumably bytes — confirm.
LIBYUV_API
double CalcFramePsnr(const uint8_t* src_a,
int stride_a,
const uint8_t* src_b,
int stride_b,
int width,
int height);
// PSNR between two frames "a" and "b", each supplied as separate Y, U and V
// planes with one stride per plane. width and height describe the frame.
// Returns the PSNR as a double.
// NOTE(review): how the three planes are weighted into one value is not
// visible in this header — see the implementation.
LIBYUV_API
double I420Psnr(const uint8_t* src_y_a,
int stride_y_a,
const uint8_t* src_u_a,
int stride_u_a,
const uint8_t* src_v_a,
int stride_v_a,
const uint8_t* src_y_b,
int stride_y_b,
const uint8_t* src_u_b,
int stride_u_b,
const uint8_t* src_v_b,
int stride_v_b,
int width,
int height);
// SSIM between two single planes. Each plane is given as a pointer plus a row
// stride; width and height give the compared region. Returns the SSIM as a
// double.
// NOTE(review): stride units are presumably bytes — confirm.
LIBYUV_API
double CalcFrameSsim(const uint8_t* src_a,
int stride_a,
const uint8_t* src_b,
int stride_b,
int width,
int height);
// SSIM between two frames "a" and "b", each supplied as separate Y, U and V
// planes with one stride per plane. width and height describe the frame.
// Returns the SSIM as a double.
// NOTE(review): how the three planes are combined into one value is not
// visible in this header — see the implementation.
LIBYUV_API
double I420Ssim(const uint8_t* src_y_a,
int stride_y_a,
const uint8_t* src_u_a,
int stride_u_a,
const uint8_t* src_v_a,
int stride_v_a,
const uint8_t* src_y_b,
int stride_y_b,
const uint8_t* src_u_b,
int stride_u_b,
const uint8_t* src_v_b,
int stride_v_b,
int width,
int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_H_
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_
#define INCLUDE_LIBYUV_COMPARE_ROW_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Platform-based opt-outs from the SIMD code paths.
// LIBYUV_DISABLE_X86 is defined when any of these holds:
//  - __pnacl__ is defined,
//  - __CLR_VER is defined,
//  - __native_client__ and __x86_64__ are both defined,
//  - __i386__ is defined without __SSE__ and the compiler is not clang.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// LIBYUV_DISABLE_NEON is defined for every __native_client__ build.
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
// Under MSan both the NEON and the x86 assembly paths are switched off. Each
// define is guarded with !defined(...) so that a macro already supplied by the
// build (for example -DLIBYUV_DISABLE_X86=1) or by the platform checks earlier
// in this header is left untouched instead of being redefined.
// The outer defined(__has_feature) test keeps compilers without
// __has_feature from evaluating the inner conditions.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
#define LIBYUV_DISABLE_NEON
#endif
#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
// Visual C 2012 required for AVX2.
// VISUALC_HAS_AVX2 is set only for 32-bit (_M_IX86) MSVC builds that are not
// clang, with _MSC_VER >= 1700.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
_MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// clang >= 3.4.0 required for AVX2.
// CLANG_HAS_AVX2 is set only when clang targets __x86_64__ or __i386__.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// x86 feature switches. Every group requires LIBYUV_DISABLE_X86 to be
// undefined.
// The following are available for Visual C and GCC:
// (any of __x86_64__, __i386__ or _M_IX86)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#define HAS_HAMMINGDISTANCE_SSE42
#endif
// The following are available for 32 bit Visual C only:
// the condition requires _M_IX86, _MSC_VER and !__clang__, so clang-cl is
// excluded. Because this header defines CLANG_HAS_AVX2 only under __clang__,
// VISUALC_HAS_AVX2 is the term that enables this group in practice.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
!defined(__clang__) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#define HAS_SUMSQUAREERROR_AVX2
#endif
// The following are available for GCC and clangcl:
// (compilers that define __x86_64__ or __i386__)
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_SSSE3
#endif
// The following are available for clang builds with AVX2 support
// (CLANG_HAS_AVX2) targeting __x86_64__ or __i386__:
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
(defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_AVX2
#endif
// The following are available for Neon:
// enabled unless LIBYUV_DISABLE_NEON is defined, when any of __ARM_NEON__,
// LIBYUV_NEON or __aarch64__ is defined.
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
#define HAS_HAMMINGDISTANCE_NEON
#endif
// The following are available for MSA:
// enabled unless LIBYUV_DISABLE_MSA is defined, when __mips_msa is defined.
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_HAMMINGDISTANCE_MSA
#define HAS_SUMSQUAREERROR_MSA
#endif
// Hamming distance kernels. All variants share one signature: two input
// buffers, an element count, and a 32-bit result. The suffix names the
// implementation variant (C, SSE42, SSSE3, AVX2, NEON, MSA).
// NOTE(review): these are declared unconditionally; presumably only the
// variants whose HAS_HAMMINGDISTANCE_* macro is set are actually compiled —
// confirm in the compare_*.cc sources.
uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_NEON(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
// Sum-of-squared-error kernels. All variants share one signature: two input
// buffers, an element count, and a 32-bit result. The suffix names the
// implementation variant (C, SSE2, AVX2, NEON, MSA).
// NOTE(review): the result is 32-bit while the public ComputeSumSquareError
// returns 64-bit; presumably callers limit count per call — confirm.
uint32_t SumSquareError_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_AVX2(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_NEON(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
// Hash kernels taking a buffer, an int element count and a seed, and
// returning a 32-bit hash. The suffix names the implementation variant
// (C, SSE41, AVX2). Note that count is an int here, whereas the public
// HashDjb2 takes a uint64_t count.
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_ROW_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_H_
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For enum RotationMode.
// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
#include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I444 to I420.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I444ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I444 to NV12.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: an 8-bit dst_y plane and a single dst_uv plane, each with a stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I444ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I444 to NV21.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: an 8-bit dst_y plane and a single dst_vu plane, each with a stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I444ToNV21(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert I422 to I420.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I422ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I422 to I444.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I422ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I422 to I210.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 16-bit (uint16_t) dst_y / dst_u / dst_v planes, each with a stride.
// NOTE(review): whether the destination strides count bytes or uint16_t
// elements is not stated here — confirm before use.
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I422ToI210(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert MM21 to NV12.
// Inputs: 8-bit src_y plane and a single src_uv plane, each with a stride.
// Outputs: 8-bit dst_y plane and a single dst_uv plane, each with a stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int MM21ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert MM21 to I420.
// Inputs: 8-bit src_y plane and a single src_uv plane, each with a stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int MM21ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert MM21 to YUY2
// Inputs: 8-bit src_y plane and a single src_uv plane, each with a stride.
// Output: one 8-bit dst_yuy2 buffer with stride dst_stride_yuy2.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int MM21ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert MT2T to P010
// Note that src_y and src_uv point to packed 10-bit values, so the Y plane will
// be 10 / 8 times the dimensions of the image. Also for this reason,
// src_stride_y and src_stride_uv are given in bytes.
// Outputs: 16-bit (uint16_t) dst_y plane and a single dst_uv plane.
// NOTE(review): the units of dst_stride_y / dst_stride_uv are not stated here
// (bytes vs. uint16_t elements) — confirm before use.
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int MT2TToP010(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I422 to NV21.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: an 8-bit dst_y plane and a single dst_vu plane, each with a stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Copy I420 to I420.
// I420ToI420 is a macro alias for I420Copy.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// Returns an int status (presumably 0 on success — TODO confirm).
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I420 to I444.
// Inputs: 8-bit src_y / src_u / src_v planes, each with its own stride.
// Outputs: 8-bit dst_y / dst_u / dst_v planes, each with its own stride.
// width and height give the image size (presumably in pixels — TODO confirm).
// Returns an int status (presumably 0 on success — TODO confirm).
LIBYUV_API
int I420ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy I010 to I010
// I010ToI010 and H010ToH010 are macro aliases for I010Copy.
// Inputs and outputs are 16-bit (uint16_t) Y / U / V planes, each with its
// own stride.
// NOTE(review): stride units (bytes vs. uint16_t elements) are not stated
// here — confirm before use.
// Returns an int status (presumably 0 on success — TODO confirm).
#define I010ToI010 I010Copy
#define H010ToH010 I010Copy
LIBYUV_API
int I010Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert 10 bit YUV to 8 bit
// H010ToH420 is a macro alias for I010ToI420.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H010ToH420 I010ToI420
LIBYUV_API
int I010ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I210 to I420. H210ToH420 is a macro alias for I210ToI420.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H210ToH420 I210ToI420
LIBYUV_API
int I210ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I210 to I422. H210ToH422 is a macro alias for I210ToI422.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H210ToH422 I210ToI422
LIBYUV_API
int I210ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I410 to I420. H410ToH420 is a macro alias for I410ToI420.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H410ToH420 I410ToI420
LIBYUV_API
int I410ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I410 to I444. H410ToH444 is a macro alias for I410ToI444.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I012 to I420. H012ToH420 is a macro alias for I012ToI420.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H012ToH420 I012ToI420
LIBYUV_API
int I012ToI420(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I212 to I422. H212ToH422 is a macro alias for I212ToI422.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H212ToH422 I212ToI422
LIBYUV_API
int I212ToI422(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I412 to I444. H412ToH444 is a macro alias for I412ToI444.
// Inputs: 16-bit (uint16_t) src_y / src_u / src_v planes with strides.
// Outputs: 8-bit dst_y / dst_u / dst_v planes with strides.
// NOTE(review): source stride units (bytes vs. elements) are not stated here.
// Returns an int status (presumably 0 on success — TODO confirm).
#define H412ToH444 I412ToI444
LIBYUV_API
int I412ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I410 to I010.
// I412ToI012, H410ToH010 and H412ToH012 are macro aliases for I410ToI010, so
// the 12-bit names resolve to this same function.
// NOTE(review): presumably the conversion is independent of sample bit depth,
// which is what makes the 12-bit aliases valid — confirm in convert.cc.
// Inputs and outputs are 16-bit (uint16_t) Y / U / V planes with strides.
// Returns an int status (presumably 0 on success — TODO confirm).
#define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010
LIBYUV_API
int I410ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I210 to I010.
// I212ToI012, H210ToH010 and H212ToH012 are macro aliases for I210ToI010, so
// the 12-bit names resolve to this same function.
// NOTE(review): presumably the conversion is independent of sample bit depth,
// which is what makes the 12-bit aliases valid — confirm in convert.cc.
// Inputs and outputs are 16-bit (uint16_t) Y / U / V planes with strides.
// Returns an int status (presumably 0 on success — TODO confirm).
#define I212ToI012 I210ToI010
#define H210ToH010 I210ToI010
#define H212ToH012 I210ToI010
LIBYUV_API
int I210ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I010 to I410
LIBYUV_API
int I010ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I012 to I412
#define I012ToI412 I010ToI410
// Convert I210 to I410
LIBYUV_API
int I210ToI410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I212 to I412
#define I212ToI412 I210ToI410
// Convert I010 to P010
LIBYUV_API
int I010ToP010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I210 to P210
LIBYUV_API
int I210ToP210(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I012 to P012
LIBYUV_API
int I012ToP012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I212 to P212
LIBYUV_API
int I212ToP212(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I400 (grey) to NV21.
LIBYUV_API
int I400ToNV21(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Alias: J400ToJ420 is the same function as I400ToI420.
#define J400ToJ420 I400ToI420
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert NV21 to I420.
LIBYUV_API
int NV21ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert NV12 to NV24.
LIBYUV_API
int NV12ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert NV16 to NV24.
LIBYUV_API
int NV16ToNV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P010 to I010.
LIBYUV_API
int P010ToI010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P012 to I012.
LIBYUV_API
int P012ToI012(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P012 to P412.
#define P012ToP412 P010ToP410
// Convert P016 to P416.
#define P016ToP416 P010ToP410
// Convert P210 to P410.
LIBYUV_API
int P210ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert P212 to P412.
#define P212ToP412 P210ToP410
// Convert P216 to P416.
#define P216ToP416 P210ToP410
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert Android420 to I420.
LIBYUV_API
int Android420ToI420(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB little endian (bgr in memory) to I420.
LIBYUV_API
int RGB24ToI420(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB little endian (bgr in memory) to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB big endian (rgb in memory) to J420.
LIBYUV_API
int RAWToJ420(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8_t* src_rgb565,
int src_stride_rgb565,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
int ARGB1555ToI420(const uint8_t* src_argb1555,
int src_stride_argb1555,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
int ARGB4444ToI420(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB little endian (bgr in memory) to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// RGB big endian (rgb in memory) to J400.
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// JPEG to I420
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int src_width,
int src_height,
int dst_width,
int dst_height);
// JPEG to NV21
LIBYUV_API
int MJPGToNV21(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int src_width,
int src_height,
int dst_width,
int dst_height);
// JPEG to NV12
LIBYUV_API
int MJPGToNV12(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int src_width,
int src_height,
int dst_width,
int dst_height);
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8_t* sample,
size_t sample_size,
int* width,
int* height);
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "sample_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// NOTE(review): dst_width/dst_height are not parameters of this function;
// presumably they refer to the cropped output size (crop_width/crop_height)
// - confirm against the implementation.
// "src_width" / "src_height" is the size of the source frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "fourcc" is a fourcc code, e.g. 'I420', 'YUY2'.
// Returns 0 on success; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToI420(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int crop_x,
int crop_y,
int src_width,
int src_height,
int crop_width,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For enum RotationMode.
#include "libyuv/scale.h" // For enum FilterMode.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Conversion matrix for YUV to RGB
LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // BT.601 full
LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full
LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYuvV2020Constants; // BT.2020 full
// Conversion matrix for YVU to BGR
LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601
LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // BT.601 full
LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709
LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full
LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020
LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
// Macros for end swapped destination Matrix conversions.
// Swap UV and pass mirrored kYvuJPEGConstants matrix.
// Each kYuv*ConstantsVU name below maps to the corresponding kYvu* constant,
// so the *Matrix macros that follow can select it by pasting a VU suffix onto
// the constants name they are given.
// TODO(fbarchard): Add macro for each Matrix function.
#define kYuvI601ConstantsVU kYvuI601Constants
#define kYuvJPEGConstantsVU kYvuJPEGConstants
#define kYuvH709ConstantsVU kYvuH709Constants
#define kYuvF709ConstantsVU kYvuF709Constants
#define kYuv2020ConstantsVU kYvu2020Constants
#define kYuvV2020ConstantsVU kYvuV2020Constants
// NV12/NV21 to ABGR/RAW: each macro expands to the ARGB/RGB24 Matrix function
// of the opposite biplanar variant (NV21/NV12), keeping the argument order and
// pasting VU onto argument g (expected to be a kYuv*Constants name so that the
// result resolves through the kYuv*ConstantsVU defines).
#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \
NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i)
#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \
NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i)
// I010/I210/I410 to ABGR/AB30: each macro expands to the matching ARGB/AR30
// Matrix function with the argument pairs (c, d) and (e, f) exchanged and VU
// pasted onto argument i.
#define I010ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I010ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I010ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I210ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
// Alpha variants to ABGR: each macro expands to the matching AlphaToARGB
// Matrix function with the argument pairs (c, d) and (e, f) exchanged and VU
// pasted onto argument k.
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I422AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I444AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I010AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I210AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I410AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
// Alias.
#define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I420 to ABGR.
LIBYUV_API
int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert U420 to ARGB.
LIBYUV_API
int U420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert U420 to ABGR.
LIBYUV_API
int U420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert U422 to ARGB.
LIBYUV_API
int U422ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert U422 to ABGR.
LIBYUV_API
int U422ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I444 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J444 to ABGR.
LIBYUV_API
int J444ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H444 to ARGB.
LIBYUV_API
int H444ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H444 to ABGR.
LIBYUV_API
int H444ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert U444 to ARGB.
LIBYUV_API
int U444ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert U444 to ABGR.
LIBYUV_API
int U444ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I444 to RGB24.
LIBYUV_API
int I444ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert I444 to RAW.
LIBYUV_API
int I444ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert I010 to ARGB.
LIBYUV_API
int I010ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I010 to ABGR.
LIBYUV_API
int I010ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H010 to ARGB.
LIBYUV_API
int H010ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H010 to ABGR.
LIBYUV_API
int H010ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert U010 to ARGB.
LIBYUV_API
int U010ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert U010 to ABGR.
LIBYUV_API
int U010ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I210 to ARGB.
LIBYUV_API
int I210ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I210 to ABGR.
LIBYUV_API
int I210ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert H210 to ARGB.
LIBYUV_API
int H210ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H210 to ABGR.
LIBYUV_API
int H210ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert U210 to ARGB.
LIBYUV_API
int U210ToARGB(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert U210 to ABGR.
LIBYUV_API
int U210ToABGR(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I420 with Alpha to preattenuated ARGB.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I420AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I420AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ARGB.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I422AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ABGR.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I422AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ARGB.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I444AlphaToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ABGR.
// src_a / src_stride_a describe the separate alpha plane.
// NOTE(review): "attenuate" presumably selects whether the RGB output is
// premultiplied by alpha - confirm against the implementation.
LIBYUV_API
int I444AlphaToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height,
int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert J400 (jpeg grey) to ARGB.
LIBYUV_API
int J400ToARGB(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Alias.
#define YToARGB I400ToARGB
// Convert NV12 to ARGB.
LIBYUV_API
int NV12ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert NV21 to ARGB.
LIBYUV_API
int NV21ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert NV12 to ABGR.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert NV21 to ABGR.
LIBYUV_API
int NV21ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert NV12 to RGB24.
LIBYUV_API
int NV12ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert NV21 to RGB24.
LIBYUV_API
int NV21ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert NV21 to YUV24.
LIBYUV_API
int NV21ToYUV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_yuv24,
int dst_stride_yuv24,
int width,
int height);
// Convert NV12 to RAW.
LIBYUV_API
int NV12ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert NV21 to RAW.
LIBYUV_API
int NV21ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert UYVY to ARGB.
LIBYUV_API
int UYVYToARGB(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I010 to AR30.
LIBYUV_API
int I010ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert H010 to AR30.
LIBYUV_API
int H010ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert I010 to AB30.
LIBYUV_API
int I010ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H010 to AB30.
LIBYUV_API
int H010ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert U010 to AR30.
LIBYUV_API
int U010ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert U010 to AB30.
LIBYUV_API
int U010ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert I210 to AR30.
LIBYUV_API
int I210ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert I210 to AB30.
LIBYUV_API
int I210ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H210 to AR30.
LIBYUV_API
int H210ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert H210 to AB30.
LIBYUV_API
int H210ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert U210 to AR30.
LIBYUV_API
int U210ToAR30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert U210 to AB30.
LIBYUV_API
int U210ToAB30(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// ABGR little endian (rgba in memory) to ARGB.
LIBYUV_API
int ABGRToARGB(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGBA little endian (abgr in memory) to ARGB.
LIBYUV_API
int RGBAToARGB(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Deprecated function name.
#define BG24ToARGB RGB24ToARGB
// RGB little endian (bgr in memory) to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB big endian (rgb in memory) to ARGB.
LIBYUV_API
int RAWToARGB(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB big endian (rgb in memory) to RGBA.
LIBYUV_API
int RAWToRGBA(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
// RGB16 (RGBP fourcc) little endian to ARGB.
LIBYUV_API
int RGB565ToARGB(const uint8_t* src_rgb565,
int src_stride_rgb565,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB15 (RGBO fourcc) little endian to ARGB.
LIBYUV_API
int ARGB1555ToARGB(const uint8_t* src_argb1555,
int src_stride_argb1555,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// RGB12 (R444 fourcc) little endian to ARGB.
LIBYUV_API
int ARGB4444ToARGB(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Aliases
#define AB30ToARGB AR30ToABGR
#define AB30ToABGR AR30ToARGB
#define AB30ToAR30 AR30ToAB30
// Convert AR30 To ARGB.
LIBYUV_API
int AR30ToARGB(const uint8_t* src_ar30,
int src_stride_ar30,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AR30 To ABGR.
LIBYUV_API
int AR30ToABGR(const uint8_t* src_ar30,
int src_stride_ar30,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert AR30 To AB30.
LIBYUV_API
int AR30ToAB30(const uint8_t* src_ar30,
int src_stride_ar30,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert AR64 to ARGB.
LIBYUV_API
int AR64ToARGB(const uint16_t* src_ar64,
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AB64 to ABGR.
#define AB64ToABGR AR64ToARGB
// Convert AB64 to ARGB.
LIBYUV_API
int AB64ToARGB(const uint16_t* src_ab64,
int src_stride_ab64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AR64 to ABGR.
#define AR64ToABGR AB64ToARGB
// Convert AR64 To AB64.
LIBYUV_API
int AR64ToAB64(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert AB64 To AR64.
#define AB64ToAR64 AR64ToAB64
// Convert MJPG to ARGB.
// src_width/height are provided by capture.
// dst_width/height are used for clipping and determine the final size.
LIBYUV_API
int MJPGToARGB(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_argb,
int dst_stride_argb,
int src_width,
int src_height,
int dst_width,
int dst_height);
// Convert Android420 to ARGB.
LIBYUV_API
int Android420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert Android420 to ABGR.
LIBYUV_API
int Android420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert NV12 to RGB565.
LIBYUV_API
int NV12ToRGB565(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert I422 to BGRA.
LIBYUV_API
int I422ToBGRA(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height);
// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
// Convert I420 to ARGB.
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_argb plane with row stride dst_stride_argb.
LIBYUV_API
int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I420 to BGRA (BGRA little endian, i.e. argb in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_bgra plane with row stride dst_stride_bgra.
LIBYUV_API
int I420ToBGRA(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height);
// Convert I420 to ABGR (ABGR little endian, i.e. rgba in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_abgr plane with row stride dst_stride_abgr.
LIBYUV_API
int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I420 to RGBA (RGBA little endian, i.e. abgr in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgba plane with row stride dst_stride_rgba.
LIBYUV_API
int I420ToRGBA(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
// Convert I420 to RGB24 (RGB little endian, i.e. bgr in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb24 plane with row stride dst_stride_rgb24.
LIBYUV_API
int I420ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert I420 to RAW (RGB big endian, i.e. rgb in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_raw plane with row stride dst_stride_raw.
LIBYUV_API
int I420ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert H420 to RGB24 (RGB little endian, i.e. bgr in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb24 plane with row stride dst_stride_rgb24.
// NOTE(review): "H420" presumably means I420 layout with BT.709 color - confirm.
LIBYUV_API
int H420ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert H420 to RAW (RGB big endian, i.e. rgb in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_raw plane with row stride dst_stride_raw.
// NOTE(review): "H420" presumably means I420 layout with BT.709 color - confirm.
LIBYUV_API
int H420ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert J420 to RGB24 (RGB little endian, i.e. bgr in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb24 plane with row stride dst_stride_rgb24.
// NOTE(review): "J420" presumably means I420 layout with JPEG (full range)
// color - confirm.
LIBYUV_API
int J420ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert J420 to RAW (RGB big endian, i.e. rgb in memory).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_raw plane with row stride dst_stride_raw.
// NOTE(review): "J420" presumably means I420 layout with JPEG (full range)
// color - confirm.
LIBYUV_API
int J420ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert I422 to RGB24.
LIBYUV_API
int I422ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert I422 to RAW.
LIBYUV_API
int I422ToRAW(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert I420 to RGB565 (RGB16, RGBP fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb565 plane with row stride dst_stride_rgb565.
LIBYUV_API
int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert J420 to RGB565 (RGB16, RGBP fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb565 plane with row stride dst_stride_rgb565.
// NOTE(review): "J420" presumably means I420 layout with JPEG (full range)
// color - confirm.
LIBYUV_API
int J420ToRGB565(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert H420 to RGB565 (RGB16, RGBP fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb565 plane with row stride dst_stride_rgb565.
// NOTE(review): "H420" presumably means I420 layout with BT.709 color - confirm.
LIBYUV_API
int H420ToRGB565(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert I422 to RGB565 (RGB16, RGBP fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_rgb565 plane with row stride dst_stride_rgb565.
LIBYUV_API
int I422ToRGB565(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
LIBYUV_API
int I420ToRGB565Dither(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const uint8_t* dither4x4,
int width,
int height);
// Convert I420 to ARGB1555 (RGB15, RGBO fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_argb1555 plane with row stride
// dst_stride_argb1555.
LIBYUV_API
int I420ToARGB1555(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb1555,
int dst_stride_argb1555,
int width,
int height);
// Convert I420 to ARGB4444 (RGB12, R444 fourcc, little endian).
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_argb4444 plane with row stride
// dst_stride_argb4444.
LIBYUV_API
int I420ToARGB4444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb4444,
int dst_stride_argb4444,
int width,
int height);
// Convert I420 to AR30.
LIBYUV_API
int I420ToAR30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert I420 to AB30.
LIBYUV_API
int I420ToAB30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert H420 to AR30.
LIBYUV_API
int H420ToAR30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert H420 to AB30.
LIBYUV_API
int H420ToAB30(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ab30,
int dst_stride_ab30,
int width,
int height);
// Convert I420 to ARGB with matrix.
LIBYUV_API
int I420ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I422 to ARGB with matrix.
LIBYUV_API
int I422ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I444 to ARGB with matrix.
LIBYUV_API
int I444ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I444 to RGB24 with matrix.
LIBYUV_API
int I444ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 420 YUV to AR30 with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 422 YUV to AR30 with matrix.
LIBYUV_API
int I210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 444 YUV to AR30 with matrix.
LIBYUV_API
int I410ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 12 bit YUV to AR30 with matrix.
// Multiply 12 bit yuv into high bits to allow any number of bits.
LIBYUV_API
int I012ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 12 bit YUV to ARGB with matrix.
LIBYUV_API
int I012ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 422 YUV to ARGB with matrix.
LIBYUV_API
int I210ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert 10 bit 444 YUV to ARGB with matrix.
LIBYUV_API
int I410ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to ARGB with matrix.
LIBYUV_API
int P010ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to ARGB with matrix.
LIBYUV_API
int P210ToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P010 to AR30 with matrix.
LIBYUV_API
int P010ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert P210 to AR30 with matrix.
LIBYUV_API
int P210ToAR30Matrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// P012 and P010 use most significant bits so the conversion is the same.
// Convert P012 to ARGB with matrix.
#define P012ToARGBMatrix P010ToARGBMatrix
// Convert P012 to AR30 with matrix.
#define P012ToAR30Matrix P010ToAR30Matrix
// Convert P212 to ARGB with matrix.
#define P212ToARGBMatrix P210ToARGBMatrix
// Convert P212 to AR30 with matrix.
#define P212ToAR30Matrix P210ToAR30Matrix
// Convert P016 to ARGB with matrix.
#define P016ToARGBMatrix P010ToARGBMatrix
// Convert P016 to AR30 with matrix.
#define P016ToAR30Matrix P010ToAR30Matrix
// Convert P216 to ARGB with matrix.
#define P216ToARGBMatrix P210ToARGBMatrix
// Convert P216 to AR30 with matrix.
#define P216ToAR30Matrix P210ToAR30Matrix
// Convert I420 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I420AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I422 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I422AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I444 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I444AlphaToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I010 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I010AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I210 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I210AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert I410 with Alpha to preattenuated ARGB with matrix.
LIBYUV_API
int I410AlphaToARGBMatrix(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate);
// Convert NV12 to ARGB with matrix.
LIBYUV_API
int NV12ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert NV21 to ARGB with matrix.
LIBYUV_API
int NV21ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert NV12 to RGB565 with matrix.
LIBYUV_API
int NV12ToRGB565Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert NV12 to RGB24 with matrix.
LIBYUV_API
int NV12ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert NV21 to RGB24 with matrix.
LIBYUV_API
int NV21ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert Android420 to ARGB with matrix.
LIBYUV_API
int Android420ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I422 to RGBA with matrix.
LIBYUV_API
int I422ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgba,
int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to RGBA with matrix.
LIBYUV_API
int I420ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgba,
int dst_stride_rgba,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to RGB24 with matrix.
LIBYUV_API
int I420ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I422 to RGB24 with matrix.
LIBYUV_API
int I422ToRGB24Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to RGB565 with specified color matrix.
LIBYUV_API
int I420ToRGB565Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I422 to RGB565 with specified color matrix.
LIBYUV_API
int I422ToRGB565Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to AR30 with matrix.
LIBYUV_API
int I420ToAR30Matrix(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I400 (grey) to ARGB with matrix. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGBMatrix(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height);
// Convert I420 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I420ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I422 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I422ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I422 to RGB24 with matrix and UV filter mode.
LIBYUV_API
int I422ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I420 to RGB24 with matrix and UV filter mode.
LIBYUV_API
int I420ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I210 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I210ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I010 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I210 to ARGB with matrix and UV filter mode.
LIBYUV_API
int I210ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I420 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I420AlphaToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I422 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I422AlphaToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I010 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I010AlphaToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert I210 with Alpha to attenuated ARGB with matrix and UV filter mode.
LIBYUV_API
int I210AlphaToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
int attenuate,
enum FilterMode filter);
// Convert P010 to ARGB with matrix and UV filter mode.
LIBYUV_API
int P010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P210 to ARGB with matrix and UV filter mode.
LIBYUV_API
int P210ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int P010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert P210 to AR30 with matrix and UV filter mode.
LIBYUV_API
int P210ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint8_t* dst_ar30,
int dst_stride_ar30,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "sample_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
// Normally this would be the same as dst_width * 4 (ARGB is 4 bytes per
// pixel), with recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the ARGB buffer according to rotation.
// NOTE(review): the "dst_stride_u" paragraph below does not correspond to any
// parameter of ConvertToARGB; it appears to be carried over from the I420
// variant of this function. Kept for reference - confirm and remove upstream.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "fourcc" is a fourcc, e.g. 'I420', 'YUY2'.
// Returns 0 on success; -1 for an invalid parameter; any other non-zero value
// on failure.
LIBYUV_API
int ConvertToARGB(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_argb,
int dst_stride_argb,
int crop_x,
int crop_y,
int src_width,
int src_height,
int crop_width,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420.
// Convert 8 bit YUV to 10 bit.
#define H420ToH010 I420ToI010
LIBYUV_API
int I420ToI010(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert 8 bit YUV to 12 bit.
#define H420ToH012 I420ToI012
LIBYUV_API
int I420ToI012(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I420 to I422.
// Source and destination are each three planes (Y, U, V) with a separate
// stride per plane.
LIBYUV_API
int I420ToI422(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I420 to I444.
// Source and destination are each three planes (Y, U, V) with a separate
// stride per plane.
LIBYUV_API
int I420ToI444(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert I420 to NV12.
// Source is three planes (src_y, src_u, src_v); destination is a dst_y plane
// plus a single dst_uv plane, each with its own stride.
LIBYUV_API
int I420ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I420 to NV21.
// Source is three planes (src_y, src_u, src_v); destination is a dst_y plane
// plus a single dst_vu plane, each with its own stride.
LIBYUV_API
int I420ToNV21(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert I420 to YUY2.
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_yuy2 plane with row stride dst_stride_yuy2.
LIBYUV_API
int I420ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert I420 to UYVY.
// Source is three planes (src_y, src_u, src_v), each with its own stride;
// destination is a single dst_uyvy plane with row stride dst_stride_uyvy.
LIBYUV_API
int I420ToUYVY(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height);
// The following are from convert_argb.h
// DEPRECATED: The prototypes will be removed in future. Use convert_argb.h
// New code should include convert_argb.h rather than rely on these copies.
// Convert I420 to ARGB.
// Source is three planes (Y, U, V); destination is one buffer (dst_argb) with
// a single stride.
LIBYUV_API
int I420ToARGB(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I420 to ABGR.
// Same argument layout as I420ToARGB, writing to dst_abgr.
LIBYUV_API
int I420ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
// y/u/v and their strides describe the three I420 source planes; dst_sample
// receives the converted image.
// NOTE(review): the destination format is presumably selected by "fourcc" -
// confirm the accepted codes against the implementation.
LIBYUV_API
int ConvertFromI420(const uint8_t* y,
int y_stride,
const uint8_t* u,
int u_stride,
const uint8_t* v,
int v_stride,
uint8_t* dst_sample,
int dst_sample_stride,
int width,
int height,
uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy ARGB to ARGB.
// ARGBToARGB is only a macro alias: it expands to ARGBCopy.
#define ARGBToARGB ARGBCopy
// All functions in this group take one source buffer and one destination
// buffer, each with its own stride, plus the image width and height.
LIBYUV_API
int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert ARGB To BGRA.
LIBYUV_API
int ARGBToBGRA(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_bgra,
int dst_stride_bgra,
int width,
int height);
// Convert ARGB To ABGR.
LIBYUV_API
int ARGBToABGR(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_abgr,
int dst_stride_abgr,
int width,
int height);
// Convert ARGB To RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_rgba,
int dst_stride_rgba,
int width,
int height);
// Aliases
// Each name on the left is a macro that expands to the function on the right;
// no separate AB30 functions are declared in this header.
#define ARGBToAB30 ABGRToAR30
#define ABGRToAB30 ARGBToAR30
// Convert ABGR To AR30.
LIBYUV_API
int ABGRToAR30(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert ARGB To AR30.
LIBYUV_API
int ARGBToAR30(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Aliases
// The ABGR-source variants are macros that expand to the ARGB-source function
// with the other destination name (RGB24 <-> RAW).
#define ABGRToRGB24 ARGBToRAW
#define ABGRToRAW ARGBToRGB24
// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert ARGB To RAW.
LIBYUV_API
int ARGBToRAW(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_raw,
int dst_stride_raw,
int width,
int height);
// Convert ARGB To RGB565.
LIBYUV_API
int ARGBToRGB565(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
int width,
int height);
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
// dither4x4 is passed as a flat byte pointer rather than a 2-D array type.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8_t(*dither)[4][4];
LIBYUV_API
int ARGBToRGB565Dither(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_rgb565,
int dst_stride_rgb565,
const uint8_t* dither4x4,
int width,
int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb1555,
int dst_stride_argb1555,
int width,
int height);
// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb4444,
int dst_stride_argb4444,
int width,
int height);
// Convert ARGB To I444.
// Destination is three separate planes (Y, U, V), each with its own stride.
LIBYUV_API
int ARGBToI444(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB to AR64.
// The destination pointer is uint16_t*, unlike the 8-bit destinations above.
// NOTE(review): confirm whether dst_stride_ar64 counts bytes or uint16_t
// elements; the header does not say.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height);
// Convert ABGR to AB64.
// Macro alias: expands to ARGBToAR64.
#define ABGRToAB64 ARGBToAR64
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert ABGR to AR64.
// Macro alias: expands to ARGBToAB64.
#define ABGRToAR64 ARGBToAB64
// Convert ARGB To I422.
// All four functions in this group read one ARGB buffer and write three
// separate destination planes, each with its own stride.
LIBYUV_API
int ARGBToI422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ARGB to J422.
// (Presumably JPeg full range I422, by analogy with J420 above.)
LIBYUV_API
int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ARGB to J400. (JPeg full range).
// Writes a single destination plane (dst_yj).
LIBYUV_API
int ARGBToJ400(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert ABGR to J420. (JPeg full range I420).
// Writes three destination planes (dst_yj, dst_uj, dst_vj).
LIBYUV_API
int ABGRToJ420(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J422.
// Same argument layout as ABGRToJ420.
LIBYUV_API
int ABGRToJ422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J400. (JPeg full range).
LIBYUV_API
int ABGRToJ400(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert ARGB to I400.
// Writes a single destination plane (dst_y).
LIBYUV_API
int ARGBToI400(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
// Writes a single destination plane (dst_g).
LIBYUV_API
int ARGBToG(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_g,
int dst_stride_g,
int width,
int height);
// Convert ARGB To NV12.
// Destination is a Y plane plus one combined chroma plane (dst_uv).
LIBYUV_API
int ARGBToNV12(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert ARGB To NV21.
// Destination is a Y plane plus one combined chroma plane (dst_vu).
LIBYUV_API
int ARGBToNV21(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert ABGR To NV12.
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert ABGR To NV21.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert ARGB To YUY2.
// Destination is one buffer (dst_yuy2) with a single stride.
LIBYUV_API
int ARGBToYUY2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert ARGB To UYVY.
// Destination is one buffer (dst_uyvy) with a single stride.
LIBYUV_API
int ARGBToUYVY(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height);
// RAW to JNV21 full range NV21
// Destination is a Y plane plus one combined chroma plane (dst_vu).
LIBYUV_API
int RAWToJNV21(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU feature bit flags. Every constant below is a distinct single bit, so
// values can be OR-ed together and tested with a bitwise AND (see
// TestCpuFlag). They are declared `static const int`, so each translation
// unit that includes this header gets its own copy.
// Bits 0x8 (reserved, see below) and 0x1000000 are not assigned in this list.
// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag.
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100; // unused at this time.
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasF16C = 0x2000;
static const int kCpuHasGFNI = 0x4000;
static const int kCpuHasAVX512BW = 0x8000;
static const int kCpuHasAVX512VL = 0x10000;
static const int kCpuHasAVX512VNNI = 0x20000;
static const int kCpuHasAVX512VBMI = 0x40000;
static const int kCpuHasAVX512VBMI2 = 0x80000;
static const int kCpuHasAVX512VBITALG = 0x100000;
static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x400000;
static const int kCpuHasMSA = 0x800000;
// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000;
// Optional init function. TestCpuFlag does an auto-init.
// (TestCpuFlag calls this itself whenever the cached cpu_info_ value is zero.)
// Returns cpu_info flags.
LIBYUV_API
int InitCpuFlags(void);
// Detect CPU has SSE2 etc.
// Test_flag parameter should be one of kCpuHas constants above.
// Returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
#ifdef __ATOMIC_RELAXED
int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
#else
int cpu_info = cpu_info_;
#endif
return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
}
// Internal function for parsing /proc/cpuinfo.
// cpuinfo_name is the path of the file to parse.
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
// NOTE(review): presumably the MIPS counterpart of ArmCpuCaps (same
// signature) - confirm against the implementation.
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(1) to disable all cpu specific optimizations.
// (1 is kCpuInitialized, i.e. "initialized" with no feature bits set.)
// MaskCpuFlags(0) to reset state so next call will auto init.
// Returns cpu_info flags.
LIBYUV_API
int MaskCpuFlags(int enable_flags);
// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
// should be a valid combination of the kCpuHas constants above and include
// kCpuInitialized. Use this method when running in a sandboxed process where
// the detection code might fail (as it might access /proc/cpuinfo). In such
// cases the cpu_info can be obtained from a non sandboxed process by calling
// InitCpuFlags() and passed to the sandboxed process (via command line
// parameters, IPC...) which can then call this method to initialize the CPU
// flags.
// Notes:
// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
// again.
// - enabling CPU features that are not supported by the CPU will result in
// undefined behavior.
// - the value is written straight into cpu_info_ without any validation.
// TODO(fbarchard): consider writing a helper function that translates from
// other library CPU info to libyuv CPU info and add a .md doc that explains
// CPU detection.
static __inline void SetCpuFlags(int cpu_flags) {
LIBYUV_API extern int cpu_info_;
// Use a relaxed atomic store when the compiler defines __ATOMIC_RELAXED;
// otherwise fall back to a plain assignment.
#ifdef __ATOMIC_RELAXED
__atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
#else
cpu_info_ = cpu_flags;
#endif
}
// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
// Results are written through cpu_info.
// NOTE(review): cpu_info presumably must point at 4 ints (eax, ebx, ecx,
// edx) - confirm against the implementation.
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H
#define INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H
/*
* Copyright (c) 2022 Loongson Technology Corporation Limited
* All rights reserved.
* Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
* Xiwei Gu <guxiwei-hf@loongson.cn>
* Lu Wang <wanglu@loongson.cn>
*
* This file is a header file for loongarch builtin extension.
*
*/
#ifndef LOONGSON_INTRINSICS_H
#define LOONGSON_INTRINSICS_H
/**
 * Version of this intrinsics helper header (currently 1.1.0).
 * MAJOR version: Macro usage changes.
 * MINOR version: Add new functions, or bug fixes.
 * MICRO version: Comment changes or implementation changes.
 *
 * NOTE(review): the prefix is spelled "LSOM"; the names are left untouched
 * because other code may test these macros.
 */
#define LSOM_VERSION_MAJOR 1
#define LSOM_VERSION_MINOR 1
#define LSOM_VERSION_MICRO 0
/*
 * DUP<n>_ARG<m>(_INS, inputs..., outputs...)
 *
 * Apply the same instruction (or any callable) _INS to <n> independent groups
 * of <m> inputs each, assigning the result of group k to _OUTk.
 * Each macro expands to one brace-enclosed block and performs the
 * assignments in order (_OUT0 first), so a later group may read an output
 * produced by an earlier one.
 */
#define DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1) \
  { _OUT0 = _INS(_IN0); _OUT1 = _INS(_IN1); }
#define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \
  { _OUT0 = _INS(_IN0, _IN1); _OUT1 = _INS(_IN2, _IN3); }
#define DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1) \
  { _OUT0 = _INS(_IN0, _IN1, _IN2); _OUT1 = _INS(_IN3, _IN4, _IN5); }
#define DUP4_ARG1(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1, _OUT2, _OUT3) \
  {                                                                         \
    _OUT0 = _INS(_IN0);                                                     \
    _OUT1 = _INS(_IN1);                                                     \
    _OUT2 = _INS(_IN2);                                                     \
    _OUT3 = _INS(_IN3);                                                     \
  }
#define DUP4_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _OUT0, \
                  _OUT1, _OUT2, _OUT3)                                         \
  {                                                                            \
    _OUT0 = _INS(_IN0, _IN1);                                                  \
    _OUT1 = _INS(_IN2, _IN3);                                                  \
    _OUT2 = _INS(_IN4, _IN5);                                                  \
    _OUT3 = _INS(_IN6, _IN7);                                                  \
  }
#define DUP4_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _IN8, \
                  _IN9, _IN10, _IN11, _OUT0, _OUT1, _OUT2, _OUT3)             \
  {                                                                           \
    _OUT0 = _INS(_IN0, _IN1, _IN2);                                           \
    _OUT1 = _INS(_IN3, _IN4, _IN5);                                           \
    _OUT2 = _INS(_IN6, _IN7, _IN8);                                           \
    _OUT3 = _INS(_IN9, _IN10, _IN11);                                         \
  }
#ifdef __loongarch_sx
#include <lsxintrin.h>
/*
 * =============================================================================
 * Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Signed byte elements of in_h are multiplied by the signed
 *               byte elements of in_l; adjacent products are summed into
 *               results twice as wide as the inputs, and those sums are added
 *               to the signed half-word elements of in_c.
 * Example     : out = __lsx_vdp2add_h_b(in_c, in_h, in_l)
 *        in_c : 1,2,3,4, 1,2,3,4
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 23,40,41,26, 23,40,41,26
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_h_b(__m128i in_c,
                                        __m128i in_h,
                                        __m128i in_l) {
  /* First accumulate the even-position products onto in_c ... */
  const __m128i even_acc = __lsx_vmaddwev_h_b(in_c, in_h, in_l);
  /* ... then add the odd-position products on top. */
  return __lsx_vmaddwod_h_b(even_acc, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Unsigned byte elements of in_h are multiplied by the unsigned
 *               byte elements of in_l; adjacent products are summed into
 *               results twice as wide as the inputs, and those sums are added
 *               to the half-word elements of in_c.
 * Example     : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l)
 *        in_c : 1,2,3,4, 1,2,3,4
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 23,40,41,26, 23,40,41,26
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c,
                                         __m128i in_h,
                                         __m128i in_l) {
  /* Even-position products first, odd-position products second. */
  const __m128i even_acc = __lsx_vmaddwev_h_bu(in_c, in_h, in_l);
  return __lsx_vmaddwod_h_bu(even_acc, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Unsigned byte elements of in_h are multiplied by the signed
 *               byte elements of in_l; adjacent products are summed into
 *               results twice as wide as the inputs, and those sums are added
 *               to the signed half-word elements of in_c.
 * Example     : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l)
 *        in_c : 1,1,1,1, 1,1,1,1
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8
 *         out : -4,-24,-60,-112, 6,26,62,114
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c,
                                           __m128i in_h,
                                           __m128i in_l) {
  /* Even-position products first, odd-position products second. */
  const __m128i even_acc = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l);
  return __lsx_vmaddwod_h_bu_b(even_acc, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product & addition of half-word vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - __m128i
 * Details     : Signed half-word elements of in_h are multiplied by the
 *               signed half-word elements of in_l; adjacent products are
 *               summed into results twice as wide as the inputs, and those
 *               sums are added to the signed word elements of in_c.
 * Example     : out = __lsx_vdp2add_w_h(in_c, in_h, in_l)
 *        in_c : 1,2,3,4
 *        in_h : 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1
 *         out : 23,40,41,26
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_w_h(__m128i in_c,
                                        __m128i in_h,
                                        __m128i in_l) {
  /* Even-position products first, odd-position products second. */
  const __m128i even_acc = __lsx_vmaddwev_w_h(in_c, in_h, in_l);
  return __lsx_vmaddwod_w_h(even_acc, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product of byte vector elements
 * Arguments   : Inputs  - in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Signed byte elements of in_h are multiplied by the signed
 *               byte elements of in_l, and adjacent products are summed into
 *               results twice as wide as the inputs.
 * Example     : out = __lsx_vdp2_h_b(in_h, in_l)
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 22,38,38,22, 22,38,38,22
 * =============================================================================
 */
static inline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) {
  /* Products of the even positions seed the sum; odd positions are added. */
  const __m128i even_prod = __lsx_vmulwev_h_b(in_h, in_l);
  return __lsx_vmaddwod_h_b(even_prod, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product of byte vector elements
 * Arguments   : Inputs  - in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Unsigned byte elements of in_h are multiplied by the unsigned
 *               byte elements of in_l, and adjacent products are summed into
 *               results twice as wide as the inputs.
 * Example     : out = __lsx_vdp2_h_bu(in_h, in_l)
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 22,38,38,22, 22,38,38,22
 * =============================================================================
 */
static inline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) {
  /* Products of the even positions seed the sum; odd positions are added. */
  const __m128i even_prod = __lsx_vmulwev_h_bu(in_h, in_l);
  return __lsx_vmaddwod_h_bu(even_prod, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product of byte vector elements
 * Arguments   : Inputs  - in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Unsigned byte elements of in_h are multiplied by the signed
 *               byte elements of in_l, and adjacent products are summed into
 *               results twice as wide as the inputs.
 * Example     : out = __lsx_vdp2_h_bu_b(in_h, in_l)
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1
 *         out : 22,38,38,22, 22,38,38,6
 * =============================================================================
 */
static inline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) {
  /* Products of the even positions seed the sum; odd positions are added. */
  const __m128i even_prod = __lsx_vmulwev_h_bu_b(in_h, in_l);
  return __lsx_vmaddwod_h_bu_b(even_prod, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Dot product of half-word vector elements
 * Arguments   : Inputs  - in_h, in_l
 *               Outputs - out
 *               Return Type - word
 * Details     : Signed half-word elements of in_h are multiplied by the
 *               signed half-word elements of in_l, and adjacent products are
 *               summed into results twice as wide as the inputs.
 * Example     : out = __lsx_vdp2_w_h(in_h, in_l)
 *        in_h : 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1
 *         out : 22,38,38,22
 * =============================================================================
 */
static inline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) {
  /* Products of the even positions seed the sum; odd positions are added. */
  const __m128i even_prod = __lsx_vmulwev_w_h(in_h, in_l);
  return __lsx_vmaddwod_w_h(even_prod, in_h, in_l);
}
/*
 * =============================================================================
 * Description : Clip all halfword elements of input vector between min & max
 *               out = ((_in) < (min)) ? (min) :
 *                     (((_in) > (max)) ? (max) : (_in))
 * Arguments   : Inputs  - _in  (input vector)
 *                       - min  (min threshold)
 *                       - max  (max threshold)
 *               Outputs - out  (output vector with clipped elements)
 *               Return Type - signed halfword
 * Example     : out = __lsx_vclip_h(_in, min, max)
 *         _in : -8,2,280,249, -8,255,280,249
 *         min : 1,1,1,1, 1,1,1,1
 *         max : 9,9,9,9, 9,9,9,9
 *         out : 1,2,9,9, 1,9,9,9
 * =============================================================================
 */
static inline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) {
  /* Raise to the lower bound first, then cap at the upper bound. */
  return __lsx_vmin_h(max, __lsx_vmax_h(min, _in));
}
/*
 * =============================================================================
 * Description : Set each element of vector between 0 and 255
 * Arguments   : Inputs  - _in
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Signed halfword elements from _in are clamped between 0 and
 *               255.
 * Example     : out = __lsx_vclip255_h(_in)
 *         _in : -8,255,280,249, -8,255,280,249
 *         out : 0,255,255,249, 0,255,255,249
 * =============================================================================
 */
static inline __m128i __lsx_vclip255_h(__m128i _in) {
  /* Negative values become 0, then the result is saturated as unsigned. */
  const __m128i non_negative = __lsx_vmaxi_h(_in, 0);
  return __lsx_vsat_hu(non_negative, 7);
}
/*
 * =============================================================================
 * Description : Set each element of vector between 0 and 255
 * Arguments   : Inputs  - _in
 *               Outputs - out
 *               Return Type - word
 * Details     : Signed word elements from _in are clamped between 0 and 255.
 * Example     : out = __lsx_vclip255_w(_in)
 *         _in : -8,255,280,249
 *         out : 0,255,255,249
 * =============================================================================
 */
static inline __m128i __lsx_vclip255_w(__m128i _in) {
  /* Negative values become 0, then the result is saturated as unsigned. */
  const __m128i non_negative = __lsx_vmaxi_w(_in, 0);
  return __lsx_vsat_wu(non_negative, 7);
}
/*
 * =============================================================================
 * Description : Swap two variables
 * Arguments   : Inputs  - _in0, _in1
 *               Outputs - _in0, _in1 (in-place)
 * Details     : Exchanges the two vector variables through a temporary.
 *               Unlike an XOR swap, this leaves the value intact when both
 *               arguments name the same variable (an XOR swap would zero it).
 *               Both arguments must be assignable lvalues.
 * Example     : LSX_SWAP(_in0, _in1)
 *         _in0 : 1,2,3,4
 *         _in1 : 5,6,7,8
 *   _in0(out) : 5,6,7,8
 *   _in1(out) : 1,2,3,4
 * =============================================================================
 */
#define LSX_SWAP(_in0, _in1)       \
  {                                \
    __m128i _lsx_swap_tmp_m = _in0; \
    _in0 = _in1;                   \
    _in1 = _lsx_swap_tmp_m;        \
  }
/*
 * =============================================================================
 * Description : Transpose 4x4 block with word elements in vectors
 * Arguments : Inputs - _in0, _in1, _in2, _in3
 * Outputs - _out0, _out1, _out2, _out3
 * Details : Rows are first interleaved pairwise at word granularity
 * (_in0/_in1 and _in2/_in3), then the intermediate vectors are
 * interleaved at double-word granularity to form the outputs.
 * The macro expands to a brace block that declares the
 * temporaries _t0.._t3.
 * Example :
 * 1, 2, 3, 4 1, 5, 9,13
 * 5, 6, 7, 8 to 2, 6,10,14
 * 9,10,11,12 =====> 3, 7,11,15
 * 13,14,15,16 4, 8,12,16
 * =============================================================================
 */
#define LSX_TRANSPOSE4x4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
__m128i _t0, _t1, _t2, _t3; \
\
_t0 = __lsx_vilvl_w(_in1, _in0); \
_t1 = __lsx_vilvh_w(_in1, _in0); \
_t2 = __lsx_vilvl_w(_in3, _in2); \
_t3 = __lsx_vilvh_w(_in3, _in2); \
_out0 = __lsx_vilvl_d(_t2, _t0); \
_out1 = __lsx_vilvh_d(_t2, _t0); \
_out2 = __lsx_vilvl_d(_t3, _t1); \
_out3 = __lsx_vilvh_d(_t3, _t1); \
}
/*
 * =============================================================================
 * Description : Transpose 8x8 block with byte elements in vectors
 * Arguments   : Inputs  - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
 *               Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
 *                         _out7
 * Details     : The rows of the matrix become columns, and the columns
 *               become rows.
 *               All temporaries declared by the macro use the "_..._m" or
 *               "_tN" naming so that caller arguments with common names such
 *               as "zero" are not captured by the expansion.
 * Example     : LSX_TRANSPOSE8x8_B
 *        _in0 : 00,01,02,03,04,05,06,07, 00,00,00,00,00,00,00,00
 *        _in1 : 10,11,12,13,14,15,16,17, 00,00,00,00,00,00,00,00
 *        _in2 : 20,21,22,23,24,25,26,27, 00,00,00,00,00,00,00,00
 *        _in3 : 30,31,32,33,34,35,36,37, 00,00,00,00,00,00,00,00
 *        _in4 : 40,41,42,43,44,45,46,47, 00,00,00,00,00,00,00,00
 *        _in5 : 50,51,52,53,54,55,56,57, 00,00,00,00,00,00,00,00
 *        _in6 : 60,61,62,63,64,65,66,67, 00,00,00,00,00,00,00,00
 *        _in7 : 70,71,72,73,74,75,76,77, 00,00,00,00,00,00,00,00
 *
 *       _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00
 *       _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00
 *       _out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00
 *       _out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00
 *       _out4 : 04,14,24,34,44,54,64,74, 00,00,00,00,00,00,00,00
 *       _out5 : 05,15,25,35,45,55,65,75, 00,00,00,00,00,00,00,00
 *       _out6 : 06,16,26,36,46,56,66,76, 00,00,00,00,00,00,00,00
 *       _out7 : 07,17,27,37,47,57,67,77, 00,00,00,00,00,00,00,00
 * =============================================================================
 */
#define LSX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7,  \
                           _out0, _out1, _out2, _out3, _out4, _out5, _out6, \
                           _out7)                                           \
  {                                                                         \
    __m128i _zero_m = {0};                                                  \
    __m128i _shuf8_m = {0x0F0E0D0C0B0A0908, 0x1716151413121110};            \
    __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;                         \
                                                                            \
    _t0 = __lsx_vilvl_b(_in2, _in0);                                        \
    _t1 = __lsx_vilvl_b(_in3, _in1);                                        \
    _t2 = __lsx_vilvl_b(_in6, _in4);                                        \
    _t3 = __lsx_vilvl_b(_in7, _in5);                                        \
    _t4 = __lsx_vilvl_b(_t1, _t0);                                          \
    _t5 = __lsx_vilvh_b(_t1, _t0);                                          \
    _t6 = __lsx_vilvl_b(_t3, _t2);                                          \
    _t7 = __lsx_vilvh_b(_t3, _t2);                                          \
    _out0 = __lsx_vilvl_w(_t6, _t4);                                        \
    _out2 = __lsx_vilvh_w(_t6, _t4);                                        \
    _out4 = __lsx_vilvl_w(_t7, _t5);                                        \
    _out6 = __lsx_vilvh_w(_t7, _t5);                                        \
    _out1 = __lsx_vshuf_b(_zero_m, _out0, _shuf8_m);                        \
    _out3 = __lsx_vshuf_b(_zero_m, _out2, _shuf8_m);                        \
    _out5 = __lsx_vshuf_b(_zero_m, _out4, _shuf8_m);                        \
    _out7 = __lsx_vshuf_b(_zero_m, _out6, _shuf8_m);                        \
  }
/*
 * =============================================================================
 * Description : Transpose 8x8 block with half-word elements in vectors
 * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
 * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7
 * Details : Rows 4..7 are interleaved into _t0.._t3 and rows 0..3 into
 * _t4.._t7 (reusing _s0/_s1 as scratch); the outputs are then
 * formed by picking the even / odd double-words of each pair.
 * The macro expands to a brace block that declares _s0, _s1 and
 * _t0.._t7.
 * Example :
 * 00,01,02,03,04,05,06,07 00,10,20,30,40,50,60,70
 * 10,11,12,13,14,15,16,17 01,11,21,31,41,51,61,71
 * 20,21,22,23,24,25,26,27 02,12,22,32,42,52,62,72
 * 30,31,32,33,34,35,36,37 to 03,13,23,33,43,53,63,73
 * 40,41,42,43,44,45,46,47 ======> 04,14,24,34,44,54,64,74
 * 50,51,52,53,54,55,56,57 05,15,25,35,45,55,65,75
 * 60,61,62,63,64,65,66,67 06,16,26,36,46,56,66,76
 * 70,71,72,73,74,75,76,77 07,17,27,37,47,57,67,77
 * =============================================================================
 */
#define LSX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
__m128i _s0, _s1, _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \
\
_s0 = __lsx_vilvl_h(_in6, _in4); \
_s1 = __lsx_vilvl_h(_in7, _in5); \
_t0 = __lsx_vilvl_h(_s1, _s0); \
_t1 = __lsx_vilvh_h(_s1, _s0); \
_s0 = __lsx_vilvh_h(_in6, _in4); \
_s1 = __lsx_vilvh_h(_in7, _in5); \
_t2 = __lsx_vilvl_h(_s1, _s0); \
_t3 = __lsx_vilvh_h(_s1, _s0); \
_s0 = __lsx_vilvl_h(_in2, _in0); \
_s1 = __lsx_vilvl_h(_in3, _in1); \
_t4 = __lsx_vilvl_h(_s1, _s0); \
_t5 = __lsx_vilvh_h(_s1, _s0); \
_s0 = __lsx_vilvh_h(_in2, _in0); \
_s1 = __lsx_vilvh_h(_in3, _in1); \
_t6 = __lsx_vilvl_h(_s1, _s0); \
_t7 = __lsx_vilvh_h(_s1, _s0); \
\
_out0 = __lsx_vpickev_d(_t0, _t4); \
_out2 = __lsx_vpickev_d(_t1, _t5); \
_out4 = __lsx_vpickev_d(_t2, _t6); \
_out6 = __lsx_vpickev_d(_t3, _t7); \
_out1 = __lsx_vpickod_d(_t0, _t4); \
_out3 = __lsx_vpickod_d(_t1, _t5); \
_out5 = __lsx_vpickod_d(_t2, _t6); \
_out7 = __lsx_vpickod_d(_t3, _t7); \
}
/*
 * =============================================================================
 * Description : Transpose input 8x4 byte block into 4x8
 * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
 * (eight rows, four bytes used from each)
 * Outputs - _out0, _out1, _out2, _out3 (output 4x8 byte block)
 * Details : The rows of the matrix become columns, and the columns become
 * rows.
 * _out1 and _out3 are derived from the freshly written _out0 and
 * _out2, so the four output arguments must be distinct variables.
 * Example : LSX_TRANSPOSE8x4_B
 * _in0 : 00,01,02,03,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in1 : 10,11,12,13,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in2 : 20,21,22,23,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in3 : 30,31,32,33,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in4 : 40,41,42,43,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in5 : 50,51,52,53,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in6 : 60,61,62,63,00,00,00,00, 00,00,00,00,00,00,00,00
 * _in7 : 70,71,72,73,00,00,00,00, 00,00,00,00,00,00,00,00
 *
 * _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00
 * _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00
 * _out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00
 * _out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00
 * =============================================================================
 */
#define LSX_TRANSPOSE8x4_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3) \
{ \
__m128i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
\
_tmp0_m = __lsx_vpackev_w(_in4, _in0); \
_tmp1_m = __lsx_vpackev_w(_in5, _in1); \
_tmp2_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \
_tmp0_m = __lsx_vpackev_w(_in6, _in2); \
_tmp1_m = __lsx_vpackev_w(_in7, _in3); \
\
_tmp3_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \
_tmp0_m = __lsx_vilvl_h(_tmp3_m, _tmp2_m); \
_tmp1_m = __lsx_vilvh_h(_tmp3_m, _tmp2_m); \
\
_out0 = __lsx_vilvl_w(_tmp1_m, _tmp0_m); \
_out2 = __lsx_vilvh_w(_tmp1_m, _tmp0_m); \
_out1 = __lsx_vilvh_d(_out2, _out0); \
_out3 = __lsx_vilvh_d(_out0, _out2); \
}
/*
 * =============================================================================
 * Description : Transpose 16x8 block with byte elements in vectors
 * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, _in8
 * _in9, _in10, _in11, _in12, _in13, _in14, _in15
 * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
 * _out7
 * Details : Built from the DUP2_ARG2 / DUP4_ARG2 helper macros, which must
 * be defined before this macro is used. Rows are interleaved at
 * byte, then word, then double-word granularity.
 * Example :
 * 000,001,002,003,004,005,006,007
 * 008,009,010,011,012,013,014,015
 * 016,017,018,019,020,021,022,023
 * 024,025,026,027,028,029,030,031
 * 032,033,034,035,036,037,038,039
 * 040,041,042,043,044,045,046,047 000,008,...,112,120
 * 048,049,050,051,052,053,054,055 001,009,...,113,121
 * 056,057,058,059,060,061,062,063 to 002,010,...,114,122
 * 064,065,066,067,068,069,070,071 =====> 003,011,...,115,123
 * 072,073,074,075,076,077,078,079 004,012,...,116,124
 * 080,081,082,083,084,085,086,087 005,013,...,117,125
 * 088,089,090,091,092,093,094,095 006,014,...,118,126
 * 096,097,098,099,100,101,102,103 007,015,...,119,127
 * 104,105,106,107,108,109,110,111
 * 112,113,114,115,116,117,118,119
 * 120,121,122,123,124,125,126,127
 * =============================================================================
 */
#define LSX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_in8, _in9, _in10, _in11, _in12, _in13, _in14, \
_in15, _out0, _out1, _out2, _out3, _out4, _out5, \
_out6, _out7) \
{ \
__m128i _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7; \
__m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \
DUP4_ARG2(__lsx_vilvl_b, _in2, _in0, _in3, _in1, _in6, _in4, _in7, _in5, \
_tmp0, _tmp1, _tmp2, _tmp3); \
DUP4_ARG2(__lsx_vilvl_b, _in10, _in8, _in11, _in9, _in14, _in12, _in15, \
_in13, _tmp4, _tmp5, _tmp6, _tmp7); \
DUP2_ARG2(__lsx_vilvl_b, _tmp1, _tmp0, _tmp3, _tmp2, _t0, _t2); \
DUP2_ARG2(__lsx_vilvh_b, _tmp1, _tmp0, _tmp3, _tmp2, _t1, _t3); \
DUP2_ARG2(__lsx_vilvl_b, _tmp5, _tmp4, _tmp7, _tmp6, _t4, _t6); \
DUP2_ARG2(__lsx_vilvh_b, _tmp5, _tmp4, _tmp7, _tmp6, _t5, _t7); \
DUP2_ARG2(__lsx_vilvl_w, _t2, _t0, _t3, _t1, _tmp0, _tmp4); \
DUP2_ARG2(__lsx_vilvh_w, _t2, _t0, _t3, _t1, _tmp2, _tmp6); \
DUP2_ARG2(__lsx_vilvl_w, _t6, _t4, _t7, _t5, _tmp1, _tmp5); \
DUP2_ARG2(__lsx_vilvh_w, _t6, _t4, _t7, _t5, _tmp3, _tmp7); \
DUP2_ARG2(__lsx_vilvl_d, _tmp1, _tmp0, _tmp3, _tmp2, _out0, _out2); \
DUP2_ARG2(__lsx_vilvh_d, _tmp1, _tmp0, _tmp3, _tmp2, _out1, _out3); \
DUP2_ARG2(__lsx_vilvl_d, _tmp5, _tmp4, _tmp7, _tmp6, _out4, _out6); \
DUP2_ARG2(__lsx_vilvh_d, _tmp5, _tmp4, _tmp7, _tmp6, _out5, _out7); \
}
/*
* =============================================================================
* Description : Butterfly of 4 input vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1, out2, out3
* Details : Butterfly operation
* Example :
* out0 = in0 + in3;
* out1 = in1 + in2;
* out2 = in1 - in2;
* out3 = in0 - in3;
* =============================================================================
*/
#define LSX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lsx_vadd_b(_in0, _in3); \
_out1 = __lsx_vadd_b(_in1, _in2); \
_out2 = __lsx_vsub_b(_in1, _in2); \
_out3 = __lsx_vsub_b(_in0, _in3); \
}
#define LSX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lsx_vadd_h(_in0, _in3); \
_out1 = __lsx_vadd_h(_in1, _in2); \
_out2 = __lsx_vsub_h(_in1, _in2); \
_out3 = __lsx_vsub_h(_in0, _in3); \
}
#define LSX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lsx_vadd_w(_in0, _in3); \
_out1 = __lsx_vadd_w(_in1, _in2); \
_out2 = __lsx_vsub_w(_in1, _in2); \
_out3 = __lsx_vsub_w(_in0, _in3); \
}
#define LSX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lsx_vadd_d(_in0, _in3); \
_out1 = __lsx_vadd_d(_in1, _in2); \
_out2 = __lsx_vsub_d(_in1, _in2); \
_out3 = __lsx_vsub_d(_in0, _in3); \
}
/*
* =============================================================================
* Description : Butterfly of 8 input vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3, ~
* Outputs - _out0, _out1, _out2, _out3, ~
* Details : Butterfly operation
* Example :
* _out0 = _in0 + _in7;
* _out1 = _in1 + _in6;
* _out2 = _in2 + _in5;
* _out3 = _in3 + _in4;
* _out4 = _in3 - _in4;
* _out5 = _in2 - _in5;
* _out6 = _in1 - _in6;
* _out7 = _in0 - _in7;
* =============================================================================
*/
#define LSX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lsx_vadd_b(_in0, _in7); \
_out1 = __lsx_vadd_b(_in1, _in6); \
_out2 = __lsx_vadd_b(_in2, _in5); \
_out3 = __lsx_vadd_b(_in3, _in4); \
_out4 = __lsx_vsub_b(_in3, _in4); \
_out5 = __lsx_vsub_b(_in2, _in5); \
_out6 = __lsx_vsub_b(_in1, _in6); \
_out7 = __lsx_vsub_b(_in0, _in7); \
}
#define LSX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lsx_vadd_h(_in0, _in7); \
_out1 = __lsx_vadd_h(_in1, _in6); \
_out2 = __lsx_vadd_h(_in2, _in5); \
_out3 = __lsx_vadd_h(_in3, _in4); \
_out4 = __lsx_vsub_h(_in3, _in4); \
_out5 = __lsx_vsub_h(_in2, _in5); \
_out6 = __lsx_vsub_h(_in1, _in6); \
_out7 = __lsx_vsub_h(_in0, _in7); \
}
#define LSX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lsx_vadd_w(_in0, _in7); \
_out1 = __lsx_vadd_w(_in1, _in6); \
_out2 = __lsx_vadd_w(_in2, _in5); \
_out3 = __lsx_vadd_w(_in3, _in4); \
_out4 = __lsx_vsub_w(_in3, _in4); \
_out5 = __lsx_vsub_w(_in2, _in5); \
_out6 = __lsx_vsub_w(_in1, _in6); \
_out7 = __lsx_vsub_w(_in0, _in7); \
}
#define LSX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lsx_vadd_d(_in0, _in7); \
_out1 = __lsx_vadd_d(_in1, _in6); \
_out2 = __lsx_vadd_d(_in2, _in5); \
_out3 = __lsx_vadd_d(_in3, _in4); \
_out4 = __lsx_vsub_d(_in3, _in4); \
_out5 = __lsx_vsub_d(_in2, _in5); \
_out6 = __lsx_vsub_d(_in1, _in6); \
_out7 = __lsx_vsub_d(_in0, _in7); \
}
#endif // LSX
#ifdef __loongarch_asx
#include <lasxintrin.h>
/*
* =============================================================================
* Description : Dot product of byte vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
* Return Type - signed halfword
* Details : Unsigned byte elements from in_h are multiplied with
* unsigned byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Then this multiplied results of adjacent odd-even elements
* are added to the out vector
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) {
  /* Widening products of the even-indexed unsigned bytes come first ... */
  const __m256i even_products = __lasx_xvmulwev_h_bu(in_h, in_l);
  /* ... then the odd-indexed products are accumulated onto them. */
  return __lasx_xvmaddwod_h_bu(even_products, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of byte vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
* Return Type - signed halfword
* Details : Signed byte elements from in_h are multiplied with
* signed byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Then this multiplication results of adjacent odd-even elements
* are added to the out vector
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) {
  /* Signed byte pairs: even-lane widening products, then fold in odd lanes. */
  const __m256i even_products = __lasx_xvmulwev_h_b(in_h, in_l);
  return __lasx_xvmaddwod_h_b(even_products, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
* Return Type - signed word
* Details : Signed halfword elements from in_h are multiplied with
* signed halfword elements from in_l producing a result
* twice the size of input i.e. signed word.
* Then this multiplied results of adjacent odd-even elements
* are added to the out vector.
* Example : out = __lasx_xvdp2_w_h(in_h, in_l)
* in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
* in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
* out : 22,38,38,22, 22,38,38,22
* =============================================================================
*/
static inline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) {
  /* Signed halfword pairs: even-lane widening products, then add odd lanes. */
  const __m256i even_products = __lasx_xvmulwev_w_h(in_h, in_l);
  return __lasx_xvmaddwod_w_h(even_products, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of word vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
* Return Type - signed double
* Details : Signed word elements from in_h are multiplied with
* signed word elements from in_l producing a result
* twice the size of input i.e. signed double-word.
* Then this multiplied results of adjacent odd-even elements
* are added to the out vector.
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) {
  /* Signed word pairs: even-lane widening products, then add odd lanes. */
  const __m256i even_products = __lasx_xvmulwev_d_w(in_h, in_l);
  return __lasx_xvmaddwod_d_w(even_products, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
* Return Type - signed word
* Details : Unsigned halfword elements from in_h are multiplied with
* signed halfword elements from in_l producing a result
 *               twice the size of input i.e. signed word.
* Multiplication result of adjacent odd-even elements
* are added to the out vector
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) {
  /* Mixed signedness (unsigned in_h, signed in_l): even lanes, then odd. */
  const __m256i even_products = __lasx_xvmulwev_w_hu_h(in_h, in_l);
  return __lasx_xvmaddwod_w_hu_h(even_products, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - halfword
* Details : Signed byte elements from in_h are multiplied with
* signed byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Then this multiplied results of adjacent odd-even elements
* are added to the in_c vector.
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h,
                                          __m256i in_l) {
  /* Accumulate the even-lane products onto in_c, then the odd-lane ones. */
  const __m256i acc_even = __lasx_xvmaddwev_h_b(in_c, in_h, in_l);
  return __lasx_xvmaddwod_h_b(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - halfword
* Details : Unsigned byte elements from in_h are multiplied with
* unsigned byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Then this multiplied results of adjacent odd-even elements
* are added to the in_c vector.
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h,
                                           __m256i in_l) {
  /* Unsigned bytes: add even-lane products to in_c, then odd-lane ones. */
  const __m256i acc_even = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l);
  return __lasx_xvmaddwod_h_bu(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - halfword
* Details : Unsigned byte elements from in_h are multiplied with
* signed byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Then this multiplied results of adjacent odd-even elements
* are added to the in_c vector.
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h,
                                             __m256i in_l) {
  /* Unsigned in_h times signed in_l: even lanes onto in_c, then odd lanes. */
  const __m256i acc_even = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l);
  return __lasx_xvmaddwod_h_bu_b(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - per RTYPE
* Details : Signed halfword elements from in_h are multiplied with
* signed halfword elements from in_l producing a result
* twice the size of input i.e. signed word.
* Multiplication result of adjacent odd-even elements
* are added to the in_c vector.
* Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* in_c : 1,2,3,4, 1,2,3,4
* in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8,
* in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1,
* out : 23,40,41,26, 23,40,41,26
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h,
                                          __m256i in_l) {
  /* Signed halfwords: add even-lane products to in_c, then odd-lane ones. */
  const __m256i acc_even = __lasx_xvmaddwev_w_h(in_c, in_h, in_l);
  return __lasx_xvmaddwod_w_h(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - signed word
* Details : Unsigned halfword elements from in_h are multiplied with
* unsigned halfword elements from in_l producing a result
* twice the size of input i.e. signed word.
* Multiplication result of adjacent odd-even elements
* are added to the in_c vector.
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h,
                                           __m256i in_l) {
  /* Unsigned halfwords: add even-lane products to in_c, then odd-lane ones. */
  const __m256i acc_even = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l);
  return __lasx_xvmaddwod_w_hu(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - signed word
* Details : Unsigned halfword elements from in_h are multiplied with
* signed halfword elements from in_l producing a result
* twice the size of input i.e. signed word.
* Multiplication result of adjacent odd-even elements
* are added to the in_c vector
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h,
                                             __m256i in_l) {
  /* Unsigned in_h times signed in_l: even lanes onto in_c, then odd lanes. */
  const __m256i acc_even = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l);
  return __lasx_xvmaddwod_w_hu_h(acc_even, in_h, in_l);
}
/*
* =============================================================================
* Description : Vector Unsigned Dot Product and Subtract
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - signed halfword
* Details : Unsigned byte elements from in_h are multiplied with
* unsigned byte elements from in_l producing a result
* twice the size of input i.e. signed halfword.
* Multiplication result of adjacent odd-even elements
* are added together and subtracted from double width elements
* in_c vector.
* Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h,
                                           __m256i in_l) {
  /* Build the pairwise dot product of the unsigned bytes ... */
  const __m256i even_products = __lasx_xvmulwev_h_bu(in_h, in_l);
  const __m256i dot = __lasx_xvmaddwod_h_bu(even_products, in_h, in_l);
  /* ... and take it away from the halfword accumulator. */
  return __lasx_xvsub_h(in_c, dot);
}
/*
* =============================================================================
* Description : Vector Signed Dot Product and Subtract
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Return Type - signed word
* Details : Signed halfword elements from in_h are multiplied with
* Signed halfword elements from in_l producing a result
* twice the size of input i.e. signed word.
* Multiplication result of adjacent odd-even elements
* are added together and subtracted from double width elements
* in_c vector.
* Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l)
* in_c : 0,0,0,0, 0,0,0,0
* in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1
* in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1
* out : -7,-3,0,0, 0,-1,0,-1
* =============================================================================
*/
static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h,
                                          __m256i in_l) {
  /* Build the pairwise dot product of the signed halfwords ... */
  const __m256i even_products = __lasx_xvmulwev_w_h(in_h, in_l);
  const __m256i dot = __lasx_xvmaddwod_w_h(even_products, in_h, in_l);
  /* ... and take it away from the word accumulator. */
  return __lasx_xvsub_w(in_c, dot);
}
/*
* =============================================================================
* Description : Dot product of halfword vector elements
* Arguments : Inputs - in_h, in_l
* Output - out
 *               Return Type - signed doubleword
* Details : Signed halfword elements from in_h are multiplied with
* signed halfword elements from in_l producing a result
* four times the size of input i.e. signed doubleword.
* Then this multiplication results of four adjacent elements
* are added together and stored to the out vector.
* Example : out = __lasx_xvdp4_d_h(in_h, in_l)
* in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1
* in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1
* out : -2,0,1,1
* =============================================================================
*/
static inline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) {
  /* Two-element dot products as words ... */
  const __m256i even_products = __lasx_xvmulwev_w_h(in_h, in_l);
  const __m256i pair_sums = __lasx_xvmaddwod_w_h(even_products, in_h, in_l);
  /* ... then neighbouring word sums are added into one doubleword each. */
  return __lasx_xvhaddw_d_w(pair_sums, pair_sums);
}
/*
* =============================================================================
* Description : The high half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are added after the
* higher half of the two-fold sign extension (signed byte
* to signed halfword) and stored to the out vector.
* Example : See out = __lasx_xvaddwh_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) {
  /* Pair up the high-half bytes of in_l and in_h, then sum each pair with
   * a widening horizontal add. */
  const __m256i paired = __lasx_xvilvh_b(in_h, in_l);
  return __lasx_xvhaddw_h_b(paired, paired);
}
/*
* =============================================================================
* Description : The high half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are added after the
* higher half of the two-fold sign extension (signed halfword
* to signed word) and stored to the out vector.
* Example : out = __lasx_xvaddwh_w_h(in_h, in_l)
* in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1
* in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1
* out : 1,0,0,-1, 1,0,0, 2
* =============================================================================
*/
static inline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) {
  /* Pair up the high-half halfwords of in_l and in_h, then sum each pair
   * with a widening horizontal add. */
  const __m256i paired = __lasx_xvilvh_h(in_h, in_l);
  return __lasx_xvhaddw_w_h(paired, paired);
}
/*
* =============================================================================
* Description : The low half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are added after the
* lower half of the two-fold sign extension (signed byte
* to signed halfword) and stored to the out vector.
* Example : See out = __lasx_xvaddwl_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) {
  /* Pair up the low-half bytes of in_l and in_h, then sum each pair with
   * a widening horizontal add. */
  const __m256i paired = __lasx_xvilvl_b(in_h, in_l);
  return __lasx_xvhaddw_h_b(paired, paired);
}
/*
* =============================================================================
* Description : The low half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are added after the
* lower half of the two-fold sign extension (signed halfword
* to signed word) and stored to the out vector.
* Example : out = __lasx_xvaddwl_w_h(in_h, in_l)
* in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1
* in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1
* out : 5,-1,4,2, 1,0,2,-1
* =============================================================================
*/
static inline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) {
  /* Pair up the low-half halfwords of in_l and in_h, then sum each pair
   * with a widening horizontal add. */
  const __m256i paired = __lasx_xvilvl_h(in_h, in_l);
  return __lasx_xvhaddw_w_h(paired, paired);
}
/*
* =============================================================================
* Description : The low half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
 * Details     : The in_h vector and the in_l vector are added after the
 *               lower half of the two-fold zero extension (unsigned byte
 *               to unsigned halfword) and stored to the out vector.
* Example : See out = __lasx_xvaddwl_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) {
  /* Pair up the low-half bytes of in_l and in_h, then sum each pair with
   * an unsigned widening horizontal add. */
  const __m256i paired = __lasx_xvilvl_b(in_h, in_l);
  return __lasx_xvhaddw_hu_bu(paired, paired);
}
/*
* =============================================================================
* Description : The low half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_l vector after double zero extension (unsigned byte to
* signed halfword),added to the in_h vector.
* Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) {
  /* A widening shift by 0 is used purely to extend in_l's bytes to
   * halfwords before the add. */
  return __lasx_xvadd_h(in_h, __lasx_xvsllwil_hu_bu(in_l, 0));
}
/*
* =============================================================================
* Description : The low half of the vector elements are expanded and
* added after being doubled.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_l vector after double sign extension (signed halfword to
* signed word), added to the in_h vector.
* Example : out = __lasx_xvaddw_w_w_h(in_h, in_l)
* in_h : 0, 1,0,0, -1,0,0,1,
* in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1,
* out : 2, 0,1,2, -1,0,1,1,
* =============================================================================
*/
static inline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) {
  /* A widening shift by 0 is used purely to extend in_l's halfwords to
   * words before the add. */
  return __lasx_xvadd_w(in_h, __lasx_xvsllwil_w_h(in_l, 0));
}
/*
* =============================================================================
* Description : Multiplication and addition calculation after expansion
* of the lower half of the vector.
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are multiplied after
* the lower half of the two-fold sign extension (signed halfword
* to signed word), and the result is added to the vector in_c,
* then stored to the out vector.
* Example : out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l)
* in_c : 1,2,3,4, 5,6,7,8
* in_h : 1,2,3,4, 1,2,3,4, 5,6,7,8, 5,6,7,8
* in_l : 200, 300, 400, 500, 2000, 3000, 4000, 5000,
* -200,-300,-400,-500, -2000,-3000,-4000,-5000
* out : 201, 602,1203,2004, -995, -1794,-2793,-3992
* =============================================================================
*/
static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, __m256i in_h,
                                          __m256i in_l) {
  /* Widen the low-half halfwords of both operands to words (shift by 0). */
  const __m256i wide_h = __lasx_xvsllwil_w_h(in_h, 0);
  const __m256i wide_l = __lasx_xvsllwil_w_h(in_l, 0);
  /* Word-wise product plus the accumulator. */
  return __lasx_xvadd_w(__lasx_xvmul_w(wide_h, wide_l), in_c);
}
/*
* =============================================================================
* Description : Multiplication and addition calculation after expansion
* of the higher half of the vector.
* Arguments : Inputs - in_c, in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are multiplied after
* the higher half of the two-fold sign extension (signed
* halfword to signed word), and the result is added to
* the vector in_c, then stored to the out vector.
* Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l)
* =============================================================================
*/
static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, __m256i in_h,
                                          __m256i in_l) {
  /* Self-interleave duplicates every high-half halfword, so the even
   * positions hold exactly the elements to multiply. */
  const __m256i dup_h = __lasx_xvilvh_h(in_h, in_h);
  const __m256i dup_l = __lasx_xvilvh_h(in_l, in_l);
  /* Widening even-lane product plus the accumulator. */
  return __lasx_xvadd_w(__lasx_xvmulwev_w_h(dup_h, dup_l), in_c);
}
/*
* =============================================================================
* Description : Multiplication calculation after expansion of the lower
* half of the vector.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector and the in_l vector are multiplied after
* the lower half of the two-fold sign extension (signed
* halfword to signed word), then stored to the out vector.
* Example : out = __lasx_xvmulwl_w_h(in_h, in_l)
* in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1
* in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1
* out : 6,1,3,0, 0,0,1,0
* =============================================================================
*/
static inline __m256i __lasx_xvmulwl_w_h(__m256i in_h, __m256i in_l) {
  /* Widen the low-half halfwords of both operands to words (shift by 0),
   * then multiply word-wise. */
  const __m256i wide_h = __lasx_xvsllwil_w_h(in_h, 0);
  const __m256i wide_l = __lasx_xvsllwil_w_h(in_l, 0);
  return __lasx_xvmul_w(wide_h, wide_l);
}
/*
* =============================================================================
 * Description : Multiplication calculation after expansion of the higher
 *               half of the vector.
 * Arguments   : Inputs - in_h, in_l
 *               Output - out
 * Details     : The in_h vector and the in_l vector are multiplied after
 *               the higher half of the two-fold sign extension (signed
 *               halfword to signed word), then stored to the out vector.
* Example : out = __lasx_xvmulwh_w_h(in_h, in_l)
* in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1
* in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1
* out : 0,0,0,0, 0,0,0,1
* =============================================================================
*/
static inline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) {
  /* Self-interleave duplicates every high-half halfword, so the even
   * positions hold exactly the elements to multiply. */
  const __m256i dup_h = __lasx_xvilvh_h(in_h, in_h);
  const __m256i dup_l = __lasx_xvilvh_h(in_l, in_l);
  return __lasx_xvmulwev_w_h(dup_h, dup_l);
}
/*
* =============================================================================
* Description : The low half of the vector elements are added to the high half
* after being doubled, then saturated.
* Arguments : Inputs - in_h, in_l
* Output - out
* Details : The in_h vector adds the in_l vector after the lower half of
* the two-fold zero extension (unsigned byte to unsigned
* halfword) and then saturated. The results are stored to the out
* vector.
* Example : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l)
* in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1
* in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1,
* 0,0,0,1
* out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2,
* =============================================================================
*/
static inline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) {
  const __m256i zero = {0};
  /* Interleaving with zero bytes zero-extends in_l's low-half bytes to
   * halfwords; the add then saturates as unsigned halfwords. */
  return __lasx_xvsadd_hu(in_h, __lasx_xvilvl_b(zero, in_l));
}
/*
* =============================================================================
* Description : Clip all halfword elements of input vector between min & max
* out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in))
* Arguments : Inputs - in (input vector)
* - min (min threshold)
* - max (max threshold)
 *               Outputs - out (output vector with clipped elements)
* Return Type - signed halfword
* Example : out = __lasx_xvclip_h(in, min, max)
* in : -8,2,280,249, -8,255,280,249, 4,4,4,4, 5,5,5,5
* min : 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1
* max : 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9
* out : 1,2,9,9, 1,9,9,9, 4,4,4,4, 5,5,5,5
* =============================================================================
*/
static inline __m256i __lasx_xvclip_h(__m256i in, __m256i min, __m256i max) {
  /* Raise to the lower bound first, then cap at the upper bound. */
  return __lasx_xvmin_h(max, __lasx_xvmax_h(min, in));
}
/*
* =============================================================================
* Description : Clip all signed halfword elements of input vector
* between 0 & 255
* Arguments : Inputs - in (input vector)
* Outputs - out (output vector with clipped elements)
* Return Type - signed halfword
* Example : See out = __lasx_xvclip255_w(in)
* =============================================================================
*/
static inline __m256i __lasx_xvclip255_h(__m256i in) {
  /* Negative halfwords are floored at 0, then the unsigned saturation
   * (immediate 7) caps the rest at 255. */
  return __lasx_xvsat_hu(__lasx_xvmaxi_h(in, 0), 7);
}
/*
* =============================================================================
* Description : Clip all signed word elements of input vector
* between 0 & 255
* Arguments : Inputs - in (input vector)
* Output - out (output vector with clipped elements)
* Return Type - signed word
* Example : out = __lasx_xvclip255_w(in)
* in : -8,255,280,249, -8,255,280,249
* out : 0,255,255,249, 0,255,255,249
* =============================================================================
*/
static inline __m256i __lasx_xvclip255_w(__m256i in) {
  /* Negative words are floored at 0, then the unsigned saturation
   * (immediate 7) caps the rest at 255. */
  return __lasx_xvsat_wu(__lasx_xvmaxi_w(in, 0), 7);
}
/*
* =============================================================================
* Description : Indexed halfword element values are replicated to all
* elements in output vector. If 'idx < 8' use xvsplati_l_*,
* if 'idx >= 8' use xvsplati_h_*.
* Arguments : Inputs - in, idx
* Output - out
* Details : Idx element value from in vector is replicated to all
* elements in out vector.
* Valid index range for halfword operation is 0-7
* Example : out = __lasx_xvsplati_l_h(in, idx)
* in : 20,10,11,12, 13,14,15,16, 0,0,2,0, 0,0,0,0
* idx : 0x02
* out : 11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,11,11
* =============================================================================
*/
static inline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) {
  /* Selector 0x02 places the low 128-bit lane of `in` in both lanes, so
   * the per-lane replicate below yields the same element everywhere. */
  const __m256i low_lane_twice = __lasx_xvpermi_q(in, in, 0x02);
  return __lasx_xvreplve_h(low_lane_twice, idx);
}
/*
* =============================================================================
* Description : Indexed halfword element values are replicated to all
* elements in output vector. If 'idx < 8' use xvsplati_l_*,
* if 'idx >= 8' use xvsplati_h_*.
* Arguments : Inputs - in, idx
* Output - out
* Details : Idx element value from in vector is replicated to all
* elements in out vector.
 *               Valid index range for this high-lane variant is 8-15
* Example : out = __lasx_xvsplati_h_h(in, idx)
* in : 20,10,11,12, 13,14,15,16, 0,2,0,0, 0,0,0,0
* idx : 0x09
* out : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2
* =============================================================================
*/
static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
  /* Selector 0x13 places the high 128-bit lane of `in` in both lanes, so
   * the per-lane replicate below yields the same element everywhere. */
  const __m256i high_lane_twice = __lasx_xvpermi_q(in, in, 0x13);
  return __lasx_xvreplve_h(high_lane_twice, idx);
}
/*
* =============================================================================
* Description : Transpose 4x4 block with double-word elements in vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3
* Outputs - _out0, _out1, _out2, _out3
* Example : LASX_TRANSPOSE4x4_D
* _in0 : 1,2,3,4
* _in1 : 1,2,3,4
* _in2 : 1,2,3,4
* _in3 : 1,2,3,4
*
* _out0 : 1,1,1,1
* _out1 : 2,2,2,2
* _out2 : 3,3,3,3
* _out3 : 4,4,4,4
* =============================================================================
*/
#define LASX_TRANSPOSE4x4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \
_out3) \
{ \
__m256i _tmp0, _tmp1, _tmp2, _tmp3; \
_tmp0 = __lasx_xvilvl_d(_in1, _in0); \
_tmp1 = __lasx_xvilvh_d(_in1, _in0); \
_tmp2 = __lasx_xvilvl_d(_in3, _in2); \
_tmp3 = __lasx_xvilvh_d(_in3, _in2); \
_out0 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x20); \
_out2 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x31); \
_out1 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x20); \
_out3 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x31); \
}
/*
* =============================================================================
* Description : Transpose 8x8 block with word elements in vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
* _out7
* Example : LASX_TRANSPOSE8x8_W
* _in0 : 1,2,3,4,5,6,7,8
* _in1 : 2,2,3,4,5,6,7,8
* _in2 : 3,2,3,4,5,6,7,8
* _in3 : 4,2,3,4,5,6,7,8
* _in4 : 5,2,3,4,5,6,7,8
* _in5 : 6,2,3,4,5,6,7,8
* _in6 : 7,2,3,4,5,6,7,8
* _in7 : 8,2,3,4,5,6,7,8
*
* _out0 : 1,2,3,4,5,6,7,8
* _out1 : 2,2,2,2,2,2,2,2
* _out2 : 3,3,3,3,3,3,3,3
* _out3 : 4,4,4,4,4,4,4,4
* _out4 : 5,5,5,5,5,5,5,5
* _out5 : 6,6,6,6,6,6,6,6
* _out6 : 7,7,7,7,7,7,7,7
* _out7 : 8,8,8,8,8,8,8,8
* =============================================================================
*/
#define LASX_TRANSPOSE8x8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
__m256i _s0_m, _s1_m; \
__m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
__m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \
\
_s0_m = __lasx_xvilvl_w(_in2, _in0); \
_s1_m = __lasx_xvilvl_w(_in3, _in1); \
_tmp0_m = __lasx_xvilvl_w(_s1_m, _s0_m); \
_tmp1_m = __lasx_xvilvh_w(_s1_m, _s0_m); \
_s0_m = __lasx_xvilvh_w(_in2, _in0); \
_s1_m = __lasx_xvilvh_w(_in3, _in1); \
_tmp2_m = __lasx_xvilvl_w(_s1_m, _s0_m); \
_tmp3_m = __lasx_xvilvh_w(_s1_m, _s0_m); \
_s0_m = __lasx_xvilvl_w(_in6, _in4); \
_s1_m = __lasx_xvilvl_w(_in7, _in5); \
_tmp4_m = __lasx_xvilvl_w(_s1_m, _s0_m); \
_tmp5_m = __lasx_xvilvh_w(_s1_m, _s0_m); \
_s0_m = __lasx_xvilvh_w(_in6, _in4); \
_s1_m = __lasx_xvilvh_w(_in7, _in5); \
_tmp6_m = __lasx_xvilvl_w(_s1_m, _s0_m); \
_tmp7_m = __lasx_xvilvh_w(_s1_m, _s0_m); \
_out0 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x20); \
_out1 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x20); \
_out2 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x20); \
_out3 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x20); \
_out4 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x31); \
_out5 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x31); \
_out6 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x31); \
_out7 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x31); \
}
/*
* =============================================================================
* Description : Transpose input 16x8 byte block
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7,
* _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15
* (input 16x8 byte block)
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
* _out7 (output 8x16 byte block)
* Details : The rows of the matrix become columns, and the columns become
* rows.
* Example : See LASX_TRANSPOSE16x8_H
* =============================================================================
*/
#define LASX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_in8, _in9, _in10, _in11, _in12, _in13, _in14, \
_in15, _out0, _out1, _out2, _out3, _out4, _out5, \
_out6, _out7) \
{ \
__m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
__m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \
\
_tmp0_m = __lasx_xvilvl_b(_in2, _in0); \
_tmp1_m = __lasx_xvilvl_b(_in3, _in1); \
_tmp2_m = __lasx_xvilvl_b(_in6, _in4); \
_tmp3_m = __lasx_xvilvl_b(_in7, _in5); \
_tmp4_m = __lasx_xvilvl_b(_in10, _in8); \
_tmp5_m = __lasx_xvilvl_b(_in11, _in9); \
_tmp6_m = __lasx_xvilvl_b(_in14, _in12); \
_tmp7_m = __lasx_xvilvl_b(_in15, _in13); \
_out0 = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \
_out1 = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \
_out2 = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \
_out3 = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \
_out4 = __lasx_xvilvl_b(_tmp5_m, _tmp4_m); \
_out5 = __lasx_xvilvh_b(_tmp5_m, _tmp4_m); \
_out6 = __lasx_xvilvl_b(_tmp7_m, _tmp6_m); \
_out7 = __lasx_xvilvh_b(_tmp7_m, _tmp6_m); \
_tmp0_m = __lasx_xvilvl_w(_out2, _out0); \
_tmp2_m = __lasx_xvilvh_w(_out2, _out0); \
_tmp4_m = __lasx_xvilvl_w(_out3, _out1); \
_tmp6_m = __lasx_xvilvh_w(_out3, _out1); \
_tmp1_m = __lasx_xvilvl_w(_out6, _out4); \
_tmp3_m = __lasx_xvilvh_w(_out6, _out4); \
_tmp5_m = __lasx_xvilvl_w(_out7, _out5); \
_tmp7_m = __lasx_xvilvh_w(_out7, _out5); \
_out0 = __lasx_xvilvl_d(_tmp1_m, _tmp0_m); \
_out1 = __lasx_xvilvh_d(_tmp1_m, _tmp0_m); \
_out2 = __lasx_xvilvl_d(_tmp3_m, _tmp2_m); \
_out3 = __lasx_xvilvh_d(_tmp3_m, _tmp2_m); \
_out4 = __lasx_xvilvl_d(_tmp5_m, _tmp4_m); \
_out5 = __lasx_xvilvh_d(_tmp5_m, _tmp4_m); \
_out6 = __lasx_xvilvl_d(_tmp7_m, _tmp6_m); \
_out7 = __lasx_xvilvh_d(_tmp7_m, _tmp6_m); \
}
/*
* =============================================================================
* Description : Transpose input 16x8 halfword block
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7,
* _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15
* (input 16x8 halfword block)
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
* _out7 (output 8x16 halfword block)
* Details : The rows of the matrix become columns, and the columns become
* rows.
* Example : LASX_TRANSPOSE16x8_H
* _in0 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in2 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in4 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in6 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in8 : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in10 : 0,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in11 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in12 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in13 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in14 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
* _in15 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
*
* _out0 : 1,2,3,4,5,6,7,8,9,1,0,2,3,7,5,6
* _out1 : 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
* _out2 : 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
* _out3 : 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
* _out4 : 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
* _out5 : 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
* _out6 : 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
* _out7 : 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
* =============================================================================
*/
#define LASX_TRANSPOSE16x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_in8, _in9, _in10, _in11, _in12, _in13, _in14, \
_in15, _out0, _out1, _out2, _out3, _out4, _out5, \
_out6, _out7) \
{ \
__m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
__m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \
__m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \
\
_tmp0_m = __lasx_xvilvl_h(_in2, _in0); \
_tmp1_m = __lasx_xvilvl_h(_in3, _in1); \
_tmp2_m = __lasx_xvilvl_h(_in6, _in4); \
_tmp3_m = __lasx_xvilvl_h(_in7, _in5); \
_tmp4_m = __lasx_xvilvl_h(_in10, _in8); \
_tmp5_m = __lasx_xvilvl_h(_in11, _in9); \
_tmp6_m = __lasx_xvilvl_h(_in14, _in12); \
_tmp7_m = __lasx_xvilvl_h(_in15, _in13); \
_t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \
_t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \
_t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \
_t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \
_t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \
_t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \
_t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \
_t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \
_tmp0_m = __lasx_xvilvl_d(_t2, _t0); \
_tmp2_m = __lasx_xvilvh_d(_t2, _t0); \
_tmp4_m = __lasx_xvilvl_d(_t3, _t1); \
_tmp6_m = __lasx_xvilvh_d(_t3, _t1); \
_tmp1_m = __lasx_xvilvl_d(_t6, _t4); \
_tmp3_m = __lasx_xvilvh_d(_t6, _t4); \
_tmp5_m = __lasx_xvilvl_d(_t7, _t5); \
_tmp7_m = __lasx_xvilvh_d(_t7, _t5); \
_out0 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \
_out1 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \
_out2 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \
_out3 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \
\
_tmp0_m = __lasx_xvilvh_h(_in2, _in0); \
_tmp1_m = __lasx_xvilvh_h(_in3, _in1); \
_tmp2_m = __lasx_xvilvh_h(_in6, _in4); \
_tmp3_m = __lasx_xvilvh_h(_in7, _in5); \
_tmp4_m = __lasx_xvilvh_h(_in10, _in8); \
_tmp5_m = __lasx_xvilvh_h(_in11, _in9); \
_tmp6_m = __lasx_xvilvh_h(_in14, _in12); \
_tmp7_m = __lasx_xvilvh_h(_in15, _in13); \
_t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \
_t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \
_t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \
_t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \
_t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \
_t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \
_t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \
_t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \
_tmp0_m = __lasx_xvilvl_d(_t2, _t0); \
_tmp2_m = __lasx_xvilvh_d(_t2, _t0); \
_tmp4_m = __lasx_xvilvl_d(_t3, _t1); \
_tmp6_m = __lasx_xvilvh_d(_t3, _t1); \
_tmp1_m = __lasx_xvilvl_d(_t6, _t4); \
_tmp3_m = __lasx_xvilvh_d(_t6, _t4); \
_tmp5_m = __lasx_xvilvl_d(_t7, _t5); \
_tmp7_m = __lasx_xvilvh_d(_t7, _t5); \
_out4 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \
_out5 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \
_out6 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \
_out7 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \
}
/*
* =============================================================================
* Description : Transpose 4x4 block with halfword elements in vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3
* Outputs - _out0, _out1, _out2, _out3
* Return Type - signed halfword
* Details : The rows of the matrix become columns, and the columns become
* rows.
* Example : See LASX_TRANSPOSE8x8_H
* =============================================================================
*/
#define LASX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \
_out3) \
{ \
__m256i _s0_m, _s1_m; \
\
_s0_m = __lasx_xvilvl_h(_in1, _in0); \
_s1_m = __lasx_xvilvl_h(_in3, _in2); \
_out0 = __lasx_xvilvl_w(_s1_m, _s0_m); \
_out2 = __lasx_xvilvh_w(_s1_m, _s0_m); \
_out1 = __lasx_xvilvh_d(_out0, _out0); \
_out3 = __lasx_xvilvh_d(_out2, _out2); \
}
/*
* =============================================================================
* Description : Transpose input 8x8 byte block
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
* (input 8x8 byte block)
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
* _out7 (output 8x8 byte block)
* Example : See LASX_TRANSPOSE8x8_H
* =============================================================================
*/
#define LASX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
__m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
__m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \
_tmp0_m = __lasx_xvilvl_b(_in2, _in0); \
_tmp1_m = __lasx_xvilvl_b(_in3, _in1); \
_tmp2_m = __lasx_xvilvl_b(_in6, _in4); \
_tmp3_m = __lasx_xvilvl_b(_in7, _in5); \
_tmp4_m = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \
_tmp5_m = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \
_tmp6_m = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \
_tmp7_m = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \
_out0 = __lasx_xvilvl_w(_tmp6_m, _tmp4_m); \
_out2 = __lasx_xvilvh_w(_tmp6_m, _tmp4_m); \
_out4 = __lasx_xvilvl_w(_tmp7_m, _tmp5_m); \
_out6 = __lasx_xvilvh_w(_tmp7_m, _tmp5_m); \
_out1 = __lasx_xvbsrl_v(_out0, 8); \
_out3 = __lasx_xvbsrl_v(_out2, 8); \
_out5 = __lasx_xvbsrl_v(_out4, 8); \
_out7 = __lasx_xvbsrl_v(_out6, 8); \
}
/*
* =============================================================================
* Description : Transpose 8x8 block with halfword elements in vectors.
* Arguments : Inputs - _in0, _in1, ~
* Outputs - _out0, _out1, ~
* Details : The rows of the matrix become columns, and the columns become
* rows.
* Example : LASX_TRANSPOSE8x8_H
* _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
* _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8
* _in2 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8
* _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
* _in4 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8
* _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
* _in6 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
* _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8
*
* _out0 : 1,8,8,1, 9,1,1,9, 1,8,8,1, 9,1,1,9
* _out1 : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2
* _out2 : 3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3
* _out3 : 4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4
* _out4 : 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5
* _out5 : 6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6
* _out6 : 7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7
* _out7 : 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8
* =============================================================================
*/
#define LASX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
__m256i _s0_m, _s1_m; \
__m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \
__m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \
\
_s0_m = __lasx_xvilvl_h(_in6, _in4); \
_s1_m = __lasx_xvilvl_h(_in7, _in5); \
_tmp0_m = __lasx_xvilvl_h(_s1_m, _s0_m); \
_tmp1_m = __lasx_xvilvh_h(_s1_m, _s0_m); \
_s0_m = __lasx_xvilvh_h(_in6, _in4); \
_s1_m = __lasx_xvilvh_h(_in7, _in5); \
_tmp2_m = __lasx_xvilvl_h(_s1_m, _s0_m); \
_tmp3_m = __lasx_xvilvh_h(_s1_m, _s0_m); \
\
_s0_m = __lasx_xvilvl_h(_in2, _in0); \
_s1_m = __lasx_xvilvl_h(_in3, _in1); \
_tmp4_m = __lasx_xvilvl_h(_s1_m, _s0_m); \
_tmp5_m = __lasx_xvilvh_h(_s1_m, _s0_m); \
_s0_m = __lasx_xvilvh_h(_in2, _in0); \
_s1_m = __lasx_xvilvh_h(_in3, _in1); \
_tmp6_m = __lasx_xvilvl_h(_s1_m, _s0_m); \
_tmp7_m = __lasx_xvilvh_h(_s1_m, _s0_m); \
\
_out0 = __lasx_xvpickev_d(_tmp0_m, _tmp4_m); \
_out2 = __lasx_xvpickev_d(_tmp1_m, _tmp5_m); \
_out4 = __lasx_xvpickev_d(_tmp2_m, _tmp6_m); \
_out6 = __lasx_xvpickev_d(_tmp3_m, _tmp7_m); \
_out1 = __lasx_xvpickod_d(_tmp0_m, _tmp4_m); \
_out3 = __lasx_xvpickod_d(_tmp1_m, _tmp5_m); \
_out5 = __lasx_xvpickod_d(_tmp2_m, _tmp6_m); \
_out7 = __lasx_xvpickod_d(_tmp3_m, _tmp7_m); \
}
/*
* =============================================================================
* Description : Butterfly of 4 input vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3
* Outputs - _out0, _out1, _out2, _out3
* Details : Butterfly operation
* Example : LASX_BUTTERFLY_4
* _out0 = _in0 + _in3;
* _out1 = _in1 + _in2;
* _out2 = _in1 - _in2;
* _out3 = _in0 - _in3;
* =============================================================================
*/
/* Byte variant (__lasx_xvadd_b / __lasx_xvsub_b). The inputs are read again
 * after _out0 and _out1 have been written, so outputs must not name the same
 * variables as inputs. Expands to a bare { } block. */
#define LASX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lasx_xvadd_b(_in0, _in3); \
_out1 = __lasx_xvadd_b(_in1, _in2); \
_out2 = __lasx_xvsub_b(_in1, _in2); \
_out3 = __lasx_xvsub_b(_in0, _in3); \
}
/* Halfword variant of the 4-input butterfly (__lasx_xvadd_h /
 * __lasx_xvsub_h): sums of the outer and inner pairs go to _out0/_out1,
 * differences to _out2/_out3. Outputs must not name the same variables as
 * inputs, because the inputs are read again after _out0/_out1 are written. */
#define LASX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lasx_xvadd_h(_in0, _in3); \
_out1 = __lasx_xvadd_h(_in1, _in2); \
_out2 = __lasx_xvsub_h(_in1, _in2); \
_out3 = __lasx_xvsub_h(_in0, _in3); \
}
/* Word variant of the 4-input butterfly (__lasx_xvadd_w / __lasx_xvsub_w):
 * sums of the outer and inner pairs go to _out0/_out1, differences to
 * _out2/_out3. Outputs must not name the same variables as inputs, because
 * the inputs are read again after _out0/_out1 are written. */
#define LASX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lasx_xvadd_w(_in0, _in3); \
_out1 = __lasx_xvadd_w(_in1, _in2); \
_out2 = __lasx_xvsub_w(_in1, _in2); \
_out3 = __lasx_xvsub_w(_in0, _in3); \
}
/* Doubleword variant of the 4-input butterfly (__lasx_xvadd_d /
 * __lasx_xvsub_d): sums of the outer and inner pairs go to _out0/_out1,
 * differences to _out2/_out3. Outputs must not name the same variables as
 * inputs, because the inputs are read again after _out0/_out1 are written. */
#define LASX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
{ \
_out0 = __lasx_xvadd_d(_in0, _in3); \
_out1 = __lasx_xvadd_d(_in1, _in2); \
_out2 = __lasx_xvsub_d(_in1, _in2); \
_out3 = __lasx_xvsub_d(_in0, _in3); \
}
/*
* =============================================================================
* Description : Butterfly of 8 input vectors
* Arguments : Inputs - _in0, _in1, _in2, _in3, ~
* Outputs - _out0, _out1, _out2, _out3, ~
* Details : Butterfly operation
* Example : LASX_BUTTERFLY_8
* _out0 = _in0 + _in7;
* _out1 = _in1 + _in6;
* _out2 = _in2 + _in5;
* _out3 = _in3 + _in4;
* _out4 = _in3 - _in4;
* _out5 = _in2 - _in5;
* _out6 = _in1 - _in6;
* _out7 = _in0 - _in7;
* =============================================================================
*/
/* Byte variant (__lasx_xvadd_b / __lasx_xvsub_b). The inputs are read again
 * after _out0.._out3 have been written, so outputs must not name the same
 * variables as inputs. Expands to a bare { } block. */
#define LASX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lasx_xvadd_b(_in0, _in7); \
_out1 = __lasx_xvadd_b(_in1, _in6); \
_out2 = __lasx_xvadd_b(_in2, _in5); \
_out3 = __lasx_xvadd_b(_in3, _in4); \
_out4 = __lasx_xvsub_b(_in3, _in4); \
_out5 = __lasx_xvsub_b(_in2, _in5); \
_out6 = __lasx_xvsub_b(_in1, _in6); \
_out7 = __lasx_xvsub_b(_in0, _in7); \
}
/* Halfword variant of the 8-input butterfly (__lasx_xvadd_h /
 * __lasx_xvsub_h): _out0.._out3 receive _inK + _in(7-K) for K = 0..3 and
 * _out4.._out7 receive _inK - _in(7-K) for K = 3..0. Outputs must not name
 * the same variables as inputs, because the inputs are read again after the
 * sums are written. */
#define LASX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lasx_xvadd_h(_in0, _in7); \
_out1 = __lasx_xvadd_h(_in1, _in6); \
_out2 = __lasx_xvadd_h(_in2, _in5); \
_out3 = __lasx_xvadd_h(_in3, _in4); \
_out4 = __lasx_xvsub_h(_in3, _in4); \
_out5 = __lasx_xvsub_h(_in2, _in5); \
_out6 = __lasx_xvsub_h(_in1, _in6); \
_out7 = __lasx_xvsub_h(_in0, _in7); \
}
/* Word variant of the 8-input butterfly (__lasx_xvadd_w / __lasx_xvsub_w):
 * _out0.._out3 receive _inK + _in(7-K) for K = 0..3 and _out4.._out7 receive
 * _inK - _in(7-K) for K = 3..0. Outputs must not name the same variables as
 * inputs, because the inputs are read again after the sums are written. */
#define LASX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lasx_xvadd_w(_in0, _in7); \
_out1 = __lasx_xvadd_w(_in1, _in6); \
_out2 = __lasx_xvadd_w(_in2, _in5); \
_out3 = __lasx_xvadd_w(_in3, _in4); \
_out4 = __lasx_xvsub_w(_in3, _in4); \
_out5 = __lasx_xvsub_w(_in2, _in5); \
_out6 = __lasx_xvsub_w(_in1, _in6); \
_out7 = __lasx_xvsub_w(_in0, _in7); \
}
/* Doubleword variant of the 8-input butterfly (__lasx_xvadd_d /
 * __lasx_xvsub_d): _out0.._out3 receive _inK + _in(7-K) for K = 0..3 and
 * _out4.._out7 receive _inK - _in(7-K) for K = 3..0. Outputs must not name
 * the same variables as inputs, because the inputs are read again after the
 * sums are written. */
#define LASX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
_out7) \
{ \
_out0 = __lasx_xvadd_d(_in0, _in7); \
_out1 = __lasx_xvadd_d(_in1, _in6); \
_out2 = __lasx_xvadd_d(_in2, _in5); \
_out3 = __lasx_xvadd_d(_in3, _in4); \
_out4 = __lasx_xvsub_d(_in3, _in4); \
_out5 = __lasx_xvsub_d(_in2, _in5); \
_out6 = __lasx_xvsub_d(_in1, _in6); \
_out7 = __lasx_xvsub_d(_in0, _in7); \
}
#endif // LASX
/*
 * =============================================================================
 * Description : Print out elements in vector.
 * Arguments   : Inputs  - RTYPE, element_num, in0, enter
 *               Outputs -
 * Details     : Print out 'element_num' elements in 'RTYPE' vector 'in0', if
 *               'enter' is TRUE, prefix "\nVP:" will be added first.
 *               Every element is printed with printf("%d,"), so <stdio.h>
 *               must be included by the user of the macro.
 *               'in0' and 'element_num' are parenthesized in the expansion,
 *               so any expression (for example a ternary) may be passed.
 *               'element_num' is evaluated once per loop test.
 *               The expansion is a bare { } block, not an expression.
 * Example     : VECT_PRINT(v4i32,4,in0,1); // in0: 1,2,3,4
 *               VP:1,2,3,4,
 * =============================================================================
 */
#define VECT_PRINT(RTYPE, element_num, in0, enter) \
  { \
    /* Macro-private names, unlikely to clash with caller arguments. */ \
    RTYPE _vp_vec_m = (RTYPE)(in0); \
    int _vp_idx_m; \
    if (enter) \
      printf("\nVP:"); \
    for (_vp_idx_m = 0; _vp_idx_m < (element_num); _vp_idx_m++) \
      printf("%d,", _vp_vec_m[_vp_idx_m]); \
  }
#endif /* LOONGSON_INTRINSICS_H */
#endif /* INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H */
/*
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
#define INCLUDE_LIBYUV_MACROS_MSA_H_
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <msa.h>
#include <stdint.h>
#if (__mips_isa_rev >= 6)
/* Loads a 32-bit word from 'psrc' with a single "lw" instruction and yields
 * it as the value of the macro (GNU statement expression). The pointee is
 * passed to the asm as an "m" input operand. This variant is selected when
 * __mips_isa_rev >= 6. */
#define LW(psrc) \
({ \
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \
asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_lw_m] "m"(*psrc_lw_m)); \
val_m; \
})
#if (__mips == 64)
/* Loads a 64-bit doubleword from 'psrc' with a single "ld" instruction and
 * yields it as the value of the macro (GNU statement expression). The
 * pointee is passed to the asm as an "m" input operand. This variant is
 * selected when __mips_isa_rev >= 6 and __mips == 64. */
#define LD(psrc) \
({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \
asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_ld_m] "m"(*psrc_ld_m)); \
val_m; \
})
#else // !(__mips == 64)
/* 64-bit load built from two LW() calls: the word at 'psrc' becomes bits
 * 31..0 of the result and the word at 'psrc' + 4 becomes bits 63..32. The
 * value of the macro (GNU statement expression) is the combined uint64_t. */
#define LD(psrc) \
  ({ \
    const uint8_t* ld_src_m = (const uint8_t*)(psrc); \
    const uint32_t ld_lo_m = LW(ld_src_m); \
    const uint32_t ld_hi_m = LW(ld_src_m + 4); \
    (uint64_t)(((uint64_t)ld_hi_m << 32) | (uint64_t)ld_lo_m); /* NOLINT */ \
  })
#endif // (__mips == 64)
/* Stores the 32-bit value 'val' to 'pdst' with a single "sw" instruction.
 * 'val' is copied into a uint32_t first, so it is evaluated once; the
 * destination is passed to the asm as an "=m" output operand. This variant
 * is selected when __mips_isa_rev >= 6. */
#define SW(val, pdst) \
({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \
asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \
})
#if (__mips == 64)
/* Stores the 64-bit value 'val' to 'pdst' with a single "sd" instruction.
 * 'val' is copied into a uint64_t first, so it is evaluated once; the
 * destination is passed to the asm as an "=m" output operand. This variant
 * is selected when __mips_isa_rev >= 6 and __mips == 64. */
#define SD(val, pdst) \
({ \
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
uint64_t val_m = (val); \
asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
: [pdst_sd_m] "=m"(*pdst_sd_m) \
: [val_m] "r"(val_m)); \
})
#else // !(__mips == 64)
/* 64-bit store built from two SW() calls: bits 31..0 of 'val' are stored at
 * 'pdst' and bits 63..32 at 'pdst' + 4.
 * 'val' is converted to uint64_t exactly once, matching the single-"sd"
 * variant: side effects in the argument are not duplicated, and the shift
 * by 32 is well defined even when the argument's own type is 32 bits wide
 * or narrower. */
#define SD(val, pdst) \
  ({ \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (val); \
    const uint32_t val0_m = (uint32_t)(val_sd_m & 0x00000000FFFFFFFF); \
    const uint32_t val1_m = (uint32_t)((val_sd_m >> 32) & 0x00000000FFFFFFFF); \
    SW(val0_m, pdst_sd_m); \
    SW(val1_m, pdst_sd_m + 4); \
  })
#endif // !(__mips == 64)
#else // !(__mips_isa_rev >= 6)
/* Loads a 32-bit word from 'psrc' using the instruction pair "lwr" at
 * offset 0 and "lwl" at offset 3, both targeting the same register, and
 * yields it as the value of the macro (GNU statement expression).
 * Presumably this is the unaligned-load idiom for little-endian MIPS before
 * release 6 - confirm against the MIPS ISA manual.
 * The output uses the early-clobber constraint "=&r".
 * NOTE(review): the pointer is passed as a plain "r" input with no memory
 * operand or "memory" clobber, and the cast drops const from 'psrc'. */
#define LW(psrc) \
({ \
uint8_t* psrc_lw_m = (uint8_t*)(psrc); \
uint32_t val_lw_m; \
\
__asm__ volatile( \
"lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
"lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
\
: [val_lw_m] "=&r"(val_lw_m) \
: [psrc_lw_m] "r"(psrc_lw_m)); \
\
val_lw_m; \
})
#if (__mips == 64)
/* Loads a 64-bit doubleword from 'psrc' using the instruction pair "ldr" at
 * offset 0 and "ldl" at offset 7, both targeting the same register, and
 * yields it as the value of the macro (GNU statement expression).
 * Presumably this is the 64-bit counterpart of the lwr/lwl unaligned-load
 * idiom - confirm against the MIPS ISA manual.
 * The output uses the early-clobber constraint "=&r".
 * NOTE(review): the pointer is passed as a plain "r" input with no memory
 * operand or "memory" clobber, and the cast drops const from 'psrc'. */
#define LD(psrc) \
({ \
uint8_t* psrc_ld_m = (uint8_t*)(psrc); \
uint64_t val_ld_m = 0; \
\
__asm__ volatile( \
"ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
"ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
\
: [val_ld_m] "=&r"(val_ld_m) \
: [psrc_ld_m] "r"(psrc_ld_m)); \
\
val_ld_m; \
})
#else // !(__mips == 64)
/* 64-bit load for 32-bit targets, assembled from two LW() calls. The first
 * word (at 'psrc') supplies the low 32 bits and the second word (at
 * 'psrc' + 4) supplies the high 32 bits of the uint64_t that the macro
 * evaluates to (GNU statement expression). */
#define LD(psrc) \
  ({ \
    const uint8_t* ld_base_m = (const uint8_t*)(psrc); \
    const uint32_t ld_word0_m = LW(ld_base_m); \
    const uint32_t ld_word1_m = LW(ld_base_m + 4); \
    uint64_t ld_result_m = (uint64_t)ld_word1_m; /* NOLINT */ \
    ld_result_m <<= 32; \
    ld_result_m |= (uint64_t)ld_word0_m; \
    ld_result_m; \
  })
#endif // (__mips == 64)
/* Stores the 32-bit value 'val' to 'pdst' using the "usw" mnemonic
 * (presumably the assembler's unaligned store-word macro - confirm against
 * the assembler documentation). 'val' is copied into a uint32_t first, so
 * it is evaluated once; the destination is passed as an "=m" output operand.
 * This variant is selected when __mips_isa_rev < 6. */
#define SW(val, pdst) \
({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \
asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \
})
/* 64-bit store for pre-release-6 targets, built from two SW() calls: bits
 * 31..0 of 'val' are stored at 'pdst' and bits 63..32 at 'pdst' + 4.
 * 'val' is converted to uint64_t exactly once: side effects in the argument
 * are not duplicated, and the shift by 32 is well defined even when the
 * argument's own type is 32 bits wide or narrower. */
#define SD(val, pdst) \
  ({ \
    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
    const uint64_t val_sd_m = (val); \
    const uint32_t val0_m = (uint32_t)(val_sd_m & 0x00000000FFFFFFFF); \
    const uint32_t val1_m = (uint32_t)((val_sd_m >> 32) & 0x00000000FFFFFFFF); \
    SW(val0_m, pdst_sd_m); \
    SW(val1_m, pdst_sd_m + 4); \
  })
#endif // (__mips_isa_rev >= 6)
// TODO(fbarchard): Consider removing __VAR_ARGS versions.
/* Single-vector load/store helpers: 'psrc' / 'pdst' is cast to a pointer to
 * RTYPE and dereferenced. Each expansion is fully parenthesized so it can be
 * followed by postfix operators, e.g. LD_UB(p)[0]. */
#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc))) /* NOLINT */
#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)
#define LD_H(RTYPE, psrc) (*((RTYPE*)(psrc))) /* NOLINT */
#define LD_UH(...) LD_H(const v8u16, __VA_ARGS__)
#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in)) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
#define ST_H(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in)) /* NOLINT */
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
                 'stride' is parenthesized in the expansion, so it may be any
                 expression; it is added to 'psrc' with ordinary pointer
                 arithmetic. The 4-vector forms continue at
                 (psrc + 2 * stride).
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
  { \
    out0 = LD_B(RTYPE, (psrc)); \
    out1 = LD_B(RTYPE, (psrc) + (stride)); \
  }
#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
  { \
    LD_B2(RTYPE, (psrc), (stride), out0, out1); \
    LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3); \
  }
#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)
/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
                 'stride' is parenthesized in the expansion, so it may be any
                 expression. The 4-vector forms continue at
                 (pdst + 2 * stride).
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
  { \
    ST_B(RTYPE, in0, (pdst)); \
    ST_B(RTYPE, in1, (pdst) + (stride)); \
  }
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
  { \
    ST_B2(RTYPE, in0, in1, (pdst), (stride)); \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \
  }
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
/* Description : Store vectors of 8 halfword elements with stride
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 8 halfword elements from 'in0' to (pdst)
                 Store 8 halfword elements from 'in1' to (pdst + stride)
                 'stride' is parenthesized in the expansion, so it may be any
                 expression.
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
  { \
    ST_H(RTYPE, in0, (pdst)); \
    ST_H(RTYPE, in1, (pdst) + (stride)); \
  }
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'
                 Every argument is parenthesized before it is cast to v16i8,
                 so compound expressions may be passed.
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0)); \
    out1 = (RTYPE)__msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2)); \
  }
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
/* Description : Interleave both left and right half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'
                 Left half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out1'
                 Both arguments are parenthesized before they are cast to
                 v16i8, so compound expressions may be passed; each input is
                 expanded twice.
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
#endif // INCLUDE_LIBYUV_MACROS_MSA_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
// NOTE: For a simplified public API use convert.h MJPGToI420().
struct jpeg_common_struct;
struct jpeg_decompress_struct;
struct jpeg_source_mgr;
namespace libyuv {
// ValidateJpeg is declared with C linkage inside namespace libyuv. This
// whole region is already inside an outer "#ifdef __cplusplus", so the inner
// guards are always true here.
#ifdef __cplusplus
extern "C" {
#endif
// Takes a pointer to a sample and its size. Presumably returns LIBYUV_TRUE
// when the buffer looks like a valid JPEG - confirm in the implementation.
LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);
#ifdef __cplusplus
} // extern "C"
#endif
// Sentinel size value with all 32 bits set. Presumably passed where the
// length of the data is not known - confirm at the call sites.
static const uint32_t kUnknownDataSize = 0xFFFFFFFF;
// Subsampling classification of a JPEG frame; this is the return type of
// MJpegDecoder::JpegSubsamplingTypeHelper(). The enumerator names suggest
// YUV 4:2:0, 4:2:2, 4:4:4 and 4:0:0 layouts, with kJpegUnknown for anything
// else - confirm against the helper's implementation.
enum JpegSubsamplingType {
kJpegYuv420,
kJpegYuv422,
kJpegYuv444,
kJpegYuv400,
kJpegUnknown
};
// A pointer to read-only bytes together with a length. No ownership is
// expressed by the type itself.
struct Buffer {
const uint8_t* data;  // Start of the data (read-only).
int len;  // Length of 'data'; presumably in bytes - confirm.
};
// A pointer to Buffer entries plus two integers. Presumably 'len' is the
// number of entries and 'pos' a read cursor - confirm in the decoder source.
struct BufferVector {
Buffer* buffers;  // Pointer to the Buffer entries.
int len;  // presumably the number of entries in 'buffers' - confirm.
int pos;  // presumably the current position within the vector - confirm.
};
struct SetJmpErrorMgr;
// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
// simply independent JPEG images with a fixed huffman table (which is omitted).
// It is rarely used in video transmission, but is common as a camera capture
// format, especially in Logitech devices. This class implements a decoder for
// MJPEG frames.
//
// See http://tools.ietf.org/html/rfc2435
// Exported (LIBYUV_API) decoder class. Only the declaration is visible in
// this header; the behavior described below comes from the member comments.
class LIBYUV_API MJpegDecoder {
public:
// Callback type accepted by DecodeToCallback(). It receives the caller's
// 'opaque' pointer, an array of data pointers, an array of strides and a row
// count; presumably one data pointer and stride per component - confirm.
typedef void (*CallbackFunction)(void* opaque,
const uint8_t* const* data,
const int* strides,
int rows);
// Color space identifiers returned by GetColorSpace(). They are only
// declared here; their values are defined elsewhere.
static const int kColorSpaceUnknown;
static const int kColorSpaceGrayscale;
static const int kColorSpaceRgb;
static const int kColorSpaceYCbCr;
static const int kColorSpaceCMYK;
static const int kColorSpaceYCCK;
MJpegDecoder();
~MJpegDecoder();
// Loads a new frame, reads its headers, and determines the uncompressed
// image format.
// Returns LIBYUV_TRUE if image looks valid and format is supported.
// If return value is LIBYUV_TRUE, then the values for all the following
// getters are populated.
// src_len is the size of the compressed mjpeg frame in bytes.
LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len);
// Returns width of the last loaded frame in pixels.
int GetWidth();
// Returns height of the last loaded frame in pixels.
int GetHeight();
// Returns format of the last loaded frame. The return value is one of the
// kColorSpace* constants.
int GetColorSpace();
// Number of color components in the color space.
int GetNumComponents();
// Sample factors of the n-th component.
int GetHorizSampFactor(int component);
int GetVertSampFactor(int component);
int GetHorizSubSampFactor(int component);
int GetVertSubSampFactor(int component);
// Public for testability.
int GetImageScanlinesPerImcuRow();
// Public for testability.
int GetComponentScanlinesPerImcuRow(int component);
// Width of a component in bytes.
int GetComponentWidth(int component);
// Height of a component.
int GetComponentHeight(int component);
// Width of a component in bytes with padding for DCTSIZE. Public for testing.
int GetComponentStride(int component);
// Size of a component in bytes.
int GetComponentSize(int component);
// Call this after LoadFrame() if you decide you don't want to decode it
// after all.
LIBYUV_BOOL UnloadFrame();
// Decodes the entire image into a one-buffer-per-color-component format.
// dst_width must match exactly. dst_height must be <= to image height; if
// less, the image is cropped. "planes" must have size equal to at least
// GetNumComponents() and they must point to non-overlapping buffers of size
// at least GetComponentSize(i). The pointers in planes are incremented
// to point to after the end of the written data.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height);
// Decodes the entire image and passes the data via repeated calls to a
// callback function. Each call will get the data for a whole number of
// image scanlines.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn,
void* opaque,
int dst_width,
int dst_height);
// The helper function which recognizes the jpeg sub-sampling type.
static JpegSubsamplingType JpegSubsamplingTypeHelper(
int* subsample_x,
int* subsample_y,
int number_of_components);
private:
// Internal helpers; their implementations are not visible in this header.
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
LIBYUV_BOOL StartDecode();
LIBYUV_BOOL FinishDecode();
void SetScanlinePointers(uint8_t** data);
LIBYUV_BOOL DecodeImcuRow();
int GetComponentScanlinePadding(int component);
// A buffer holding the input data for a frame.
Buffer buf_;
BufferVector buf_vec_;
// Pointers to types that are only forward-declared in this header.
jpeg_decompress_struct* decompress_struct_;
jpeg_source_mgr* source_mgr_;
SetJmpErrorMgr* error_mgr_;
// LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
// GetComponentScanlinePadding() != 0.)
LIBYUV_BOOL has_scanline_padding_;
// Temporaries used to point to scanline outputs.
int num_outbufs_; // Outermost size of all arrays below.
uint8_t*** scanlines_;
int* scanlines_sizes_;
// Temporary buffer used for decoding when we can't decode directly to the
// output buffers. Large enough for just one iMCU row.
uint8_t** databuf_;
int* databuf_strides_;
};
} // namespace libyuv
#endif // __cplusplus
#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Move cpu macros to row.h
// LIBYUV_DISABLE_X86 is defined for PNaCl, for CLR builds (__CLR_VER), for
// x86-64 Native Client, and for 32-bit x86 without SSE when the compiler is
// not clang.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
// The outer check keeps __has_feature(...) from being evaluated when the
// compiler does not provide __has_feature.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// The following are available on all x86 platforms:
// HAS_ARGBAFFINEROW_SSE2 is defined only when LIBYUV_DISABLE_X86 is not set
// and one of _M_IX86, __x86_64__ or __i386__ is defined.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Copy a plane of data.
LIBYUV_API
void CopyPlane(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Copy a plane of 16 bit samples (uint16_t counterpart of CopyPlane).
// NOTE(review): whether the strides are counted in bytes or in uint16_t
// elements is not visible from this declaration - confirm against the
// implementation.
LIBYUV_API
void CopyPlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert a plane of 16 bit samples (src_y) into a plane of 8 bit samples
// (dst_y).
// scale is the conversion factor; per the inline note, pass 16384 for 10 bit
// source data.
// NOTE(review): the fixed-point format of scale is not visible here - confirm
// against the implementation before using other bit depths.
LIBYUV_API
void Convert16To8Plane(const uint16_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int scale, // 16384 for 10 bits
int width,
int height);
// Convert a plane of 8 bit samples (src_y) into a plane of 16 bit samples
// (dst_y).
// scale is the conversion factor; per the inline note, pass 1024 to produce
// 10 bit data.
// NOTE(review): the fixed-point format of scale is not visible here - confirm
// against the implementation before using other bit depths.
LIBYUV_API
void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int scale, // 1024 for 10 bits
int width,
int height);
// Set a plane of data to a 32 bit value.
// NOTE(review): value is declared uint32_t while dst_y is a byte plane;
// presumably only the low 8 bits are stored per sample - confirm against the
// implementation.
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
int width,
int height,
uint32_t value);
// Convert a plane of tiles of 16 x H to linear.
LIBYUV_API
int DetilePlane(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height,
int tile_height);
// Convert a plane of 16 bit tiles of 16 x H to linear.
LIBYUV_API
int DetilePlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int tile_height);
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
int tile_height);
// Convert a Y and UV plane of tiles into interlaced YUY2.
LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height,
int tile_height);
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Merge separate U and V planes into one interleaved UV plane.
LIBYUV_API
void MergeUVPlane(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Split interleaved msb UV plane into separate lsb U and V planes.
LIBYUV_API
void SplitUVPlane_16(const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
int depth);
// Merge separate lsb U and V planes into one interleaved msb UV plane.
LIBYUV_API
void MergeUVPlane_16(const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height,
int depth);
// Convert lsb plane to msb plane
LIBYUV_API
void ConvertToMSBPlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int depth);
// Convert msb plane to lsb plane
LIBYUV_API
void ConvertToLSBPlane_16(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
int width,
int height,
int depth);
// Scale U and V to half width and height and merge into interleaved UV plane.
// width and height are source size, allowing odd sizes.
// Use for converting I444 or I422 to NV12.
LIBYUV_API
void HalfMergeUVPlane(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Split interleaved RGB plane into separate R, G and B planes.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_r,
int dst_stride_r,
uint8_t* dst_g,
int dst_stride_g,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// Merge separate R, G and B planes into one interleaved RGB plane.
LIBYUV_API
void MergeRGBPlane(const uint8_t* src_r,
int src_stride_r,
const uint8_t* src_g,
int src_stride_g,
const uint8_t* src_b,
int src_stride_b,
uint8_t* dst_rgb,
int dst_stride_rgb,
int width,
int height);
// Split interleaved ARGB plane into separate R, G, B and A planes.
// dst_a can be NULL to discard alpha plane.
LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_r,
int dst_stride_r,
uint8_t* dst_g,
int dst_stride_g,
uint8_t* dst_b,
int dst_stride_b,
uint8_t* dst_a,
int dst_stride_a,
int width,
int height);
// Merge separate R, G, B and A planes into one interleaved ARGB plane.
// src_a can be NULL to fill opaque value to alpha.
LIBYUV_API
void MergeARGBPlane(const uint8_t* src_r,
int src_stride_r,
const uint8_t* src_g,
int src_stride_g,
const uint8_t* src_b,
int src_stride_b,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Merge separate 'depth' bit R, G and B planes stored in lsb
// into one interleaved XR30 plane.
// There is no alpha input; the destination parameter is named dst_ar30.
// depth should be in the range [10, 16].
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height,
int depth);
// Merge separate 'depth' bit R, G, B and A planes stored in lsb
// into one interleaved AR64 plane.
// src_a can be NULL to fill an opaque value into alpha.
// depth should be in the range [1, 16].
LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height,
int depth);
// Merge separate 'depth' bit R, G, B and A planes stored in lsb
// into one interleaved 8 bit ARGB plane.
// src_a can be NULL to fill an opaque value into alpha.
// depth should be in the range [8, 16].
LIBYUV_API
void MergeARGB16To8Plane(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int depth);
// Copy I400. Supports inverting.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
#define J400ToJ400 I400ToI400
// Copy I422 to I422.
#define I422ToI422 I422Copy
LIBYUV_API
int I422Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy I444 to I444.
#define I444ToI444 I444Copy
LIBYUV_API
int I444Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy I210 to I210.
#define I210ToI210 I210Copy
LIBYUV_API
int I210Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy I410 to I410.
#define I410ToI410 I410Copy
LIBYUV_API
int I410Copy(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height);
// Copy NV12. Supports inverting.
LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Copy NV21. Supports inverting.
LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert YUY2 to I422.
LIBYUV_API
int YUY2ToI422(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert UYVY to I422.
LIBYUV_API
int UYVYToI422(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Convert YUY2 to NV12.
// Takes one packed source (src_yuy2) and writes a Y plane (dst_y) and an
// interleaved UV plane (dst_uv).
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert UYVY to NV12.
// Takes one packed source (src_uyvy) and writes a Y plane (dst_y) and an
// interleaved UV plane (dst_uv).
LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert YUY2 to a Y plane.
// Only a luma destination (dst_y) is taken; no U or V output is produced.
LIBYUV_API
int YUY2ToY(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert UYVY to a Y plane.
// Only a luma destination (dst_y) is taken; no U or V output is produced.
LIBYUV_API
int UYVYToY(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Alias
#define J420ToJ400 I420ToI400
#define I420ToI420Mirror I420Mirror
// I420 mirror.
LIBYUV_API
int I420Mirror(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Alias
#define I400ToI400Mirror I400Mirror
// I400 mirror. A single plane is mirrored horizontally.
// Pass negative height to achieve 180 degree rotation.
LIBYUV_API
int I400Mirror(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Alias
#define NV12ToNV12Mirror NV12Mirror
// NV12 mirror.
LIBYUV_API
int NV12Mirror(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Alias
#define ARGBToARGBMirror ARGBMirror
// ARGB mirror.
LIBYUV_API
int ARGBMirror(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Alias
#define RGB24ToRGB24Mirror RGB24Mirror
// RGB24 mirror.
LIBYUV_API
int RGB24Mirror(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Mirror a plane of data.
LIBYUV_API
void MirrorPlane(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Mirror a plane of UV data.
LIBYUV_API
void MirrorUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Alias: RGB24ToRAW resolves to RAWToRGB24, so the same routine is used for
// both conversion directions.
#define RGB24ToRAW RAWToRGB24
// Convert RAW to RGB24.
LIBYUV_API
int RAWToRGB24(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int x,
int y,
int width,
int height,
int value_y,
int value_u,
int value_v);
// Draw a rectangle into ARGB.
LIBYUV_API
int ARGBRect(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height,
uint32_t value);
// Convert ARGB to gray scale ARGB.
LIBYUV_API
int ARGBGrayTo(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Make a rectangle of ARGB gray scale.
LIBYUV_API
int ARGBGray(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height);
// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
int ARGBSepia(uint8_t* dst_argb,
int dst_stride_argb,
int dst_x,
int dst_y,
int width,
int height);
// Apply a matrix rotation to each ARGB pixel.
// matrix_argb is 4 rows of 4 signed coefficients (16 int8_t values),
// -128 to 127 representing -2 to 2.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
LIBYUV_API
int ARGBColorMatrix(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
const int8_t* matrix_argb,
int width,
int height);
// Deprecated. Use ARGBColorMatrix instead.
// Apply a matrix rotation to each ARGB pixel.
// matrix_rgb is 3 rows of 4 signed coefficients (12 int8_t values),
// -128 to 127 representing -1 to 1.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
// Only a destination buffer is taken (no source), together with
// dst_x/dst_y/width/height, so presumably the rectangle is modified in place -
// confirm against the implementation.
LIBYUV_API
int RGBColorMatrix(uint8_t* dst_argb,
int dst_stride_argb,
const int8_t* matrix_rgb,
int dst_x,
int dst_y,
int width,
int height);
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
int ARGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
const uint8_t* table_argb,
int dst_x,
int dst_y,
int width,
int height);
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
LIBYUV_API
int RGBColorTable(uint8_t* dst_argb,
int dst_stride_argb,
const uint8_t* table_argb,
int dst_x,
int dst_y,
int width,
int height);
// Apply a luma/color table to each ARGB pixel but preserve destination alpha.
// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
// RGB (YJ style) and C is an 8 bit color component (R, G or B).
LIBYUV_API
int ARGBLumaColorTable(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
const uint8_t* luma,
int width,
int height);
// Apply a 3 term polynomial to ARGB values.
// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
// g squared, r squared and a squared. The 4th row is coefficients for b to
// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
// result clamped to 0 to 255.
// A polynomial approximation can be derived using software such as 'R'.
LIBYUV_API
int ARGBPolynomial(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
const float* poly,
int width,
int height);
// Convert plane of 16 bit shorts to half floats.
// Source values are multiplied by scale before storing as half float.
LIBYUV_API
int HalfFloatPlane(const uint16_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
float scale,
int width,
int height);
// Convert a buffer of bytes to floats, scale the values and store as floats.
LIBYUV_API
int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width);
// Quantize a rectangle of ARGB. Alpha unaffected.
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
// interval_size should be a value between 1 and 255.
// interval_offset should be a value between 0 and 255.
LIBYUV_API
int ARGBQuantize(uint8_t* dst_argb,
int dst_stride_argb,
int scale,
int interval_size,
int interval_offset,
int dst_x,
int dst_y,
int width,
int height);
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Extract the alpha channel from ARGB.
LIBYUV_API
int ARGBExtractAlpha(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_a,
int dst_stride_a,
int width,
int height);
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8_t* src_y,
int src_stride_y,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Function pointer type for a row blender: takes two source ARGB rows
// (src_argb0, src_argb1), one destination row (dst_argb) and a width.
// This is the type returned by GetARGBBlend.
typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
// Get function to Alpha Blend ARGB pixels and store to destination.
// Returns a row function pointer of type ARGBBlendRow.
// The parameter list is spelled (void) because this header is also consumed
// as C (see the __cplusplus guards around the extern "C" block); in C an
// empty () leaves the parameters unspecified instead of declaring that the
// function takes none. In C++ both spellings mean the same thing.
LIBYUV_API
ARGBBlendRow GetARGBBlend(void);
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8_t* src_argb0,
int src_stride_argb0,
const uint8_t* src_argb1,
int src_stride_argb1,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8_t* src_y0,
int src_stride_y0,
const uint8_t* src_y1,
int src_stride_y1,
const uint8_t* alpha,
int alpha_stride,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8_t* src_y0,
int src_stride_y0,
const uint8_t* src_u0,
int src_stride_u0,
const uint8_t* src_v0,
int src_stride_v0,
const uint8_t* src_y1,
int src_stride_y1,
const uint8_t* src_u1,
int src_stride_u1,
const uint8_t* src_v1,
int src_stride_v1,
const uint8_t* alpha,
int alpha_stride,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8_t* src_argb0,
int src_stride_argb0,
const uint8_t* src_argb1,
int src_stride_argb1,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Add ARGB image with ARGB image. Saturates to 255.
LIBYUV_API
int ARGBAdd(const uint8_t* src_argb0,
int src_stride_argb0,
const uint8_t* src_argb1,
int src_stride_argb1,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
LIBYUV_API
int ARGBSubtract(const uint8_t* src_argb0,
int src_stride_argb0,
const uint8_t* src_argb1,
int src_stride_argb1,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I422 to YUY2.
LIBYUV_API
int I422ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert I422 to UYVY.
LIBYUV_API
int I422ToUYVY(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uyvy,
int dst_stride_uyvy,
int width,
int height);
// Convert unattenuated ARGB to preattenuated ARGB.
LIBYUV_API
int ARGBAttenuate(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
int ARGBUnattenuate(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8_t* src_argb,
int src_stride_argb,
int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height);
// Blur ARGB image.
// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
// 16 byte boundary.
// dst_stride32_cumsum is number of ints in a row (width * 4).
// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
// Blur is optimized for radius of 5 (11x11) or less.
LIBYUV_API
int ARGBBlur(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int32_t* dst_cumsum,
int dst_stride32_cumsum,
int width,
int height,
int radius);
// Gaussian 5x5 blur a float plane.
// Coefficients of 1, 4, 6, 4, 1.
// Each destination pixel is a blur of the 5x5
// pixels from the source.
// Source edges are clamped.
LIBYUV_API
int GaussPlane_F32(const float* src,
int src_stride,
float* dst,
int dst_stride,
int width,
int height);
// Multiply ARGB image by ARGB value.
LIBYUV_API
int ARGBShade(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
uint32_t value);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8_t* src0,
int src_stride0,
const uint8_t* src1,
int src_stride1,
uint8_t* dst,
int dst_stride,
int width,
int height,
int interpolation);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane_16(const uint16_t* src0,
int src_stride0, // measured in 16 bit pixels
const uint16_t* src1,
int src_stride1,
uint16_t* dst,
int dst_stride,
int width,
int height,
int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8_t* src_argb0,
int src_stride_argb0,
const uint8_t* src_argb1,
int src_stride_argb1,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int interpolation);
// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8_t* src0_y,
int src0_stride_y,
const uint8_t* src0_u,
int src0_stride_u,
const uint8_t* src0_v,
int src0_stride_v,
const uint8_t* src1_y,
int src1_stride_y,
const uint8_t* src1_u,
int src1_stride_u,
const uint8_t* src1_v,
int src1_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
int interpolation);
// Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
int src_argb_stride,
uint8_t* dst_argb,
const float* uv_dudv,
int width);
// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h
// SSE2 counterpart of ARGBAffineRow_C with the same parameter list.
// The prototype is declared unconditionally; HAS_ARGBAFFINEROW_SSE2 (defined
// near the top of this header for x86 targets when LIBYUV_DISABLE_X86 is not
// set) is the macro callers can test before using it.
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int src_argb_stride,
uint8_t* dst_argb,
const float* uv_dudv,
int width);
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes.
LIBYUV_API
int ARGBShuffle(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_argb,
int dst_stride_argb,
const uint8_t* shuffler,
int width,
int height);
// Shuffle AR64 channel order. e.g. AR64 to AB64.
// shuffler is 16 bytes.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ar64,
int dst_stride_ar64,
const uint8_t* shuffler,
int width,
int height);
// Sobel ARGB effect with planar output.
LIBYUV_API
int ARGBSobelToPlane(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_y,
int dst_stride_y,
int width,
int height);
// Sobel ARGB effect.
LIBYUV_API
int ARGBSobel(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
LIBYUV_API
int ARGBSobelXY(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_
#define INCLUDE_LIBYUV_ROTATE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported rotation.
// Each enumerator's value is the rotation angle in degrees.
// The typedef name is RotationModeEnum; the prototypes in this header spell
// the parameter type as 'enum RotationMode'.
typedef enum RotationMode {
kRotate0 = 0, // No rotation.
kRotate90 = 90, // Rotate 90 degrees clockwise.
kRotate180 = 180, // Rotate 180 degrees.
kRotate270 = 270, // Rotate 270 degrees clockwise.
// Deprecated aliases: same values as kRotate0, kRotate90 and kRotate270
// respectively.
kRotateNone = 0,
kRotateClockwise = 90,
kRotateCounterClockwise = 270,
} RotationModeEnum;
// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I422 frame.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I444 frame.
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I010 frame.
LIBYUV_API
int I010Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I210 frame.
LIBYUV_API
int I210Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate I410 frame.
LIBYUV_API
int I410Rotate(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Convert Android420 to I420 with rotation.
// "rotation" can be 0, 90, 180 or 270.
LIBYUV_API
int Android420ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_pixel_stride_uv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode rotation);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height,
enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
// Rotate a plane by 180 degrees.
// Part of the deprecated RotatePlane90/180/270 group; RotatePlane takes the
// angle as a RotationMode argument instead.
LIBYUV_API
void RotatePlane180(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
// Rotate a plane by 270 degrees.
// Part of the deprecated RotatePlane90/180/270 group; RotatePlane takes the
// angle as a RotationMode argument instead.
LIBYUV_API
void RotatePlane270(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane_16(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height,
enum RotationMode mode);
// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while
// rotating them.
// width and height expected to be half size for NV12.
LIBYUV_API
int SplitRotateUV(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Split one interleaved source (src) into two destinations (dst_a, dst_b)
// while rotating by 90 degrees.
// Fixed-angle counterpart of SplitRotateUV, which takes a RotationMode.
LIBYUV_API
void SplitRotateUV90(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// Split one interleaved source (src) into two destinations (dst_a, dst_b)
// while rotating by 180 degrees.
// Fixed-angle counterpart of SplitRotateUV, which takes a RotationMode.
LIBYUV_API
void SplitRotateUV180(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// Split one interleaved source (src) into two destinations (dst_a, dst_b)
// while rotating by 270 degrees.
// Fixed-angle counterpart of SplitRotateUV, which takes a RotationMode.
LIBYUV_API
void SplitRotateUV270(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
// Transpose variant that takes one source (src) and writes two destinations
// (dst_a, dst_b).
// NOTE(review): presumably the interleaved-UV counterpart of TransposePlane,
// de-interleaving while transposing - confirm against the implementation.
LIBYUV_API
void SplitTransposeUV(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For RotationMode.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Rotate ARGB frame
LIBYUV_API
int ARGBRotate(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_argb,
int dst_stride_argb,
int src_width,
int src_height,
enum RotationMode mode);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_
#define INCLUDE_LIBYUV_ROTATE_ROW_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// Feature macros advertising which specialized transpose functions exist for
// the current target. Each group is skipped when its LIBYUV_DISABLE_* macro
// is defined.
// NOTE(review): no x86 branch below tests _M_X64, so a 64-bit MSVC build
// without clang presumably enables none of the x86 transposes -- confirm.
// The following are available for Visual C 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
!defined(__clang__)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
// The following are available for 64 bit GCC:
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// NEON group: enabled by __ARM_NEON__, LIBYUV_NEON or __aarch64__.
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
#endif
// MSA group: keyed on __mips_msa; these work on 16-row variants.
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_TRANSPOSEWX16_MSA
#define HAS_TRANSPOSEUVWX16_MSA
#endif
// LSX group: keyed on __loongarch_sx; these work on 16-row variants.
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_TRANSPOSEWX16_LSX
#define HAS_TRANSPOSEUVWX16_LSX
#endif
// Portable C transpose entry points for 8-bit planes. All three are declared
// unconditionally; no HAS_* macro guards them in this header.
// src / src_stride and dst / dst_stride describe the source and destination
// buffers; width (plus height for the WxH form) gives the region size.
// NOTE(review): by name, Wx8 and Wx16 presumably process a fixed band of 8 or
// 16 rows while WxH handles an arbitrary height -- confirm in the definitions.
void TransposeWxH_C(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width,
int height);
void TransposeWx8_C(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_C(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
// Architecture-specific variants of the 8-bit transpose. Each shares the
// signature of TransposeWx8_C / TransposeWx16_C.
// The prototypes are visible on every platform; the HAS_TRANSPOSEWX* macros
// defined earlier in this header are what indicate availability per target.
// NOTE(review): the code that picks a variant is not visible in this header.
void TransposeWx8_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
// "_Any_" counterparts of the architecture-specific 8-bit transposes, with
// identical parameter lists.
// NOTE(review): in libyuv the _Any_ suffix conventionally marks a wrapper that
// accepts widths that are not a multiple of the SIMD step -- confirm against
// the definitions, which are not visible here.
void TransposeWx8_Any_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Any_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_Any_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx16_Any_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
// Portable C transposes that read one source buffer and write two
// destinations (dst_a and dst_b), each with its own stride.
// NOTE(review): the UV in the name suggests the source holds interleaved U/V
// samples that are split into separate planes while transposing -- confirm.
void TransposeUVWxH_C(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height);
void TransposeUVWx8_C(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_C(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
// Architecture-specific variants of the two-destination transpose, sharing the
// signature of TransposeUVWx8_C / TransposeUVWx16_C.
// Availability per target is indicated by the HAS_TRANSPOSEUVWX* macros
// defined earlier in this header; the prototypes themselves are unconditional.
void TransposeUVWx8_SSE2(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
// "_Any_" counterparts of the architecture-specific two-destination
// transposes, with identical parameter lists.
// NOTE(review): presumably wrappers that tolerate widths that are not a
// multiple of the SIMD step -- confirm against the definitions.
void TransposeUVWx8_Any_SSE2(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_LSX(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
// Portable C transposes for planes of uint16_t samples. Only C versions are
// declared in this header for the 16-bit case.
// NOTE(review): whether the strides count uint16_t elements or bytes is not
// visible from the declarations -- confirm before calling.
void TransposeWxH_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width,
int height);
void TransposeWx8_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
void TransposeWx1_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
int dst_stride,
int width);
// Transpose 32 bit values (ARGB)
// The buffers are passed as uint8_t pointers even though each element is
// 32 bits wide. A NEON and a C version are declared for the 4x4 form.
void Transpose4x4_32_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void Transpose4x4_32_C(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
// Transpose 32 bit values (ARGB)
// Only a NEON version of the 8x8 form is declared in this header.
void Transpose8x8_32_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_H_
#define INCLUDE_LIBYUV_SCALE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Filtering modes accepted by the scaling functions.
enum FilterMode {
  // Point sample; fastest.
  kFilterNone = 0,
  // Filter horizontally only.
  kFilterLinear = 1,
  // Faster than box, but lower quality when scaling down.
  kFilterBilinear = 2,
  // Highest quality.
  kFilterBox = 3
};
// Alias for enum FilterMode.
typedef enum FilterMode FilterModeEnum;
// Scale a YUV plane.
// src / src_stride / src_width / src_height describe the source plane and
// dst / dst_stride / dst_width / dst_height the destination plane.
// filtering selects the FilterMode to use. No status is returned.
LIBYUV_API
void ScalePlane(const uint8_t* src,
int src_stride,
int src_width,
int src_height,
uint8_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as ScalePlane, for planes of uint16_t samples.
LIBYUV_API
void ScalePlane_16(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Sample is expected to be in the low 12 bits.
LIBYUV_API
void ScalePlane_12(const uint16_t* src,
int src_stride,
int src_width,
int src_height,
uint16_t* dst,
int dst_stride,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce an even better
// quality image, at further expense of speed.
// The Y, U and V planes are passed separately, each with its own stride,
// for both source and destination.
// Returns 0 if successful.
LIBYUV_API
int I420Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I420Scale, for planes of uint16_t samples.
LIBYUV_API
int I420Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I420Scale_16.
// NOTE(review): presumably expects samples in the low 12 bits, as documented
// for ScalePlane_12 -- confirm.
LIBYUV_API
int I420Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:4:4 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce an even better
// quality image, at further expense of speed.
// The Y, U and V planes are passed separately, each with its own stride,
// for both source and destination.
// Returns 0 if successful.
LIBYUV_API
int I444Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I444Scale, for planes of uint16_t samples.
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I444Scale_16.
// NOTE(review): presumably expects samples in the low 12 bits, as documented
// for ScalePlane_12 -- confirm.
LIBYUV_API
int I444Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:2 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce an even better
// quality image, at further expense of speed.
// The Y, U and V planes are passed separately, each with its own stride,
// for both source and destination.
// Returns 0 if successful.
LIBYUV_API
int I422Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I422Scale, for planes of uint16_t samples.
LIBYUV_API
int I422Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Same parameter layout as I422Scale_16.
// NOTE(review): presumably expects samples in the low 12 bits, as documented
// for ScalePlane_12 -- confirm.
LIBYUV_API
int I422Scale_12(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scales an NV12 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// kFilterBox is not supported for the UV channel and will be treated as
// bilinear.
// Unlike the I4xx scalers, chroma is passed as a single src_uv / dst_uv
// buffer with one stride rather than as separate U and V planes.
// Returns 0 if successful.
LIBYUV_API
int NV12Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int dst_width,
int dst_height,
enum FilterMode filtering);
// The two declarations below are only visible to C++ translation units.
#ifdef __cplusplus
// Legacy API. Deprecated.
// Note the argument order differs from I420Scale: the three plane pointers
// come first, followed by the three strides, for both source and
// destination. Filtering is chosen with a boolean `interpolate` flag
// instead of a FilterMode value.
LIBYUV_API
int Scale(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
int src_stride_y,
int src_stride_u,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
uint8_t* dst_u,
uint8_t* dst_v,
int dst_stride_y,
int dst_stride_u,
int dst_stride_v,
int dst_width,
int dst_height,
LIBYUV_BOOL interpolate);
// For testing, allow disabling of specialized scalers.
LIBYUV_API
void SetUseReferenceImpl(LIBYUV_BOOL use);
#endif // __cplusplus
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_
#define INCLUDE_LIBYUV_SCALE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Scale an ARGB image from src_width x src_height to dst_width x dst_height.
// src_argb / src_stride_argb and dst_argb / dst_stride_argb describe the
// source and destination buffers; filtering selects the FilterMode.
// Returns an int status; the success value is not stated at this declaration.
// NOTE(review): the YUV scalers in libyuv/scale.h document 0 as success --
// confirm the same holds here.
LIBYUV_API
int ARGBScale(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Clipped scale takes destination rectangle coordinates for clip values.
// clip_x / clip_y / clip_width / clip_height give that rectangle; the
// remaining parameters match ARGBScale.
LIBYUV_API
int ARGBScaleClip(const uint8_t* src_argb,
int src_stride_argb,
int src_width,
int src_height,
uint8_t* dst_argb,
int dst_stride_argb,
int dst_width,
int dst_height,
int clip_x,
int clip_y,
int clip_width,
int clip_height,
enum FilterMode filtering);
// Scale with YUV conversion to ARGB and clipping.
// The source is given as separate Y, U and V planes with their strides;
// src_fourcc and dst_fourcc identify the source and destination formats.
// The clip_* parameters follow ARGBScaleClip.
// NOTE(review): which fourcc values are accepted is not visible here --
// confirm against the implementation.
LIBYUV_API
int YUVToARGBScaleClip(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint32_t src_fourcc,
int src_width,
int src_height,
uint8_t* dst_argb,
int dst_stride_argb,
uint32_t dst_fourcc,
int dst_width,
int dst_height,
int clip_x,
int clip_y,
int clip_width,
int clip_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_
/*
* Copyright 2022 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_RGB_H_
#define INCLUDE_LIBYUV_SCALE_RGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// RGB can be RAW, RGB24 or YUV24
// RGB scales 24 bit images by converting a row at a time to ARGB
// and using ARGB row functions to scale, then convert to RGB.
// TODO(fbarchard): Allow input/output formats to be specified.
// src_rgb / src_stride_rgb and dst_rgb / dst_stride_rgb describe the source
// and destination buffers; filtering selects the FilterMode.
// Returns an int status; the success value is not stated at this declaration.
LIBYUV_API
int RGBScale(const uint8_t* src_rgb,
int src_stride_rgb,
int src_width,
int src_height,
uint8_t* dst_rgb,
int dst_stride_rgb,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_RGB_H_
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Toolchain opt-outs consulted by the HAS_SCALE* feature macros below.
// LIBYUV_DISABLE_X86 is set when __pnacl__ or __CLR_VER is defined, for
// Native Client on x86_64, and for i386 builds that lack __SSE__ and are not
// compiled with clang.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// Every Native Client build also disables the NEON paths.
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
// The inner __has_feature test is only reached when the compiler defines
// __has_feature at all.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// Compiler capability probes. Each defines a *_HAS_AVX2 macro that the
// feature groups below combine when deciding whether AVX2 code is usable.
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
// This probe additionally requires _M_IX86 and excludes clang.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
_MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// x86 feature macros for the scale row functions. Every group is skipped when
// LIBYUV_DISABLE_X86 is defined.
// NOTE(review): the conditions test _M_IX86, __x86_64__ and __i386__ but never
// _M_X64, so a 64-bit MSVC build without clang presumably enables none of
// these groups -- confirm if that configuration matters.
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
#define HAS_SCALEADDROW_SSE2
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#endif
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SCALEUVROWDOWN2BOX_SSSE3
#define HAS_SCALEROWUP2_LINEAR_SSE2
#define HAS_SCALEROWUP2_LINEAR_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_SSE2
#define HAS_SCALEROWUP2_BILINEAR_SSSE3
#define HAS_SCALEROWUP2_LINEAR_12_SSSE3
#define HAS_SCALEROWUP2_BILINEAR_12_SSSE3
#define HAS_SCALEROWUP2_LINEAR_16_SSE2
#define HAS_SCALEROWUP2_BILINEAR_16_SSE2
#define HAS_SCALEUVROWUP2_LINEAR_SSSE3
#define HAS_SCALEUVROWUP2_BILINEAR_SSSE3
#define HAS_SCALEUVROWUP2_LINEAR_16_SSE41
#define HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
#endif
// The following are available for gcc/clang x86 platforms, but
// require clang 3.4 or gcc 4.7.
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEUVROWDOWN2BOX_AVX2
#define HAS_SCALEROWUP2_LINEAR_AVX2
#define HAS_SCALEROWUP2_BILINEAR_AVX2
#define HAS_SCALEROWUP2_LINEAR_12_AVX2
#define HAS_SCALEROWUP2_BILINEAR_12_AVX2
#define HAS_SCALEROWUP2_LINEAR_16_AVX2
#define HAS_SCALEROWUP2_BILINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_AVX2
#define HAS_SCALEUVROWUP2_LINEAR_16_AVX2
#define HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
// Non-x86 feature macros for the scale row functions. Each group is skipped
// when its own LIBYUV_DISABLE_* macro is defined.
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEADDROW_NEON
#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBFILTERCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEFILTERCOLS_NEON
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2_LINEAR_NEON
#define HAS_SCALEROWUP2_BILINEAR_NEON
#define HAS_SCALEROWUP2_LINEAR_12_NEON
#define HAS_SCALEROWUP2_BILINEAR_12_NEON
#define HAS_SCALEROWUP2_LINEAR_16_NEON
#define HAS_SCALEROWUP2_BILINEAR_16_NEON
#define HAS_SCALEUVROWUP2_LINEAR_NEON
#define HAS_SCALEUVROWUP2_BILINEAR_NEON
#define HAS_SCALEUVROWUP2_LINEAR_16_NEON
#define HAS_SCALEUVROWUP2_BILINEAR_16_NEON
#endif
// MSA group: keyed on __mips_msa.
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_SCALEADDROW_MSA
#define HAS_SCALEARGBCOLS_MSA
#define HAS_SCALEARGBFILTERCOLS_MSA
#define HAS_SCALEARGBROWDOWN2_MSA
#define HAS_SCALEARGBROWDOWNEVEN_MSA
#define HAS_SCALEFILTERCOLS_MSA
#define HAS_SCALEROWDOWN2_MSA
#define HAS_SCALEROWDOWN34_MSA
#define HAS_SCALEROWDOWN38_MSA
#define HAS_SCALEROWDOWN4_MSA
#endif
// LSX group: keyed on __loongarch_sx.
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
#define HAS_SCALEARGBROWDOWN2_LSX
#define HAS_SCALEARGBROWDOWNEVEN_LSX
#define HAS_SCALEROWDOWN2_LSX
#define HAS_SCALEROWDOWN4_LSX
#define HAS_SCALEROWDOWN38_LSX
#define HAS_SCALEFILTERCOLS_LSX
#define HAS_SCALEADDROW_LSX
#define HAS_SCALEARGBCOLS_LSX
#define HAS_SCALEARGBFILTERCOLS_LSX
#define HAS_SCALEROWDOWN34_LSX
#endif
// Scale ARGB vertically with bilinear interpolation.
// These helpers are not marked LIBYUV_API, unlike the public scalers.
// x, y and dy are stepping values.
// NOTE(review): presumably 16.16 fixed point like the FixedDiv results, and
// bpp presumably means bytes per pixel -- confirm both in the definition.
void ScalePlaneVertical(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
int y,
int dy,
int bpp,
enum FilterMode filtering);
// uint16_t variant of ScalePlaneVertical; the size parameter is named wpp
// here instead of bpp.
// NOTE(review): wpp presumably counts 16-bit words per pixel -- confirm.
void ScalePlaneVertical_16(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_argb,
uint16_t* dst_argb,
int x,
int y,
int dy,
int wpp,
enum FilterMode filtering);
// Reads uint16_t samples and writes uint8_t samples; takes an extra scale
// argument.
// NOTE(review): how scale maps 16-bit input to 8-bit output is not visible
// here -- confirm in the definition.
void ScalePlaneVertical_16To8(int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_argb,
uint8_t* dst_argb,
int x,
int y,
int dy,
int wpp,
int scale,
enum FilterMode filtering);
// Plane-level 16-bit to 8-bit scaler; by name it halves the plane size.
// NOTE(review): the halving behaviour is inferred from the name -- confirm.
void ScalePlaneDown2_16To8(int src_width,
int src_height,
int dst_width,
int dst_height,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint8_t* dst_ptr,
int scale,
enum FilterMode filtering);
// Simplify the filtering based on scale factors.
// Takes the source and destination dimensions plus the requested filtering
// and returns the FilterMode to actually use.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Divide num by div and return as 16.16 fixed point result.
// Three implementations are declared; the FixedDiv macro defined just after
// these prototypes selects one of them.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
int FixedDiv_MIPS(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// Selected through the FixedDiv1 macro in the same way.
int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div);
int FixedDiv1_MIPS(int num, int div);
// Bind the generic FixedDiv / FixedDiv1 names to one implementation:
// the x86 versions when HAS_FIXEDDIV_X86 is defined, otherwise the MIPS
// versions when HAS_FIXEDDIV_MIPS is defined, otherwise the C versions.
#if !defined(HAS_FIXEDDIV_X86) && !defined(HAS_FIXEDDIV_MIPS)
#define FixedDiv FixedDiv_C
#define FixedDiv1 FixedDiv1_C
#elif defined(HAS_FIXEDDIV_X86)
#define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86
#else  // Only HAS_FIXEDDIV_MIPS is defined.
#define FixedDiv FixedDiv_MIPS
#define FixedDiv1 FixedDiv1_MIPS
#endif
// Compute slope values for stepping.
// Inputs are the source and destination dimensions and the filtering mode.
// Results are written through the x, y, dx and dy pointers.
// NOTE(review): presumably x/y are start positions and dx/dy per-pixel steps
// in 16.16 fixed point -- confirm in the definition.
void ScaleSlope(int src_width,
int src_height,
int dst_width,
int dst_height,
enum FilterMode filtering,
int* x,
int* y,
int* dx,
int* dy);
// Portable C row functions for the "Down2" family.
// Each takes a source row pointer, a ptrdiff_t source stride, a destination
// row and the destination width. The 16To8 forms read uint16_t, write uint8_t
// and take an extra scale argument.
// NOTE(review): by name these halve the row, with plain / Linear / Box being
// point-sampled, horizontally filtered and 2x2 averaged variants; _Odd
// presumably handles odd widths -- confirm in the definitions.
void ScaleRowDown2_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width,
int scale);
// Portable C row functions for the "Down4" and "Down34" families, in 8-bit
// and uint16_t (_16) forms. All share the (src_ptr, src_stride, dst,
// dst_width) shape; the Down34 box variants name the destination `d`.
// NOTE(review): by name Down4 is a 1/4 scale and Down34 a 3/4 scale, with the
// _0_ and _1_ box variants presumably using different row weightings --
// confirm in the definitions.
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown34_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* d,
int dst_width);
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* d,
int dst_width);
// Portable C row functions for the "Up2" family.
// The Linear forms take only a source row, destination row and width; the
// Bilinear forms additionally take source and destination strides.
// 8-bit and uint16_t (_16) versions are declared, each with an _Any_ form.
// NOTE(review): by name these double the row width, and the Bilinear forms
// presumably read and write two rows via the strides -- confirm.
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// Portable C column scalers. Note the argument order: destination first, then
// source, then destination width and the x / dx stepping values.
// The ColsUp2 forms leave their last two int parameters unnamed, keeping the
// same five-argument shape as the other column functions.
// The 64 forms name the start position x32.
// NOTE(review): x and dx are presumably 16.16 fixed point, and the 64 forms
// presumably use 64-bit position arithmetic internally -- confirm.
void ScaleCols_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleCols_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int,
int);
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
int dst_width,
int,
int);
void ScaleFilterCols_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleFilterCols64_C(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x32,
int dx);
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
int dst_width,
int x32,
int dx);
// Portable C row functions for the "Down38" family, in 8-bit and uint16_t
// (_16) forms, followed by the ScaleAddRow accumulators.
// NOTE(review): by name Down38 is a 3/8 scale and the _3_ / _2_ box variants
// presumably average 3 or 2 source rows -- confirm in the definitions.
void ScaleRowDown38_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
int dst_width);
// ScaleAddRow writes to a destination wider than its source: uint8_t into
// uint16_t, and uint16_t into uint32_t. The width argument is the source
// width, not the destination width.
// NOTE(review): presumably adds the source row into a running sum held in
// dst_ptr -- confirm in the definition.
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_16_C(const uint16_t* src_ptr,
uint32_t* dst_ptr,
int src_width);
// Portable C row and column scalers for ARGB data, passed as uint8_t buffers.
// The DownEven forms take an extra src_stepx argument between the stride and
// the destination. The column forms take the destination first, like the
// non-ARGB ScaleCols functions, and ColsUp2 leaves its last two ints unnamed.
// NOTE(review): src_stepx is presumably the horizontal step in pixels and
// dst_width presumably counts pixels rather than bytes -- confirm.
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBCols_C(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols64_C(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x32,
int dx);
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int,
int);
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x32,
int dx);
// Portable C row and column scalers for UV data. The declarations mirror the
// ARGB family: Down2 / Down2Linear / Down2Box, DownEven with src_stepx,
// Up2 Linear / Bilinear (8-bit and uint16_t, each with an _Any_ form), and
// the destination-first column functions.
// NOTE(review): the UV buffers presumably hold interleaved U/V pairs, with
// widths counted in pairs -- confirm in the definitions.
void ScaleUVRowDown2_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_C(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_C(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVCols_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
int x,
int dx);
void ScaleUVCols64_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
int x32,
int dx);
void ScaleUVColsUp2_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
int,
int);
void ScaleUVFilterCols_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
int x,
int dx);
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
const uint8_t* src_uv,
int dst_width,
int x32,
int dx);
// Specialized scalers for x86.
// Down-scaling row functions in SSSE3 and AVX2 variants. They share the
// (src_ptr, src_stride, dst_ptr, dst_width) shape of their _C counterparts.
// The prototypes are unconditional; the HAS_SCALEROWDOWN* macros defined
// earlier in this header indicate which variants the build targets.
void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the x86 2x row up-scalers (SSE2 / SSSE3 / AVX2).
// - "Linear" variants take only a source row, a destination row and the
//   destination width.
// - "Bilinear" variants additionally take a source stride and a destination
//   stride.
// - Unsuffixed variants use uint8_t samples; the "_12" and "_16" variants use
//   uint16_t samples.
// NOTE(review): "_12" / "_16" presumably refer to the sample bit depth -
// confirm against the definitions.
void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// "_Any_" counterparts of the x86 2x row up-scalers. Each prototype has the
// same parameter list as the like-named function without "_Any_" (Linear: no
// strides; Bilinear: source and destination strides; "_12" / "_16": uint16_t
// samples).
// NOTE(review): "_Any_" presumably means the wrapper accepts widths that are
// not a multiple of the SIMD step - confirm against the definitions.
void ScaleRowUp2_Linear_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_SSSE3(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_SSE2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// "_Any_" (and "_Odd_") counterparts of the x86 8-bit row down-scalers. All
// prototypes share the signature: source row pointer, source stride,
// destination row pointer, destination width.
// NOTE(review): "_Any_" presumably handles widths that are not a multiple of
// the SIMD step, and "_Odd_" an odd width - neither is visible from the
// declarations; confirm against the definitions.
void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// x86 prototypes for the add-row helpers. They read an 8-bit (uint8_t) source
// row and write to a 16-bit (uint16_t) destination row; note that the width
// argument is the SOURCE width, unlike the other row functions.
// NOTE(review): presumably accumulates the source row into the destination
// for box filtering - confirm against the definitions.
void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
// x86 column scalers for 8-bit rows: destination first, then source,
// destination width and two int stepping arguments (x, dx).
// NOTE(review): x / dx look like a start position and step, likely fixed
// point - confirm against the definitions.
void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Column functions
// Prototypes for SSE2/SSSE3, NEON and MSA column scalers operating on byte
// (uint8_t) ARGB buffers. Every function takes: destination pointer, source
// pointer, destination width, and two int stepping arguments (x, dx).
// NOTE(review): x / dx look like a start position and per-pixel step (likely
// fixed point), and "_Any_" presumably relaxes the width requirement -
// confirm against the definitions.
void ScaleARGBCols_SSE2(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_NEON(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_MSA(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// ARGB Row functions
// Prototypes of the ARGB "Down2" row scalers (plain, Linear and Box) for
// SSE2, NEON, MSA and LSX. All share the signature: source pointer, source
// stride, destination pointer, destination width. Parameter names differ
// slightly between back-ends (src_argb/dst_argb vs src_ptr/dst) but the types
// are identical.
// NOTE(review): whether dst_width counts pixels or bytes is not visible in
// this header - confirm against the definitions.
void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Linear_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDown2Box_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
int dst_width);
// "_Any_" counterparts of the ARGB Down2 row scalers for SSE2, NEON, MSA and
// LSX. Same signature throughout: source pointer, source stride, destination
// pointer, destination width.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Linear_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDown2Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the ARGB "DownEven" row scalers (plain and Box) for SSE2,
// NEON, MSA and LSX. Compared with the Down2 family they take one extra
// argument, src_stepx, between the source stride and the destination pointer.
// Note the type inconsistency in the declarations: the non-Box MSA and LSX
// variants declare src_stepx as int32_t, every other variant as int.
// NOTE(review): src_stepx is presumably the horizontal source step in pixels -
// confirm against the definitions.
void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_argb,
int dst_width);
void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
// "_Any_" counterparts of the ARGB DownEven row scalers. Signature: source
// pointer, source stride, src_stepx, destination pointer, destination width.
// As with the non-"_Any_" declarations, the non-Box MSA and LSX variants
// declare src_stepx as int32_t while the rest use int.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEven_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleARGBRowDownEvenBox_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
// UV Row functions
// Prototypes of the 8-bit UV "Down2" row scalers (plain, Linear and Box) for
// SSSE3, AVX2 (Box only), NEON and MSA. Shared signature: source pointer,
// source stride, destination pointer, destination width.
// NOTE(review): whether dst_width counts UV pairs or bytes is not visible in
// this header - confirm against the definitions.
void ScaleUVRowDown2_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleUVRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Linear_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
// "_Any_" counterparts of the 8-bit UV Down2 row scalers (SSSE3, AVX2 Box,
// NEON, MSA). Shared signature: source pointer, source stride, destination
// pointer, destination width.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the 8-bit UV "DownEven" row scalers (plain and Box) for
// SSSE3, NEON and MSA, followed by their "_Any_" counterparts. Signature:
// source pointer, source stride, src_stepx, destination pointer, destination
// width. The non-Box MSA variants declare src_stepx as int32_t; every other
// variant uses int.
// NOTE(review): src_stepx is presumably the horizontal source step, and
// "_Any_" presumably relaxes the width requirement - confirm against the
// definitions.
void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEvenBox_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEvenBox_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEvenBox_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEven_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEvenBox_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEven_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the 8-bit UV 2x row up-scalers for SSSE3, AVX2 and NEON, each
// with an "_Any_" counterpart.
// - "Linear" variants take a source row, a destination row and the
//   destination width.
// - "Bilinear" variants additionally take a source stride and a destination
//   stride.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// Prototypes of the 16-bit (uint16_t) UV 2x row up-scalers for SSE4.1, AVX2
// and NEON, each with an "_Any_" counterpart.
// - "Linear" variants take a source row, a destination row and the
//   destination width.
// - "Bilinear" variants additionally take a source stride and a destination
//   stride.
// NOTE(review): stride units (bytes vs. uint16_t elements) are not visible in
// this header - confirm against the definitions.
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleUVRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleUVRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// ScaleRowDown2Box is also used by planar functions.
// NEON downscalers with interpolation.
// Note - not static due to reuse in convert for 444 to 420.
// All prototypes in this group share the signature: source row pointer,
// source stride, destination row pointer, destination width (8-bit samples).
void ScaleRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Down scale from 4 to 3 pixels. Uses the NEON multilane read/write
// to load every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
void ScaleRowDown34_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32 -> 12
void ScaleRowDown38_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// "_Any_" (and "_Odd_") counterparts of the NEON 8-bit row down-scalers.
// Shared signature: source row pointer, source stride, destination row
// pointer, destination width.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step and "_Odd_" an odd width - confirm against the definitions.
void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32 -> 12
void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the NEON 2x row up-scalers and their "_Any_" counterparts.
// - "Linear" variants take a source row, a destination row and the
//   destination width.
// - "Bilinear" variants additionally take a source stride and a destination
//   stride.
// - Unsuffixed variants use uint8_t samples; "_12" and "_16" variants use
//   uint16_t samples.
// NOTE(review): "_12" / "_16" presumably refer to the sample bit depth -
// confirm against the definitions.
void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_12_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_12_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
void ScaleRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
void ScaleRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
// NEON add-row helpers: 8-bit (uint8_t) source row, 16-bit (uint16_t)
// destination row; the width argument is the SOURCE width.
// NOTE(review): presumably accumulates the source row into the destination -
// confirm against the definitions.
void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
// NEON filtering column scalers for 8-bit rows: destination first, then
// source, destination width and two int stepping arguments (x, dx).
void ScaleFilterCols_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
// Prototypes of the MSA scalers for 8-bit rows.
// Row down-scalers take: source row pointer, source stride, destination row
// pointer, destination width. The destination parameter is spelled dst,
// dst_ptr or d depending on the declaration; the type is always uint8_t*.
// NOTE(review): the Down2 / Down4 / Down34 / Down38 ratios and the
// Linear / Box filter kinds are read from the names only - confirm against
// the definitions.
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Add-row helper: uint8_t source, uint16_t destination, SOURCE width.
void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
// Filtering column scaler: destination first, then source, destination width
// and two int stepping arguments (x, dx).
void ScaleFilterCols_MSA(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleRowDown34_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
// "_Any_" counterparts of the MSA scalers for 8-bit rows. Row down-scalers
// take: source row pointer, source stride, destination row pointer,
// destination width.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Add-row helper: uint8_t source, uint16_t destination, SOURCE width.
void ScaleAddRow_Any_MSA(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
// Filtering column scaler: destination first, then source, destination width
// and two int stepping arguments (x, dx).
void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Prototypes of the LSX scalers for 8-bit rows.
// Row down-scalers take: source row pointer, source stride, destination row
// pointer, destination width. The destination parameter is spelled dst,
// dst_ptr or d depending on the declaration; the type is always uint8_t*.
// NOTE(review): the Down2 / Down4 / Down34 / Down38 ratios and the
// Linear / Box filter kinds are read from the names only - confirm against
// the definitions.
void ScaleRowDown2_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown4Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown38_2_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Add-row helper: uint8_t source, uint16_t destination, SOURCE width.
void ScaleAddRow_LSX(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
// Column scalers (plain 8-bit and ARGB): destination first, then source,
// destination width and two int stepping arguments (x, dx).
void ScaleFilterCols_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_LSX(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleARGBCols_LSX(uint8_t* dst_argb,
const uint8_t* src_argb,
int dst_width,
int x,
int dx);
void ScaleRowDown34_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
void ScaleRowDown34_1_Box_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
int dst_width);
// "_Any_" counterparts of the LSX scalers. Row down-scalers take: source row
// pointer, source stride, destination row pointer, destination width.
// NOTE(review): "_Any_" presumably accepts widths that are not a multiple of
// the SIMD step - confirm against the definitions.
void ScaleRowDown2_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Linear_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown2Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown4Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_2_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown38_3_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Add-row helper: uint8_t source, uint16_t destination, SOURCE width.
void ScaleAddRow_Any_LSX(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
// Column scalers (plain 8-bit and ARGB): destination first, then source,
// destination width and two int stepping arguments (x, dx).
void ScaleFilterCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleARGBFilterCols_Any_LSX(uint8_t* dst_ptr,
const uint8_t* src_ptr,
int dst_width,
int x,
int dx);
void ScaleRowDown34_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ROW_H_
/*
* Copyright 2020 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_UV_H_
#define INCLUDE_LIBYUV_SCALE_UV_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Scale a UV image with 8-bit (uint8_t) samples.
// Takes a source buffer with its stride, width and height, a destination
// buffer with its stride, width and height, and the FilterMode to use
// (FilterMode is declared in libyuv/scale.h).
// NOTE(review): the meaning of the int return value, the stride units and
// whether widths count UV pairs are not visible in this header - confirm
// against the implementation.
LIBYUV_API
int UVScale(const uint8_t* src_uv,
int src_stride_uv,
int src_width,
int src_height,
uint8_t* dst_uv,
int dst_stride_uv,
int dst_width,
int dst_height,
enum FilterMode filtering);
// Scale a 16 bit UV image.
// This function is currently incomplete; it can't handle all cases.
// Same parameter list as UVScale, but the source and destination buffers hold
// uint16_t samples.
// NOTE(review): which cases are unsupported, the meaning of the int return
// value and the stride units are not visible in this header - confirm against
// the implementation.
LIBYUV_API
int UVScale_16(const uint16_t* src_uv,
int src_stride_uv,
int src_width,
int src_height,
uint16_t* dst_uv,
int dst_stride_uv,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_UV_H_
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1860
#endif // INCLUDE_LIBYUV_VERSION_H_
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a FourCC code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
#ifndef __cplusplus
// C build: pack the four codes with plain casts; `a` lands in the lowest
// byte and `d` in the highest.
#define FOURCC(a, b, c, d)             \
  (((uint32_t)(a)) |         /* NOLINT */ \
   ((uint32_t)(b) << 8) |    /* NOLINT */ \
   ((uint32_t)(c) << 16) |   /* NOLINT */ \
   ((uint32_t)(d) << 24))    /* NOLINT */
#else
// C++ build: identical packing, expressed with static_cast.
#define FOURCC(a, b, c, d)                          \
  ((static_cast<uint32_t>(a)) |        /* NOLINT */ \
   (static_cast<uint32_t>(b) << 8) |   /* NOLINT */ \
   (static_cast<uint32_t>(c) << 16) |  /* NOLINT */ \
   (static_cast<uint32_t>(d) << 24))   /* NOLINT */
#endif
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
// Every enumerator except FOURCC_ANY is built with the FOURCC() macro from
// its four-character tag.
enum FourCC {
// 10 Primary YUV formats: 6 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_I010 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 420
FOURCC_I210 = FOURCC('I', '2', '1', '0'), // bt.601 10 bit 422
// 1 Secondary YUV format: row biplanar. deprecated.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 2 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 25 Auxiliary YUV variations: 3 with U and V planes swapped, 1 alias,
// and the remainder colorspace / range / bit-depth variants.
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 =
FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J422 =
FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J444 =
FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_J400 =
FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc
FOURCC_F420 = FOURCC('F', '4', '2', '0'), // bt.709 full, unofficial fourcc
FOURCC_F422 = FOURCC('F', '4', '2', '2'), // bt.709 full, unofficial fourcc
FOURCC_F444 = FOURCC('F', '4', '4', '4'), // bt.709 full, unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc
FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc
FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc
FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc
FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc
FOURCC_F010 = FOURCC('F', '0', '1', '0'), // bt.709 full range 10 bit 420
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 420
FOURCC_U010 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 420
FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422
FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422
FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422
FOURCC_P010 = FOURCC('P', '0', '1', '0'),
FOURCC_P210 = FOURCC('P', '2', '1', '0'),
// 17 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// deprecated formats. Not supported, but defined for backward compatibility.
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
// Match any fourcc.
FOURCC_ANY = -1,
};
// Per-format size constants: FOURCC_BPP_<X> is the companion of FOURCC_<X>.
// A value of 0 means the size is unknown (see FOURCC_BPP_MJPG and
// FOURCC_BPP_ANY below).
// NOTE(review): the values look like bits per pixel (e.g. 12 for I420,
// 32 for ARGB) - confirm against the library documentation.
enum FourCCBpp {
// Canonical fourcc codes used in our code.
FOURCC_BPP_I420 = 12,
FOURCC_BPP_I422 = 16,
FOURCC_BPP_I444 = 24,
FOURCC_BPP_I411 = 12,
FOURCC_BPP_I400 = 8,
FOURCC_BPP_NV21 = 12,
FOURCC_BPP_NV12 = 12,
FOURCC_BPP_YUY2 = 16,
FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12, // deprecated
FOURCC_BPP_Q420 = 12,
FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32,
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_AR30 = 32,
FOURCC_BPP_AB30 = 32,
FOURCC_BPP_AR64 = 64,
FOURCC_BPP_AB64 = 64,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
FOURCC_BPP_RGBO = 16,
FOURCC_BPP_R444 = 16,
FOURCC_BPP_RGGB = 8,
FOURCC_BPP_BGGR = 8,
FOURCC_BPP_GRBG = 8,
FOURCC_BPP_GBRG = 8,
FOURCC_BPP_YV12 = 12,
FOURCC_BPP_YV16 = 16,
FOURCC_BPP_YV24 = 24,
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_H422 = 16,
FOURCC_BPP_I010 = 15,
FOURCC_BPP_I210 = 20,
FOURCC_BPP_H010 = 15,
FOURCC_BPP_H210 = 20,
FOURCC_BPP_P010 = 15,
FOURCC_BPP_P210 = 20,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
FOURCC_BPP_YU16 = 16,
FOURCC_BPP_YU24 = 24,
FOURCC_BPP_YUYV = 16,
FOURCC_BPP_YUVS = 16,
FOURCC_BPP_HDYC = 16,
FOURCC_BPP_2VUY = 16,
// NOTE(review): JPEG and DMB1 are documented as aliases for MJPG in enum
// FourCC, yet they are 1 here while FOURCC_BPP_MJPG is 0 - confirm intent.
FOURCC_BPP_JPEG = 1,
FOURCC_BPP_DMB1 = 1,
FOURCC_BPP_BA81 = 8,
FOURCC_BPP_RGB3 = 24,
FOURCC_BPP_BGR3 = 24,
FOURCC_BPP_CM32 = 32,
FOURCC_BPP_CM24 = 24,
// Match any fourcc.
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
// Takes and returns a raw 32-bit FourCC value (see the FOURCC() macro).
// NOTE(review): the result for a code that is not a known alias is defined by
// the implementation, which is not part of this header - presumably the input
// is returned unchanged; confirm before relying on it.
LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_
...@@ -62,6 +62,7 @@ public: ...@@ -62,6 +62,7 @@ public:
void SetSendSdiParams(SendSdiParams params); void SetSendSdiParams(SendSdiParams params);
void ClearFrontQueue(); void ClearFrontQueue();
void AddPicFrame(std::shared_ptr<VideoFrameWithMask> frame); void AddPicFrame(std::shared_ptr<VideoFrameWithMask> frame);
void GetInOutVideoParams(BmdVideoParams& input_params,BmdVideoParams& output_params);
public slots: public slots:
void AddAudioFrame(std::shared_ptr<AudioPacket> audio_packet); void AddAudioFrame(std::shared_ptr<AudioPacket> audio_packet);
void AddFrame(std::shared_ptr<Image> image); void AddFrame(std::shared_ptr<Image> image);
...@@ -134,6 +135,8 @@ private: ...@@ -134,6 +135,8 @@ private:
qint32 queue_max_size; qint32 queue_max_size;
BMDDisplayMode outputDisplayMode; BMDDisplayMode outputDisplayMode;
BMDDisplayMode getOutputDisplayMode() { return outputDisplayMode; }
int m_fps; int m_fps;
uint64_t m_lastRecvTS; uint64_t m_lastRecvTS;
...@@ -162,4 +165,8 @@ private: ...@@ -162,4 +165,8 @@ private:
qint64 last_start_tm{0}; qint64 last_start_tm{0};
SafeMap<qint64, std::shared_ptr<VideoFrameWithMask>> output_video_frame_map; SafeMap<qint64, std::shared_ptr<VideoFrameWithMask>> output_video_frame_map;
BmdVideoParams output_params;
BmdVideoParams input_params;
}; };
...@@ -8,8 +8,11 @@ ...@@ -8,8 +8,11 @@
#include "ZoomThread.h" #include "ZoomThread.h"
#include "Utils/Algorithm.h" #include "Utils/Algorithm.h"
#include "CropThread.h" #include "CropThread.h"
#include "VideoScaleThread.h"
class ProcessMaskThread : public QThread, public CropThread::Listener class ProcessMaskThread : public QThread,
public CropThread::Listener,
public VideoScaleThread::Listener
{ {
Q_OBJECT Q_OBJECT
public: public:
...@@ -22,12 +25,14 @@ public slots: ...@@ -22,12 +25,14 @@ public slots:
signals: signals:
void PushFrame(std::shared_ptr<VideoFrameWithMask> image); void PushFrame(std::shared_ptr<VideoFrameWithMask> image);
void PushFrameToReplay(std::shared_ptr<VideoFrameWithMask> image); void PushFrameToReplay(std::shared_ptr<VideoFrameWithMask> image);
void PushScaleFrameToReplay(std::shared_ptr<VideoFrameWithMask> image);
public: public:
void ClearQueue(); void ClearQueue();
void SetRecordStore(RecordStore* store) { p_store = store; } void SetRecordStore(RecordStore* store) { p_store = store; }
void SetZoomThread(std::shared_ptr<ZoomThread> thread) { zoom_thread = thread; } void SetZoomThread(std::shared_ptr<ZoomThread> thread) { zoom_thread = thread; }
void StartRecord(const uint64_t& start_time, const uint64_t& end_time); void StartRecord(const uint64_t& start_time, const uint64_t& end_time);
CropThread* GetCropThread(); CropThread* GetCropThread();
void SetBmdInOutVideoParams(const BmdVideoParams& input, const BmdVideoParams& output);
protected: protected:
enum MaskStatus enum MaskStatus
...@@ -38,6 +43,7 @@ protected: ...@@ -38,6 +43,7 @@ protected:
}; };
private: private:
void OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask>) override; void OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask>) override;
void OnRecvScaleViedoFrame(std::shared_ptr<VideoFrameWithMask>) override;
void OnChange() override; void OnChange() override;
void run() override; void run() override;
void process(); void process();
...@@ -80,6 +86,7 @@ private: ...@@ -80,6 +86,7 @@ private:
std::shared_ptr<ZoomThread> zoom_thread{nullptr}; std::shared_ptr<ZoomThread> zoom_thread{nullptr};
std::shared_ptr<CropThread> crop_thread{ nullptr }; std::shared_ptr<CropThread> crop_thread{ nullptr };
std::shared_ptr<VideoScaleThread> scale_thread{ nullptr };
Rect mask_rect{0,0,0,0}; Rect mask_rect{0,0,0,0};
int m_fps{0}; int m_fps{0};
...@@ -100,4 +107,11 @@ private: ...@@ -100,4 +107,11 @@ private:
CropMessage crop_msg; CropMessage crop_msg;
qint32 dynamic_out_num {0}; qint32 dynamic_out_num {0};
bool dynamic_flag{false}; bool dynamic_flag{false};
std::atomic_bool replay_flag_clear { false };
BMDDisplayMode output_display_mode;
BmdVideoParams output_params;
BmdVideoParams input_params;
std::atomic_bool scale_flag{ false };
}; };
\ No newline at end of file
...@@ -6,8 +6,9 @@ ...@@ -6,8 +6,9 @@
#include <queue> #include <queue>
#include "Utils/SafeMap.h" #include "Utils/SafeMap.h"
#include "CropThread.h" #include "CropThread.h"
#include "VideoScaleThread.h"
class ReplayThread : public QThread,public CropThread::Listener class ReplayThread : public QThread,public CropThread::Listener, public VideoScaleThread::Listener
{ {
Q_OBJECT Q_OBJECT
public: public:
...@@ -16,6 +17,7 @@ public: ...@@ -16,6 +17,7 @@ public:
public slots: public slots:
void addFrame(std::shared_ptr<videoFrameData> frameData); void addFrame(std::shared_ptr<videoFrameData> frameData);
void addCropFrame(std::shared_ptr<VideoFrameWithMask> frame); void addCropFrame(std::shared_ptr<VideoFrameWithMask> frame);
void addScaleFrame(std::shared_ptr<VideoFrameWithMask> frame);
signals: signals:
//void PushFrame(std::shared_ptr<Image> image); //void PushFrame(std::shared_ptr<Image> image);
void PushFrame(std::shared_ptr<videoFrameData> frameData); void PushFrame(std::shared_ptr<videoFrameData> frameData);
...@@ -24,10 +26,12 @@ public: ...@@ -24,10 +26,12 @@ public:
bool CanReplay(const ReplayParams& params); bool CanReplay(const ReplayParams& params);
void recvReplayParams(const ReplayParams& params); void recvReplayParams(const ReplayParams& params);
CropThread* GetCropThread(); CropThread* GetCropThread();
void SetBmdInOutVideoParams(const BmdVideoParams& input, const BmdVideoParams& output);
protected: protected:
void run() override; void run() override;
private: private:
void OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_frame) override; void OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_frame) override;
void OnRecvScaleViedoFrame(std::shared_ptr<VideoFrameWithMask> scale_frame) override;
void OnChange() override; void OnChange() override;
void SendFrameFunc(); void SendFrameFunc();
...@@ -45,6 +49,7 @@ private: ...@@ -45,6 +49,7 @@ private:
/*std::queue<std::shared_ptr<videoFrameData>> replayVideoQueue1; /*std::queue<std::shared_ptr<videoFrameData>> replayVideoQueue1;
std::queue<std::shared_ptr<videoFrameData>> replayVideoQueue2;*/ std::queue<std::shared_ptr<videoFrameData>> replayVideoQueue2;*/
uint32_t max_store_size; uint32_t max_store_size;
uint32_t max_store_scale_size;
uint32_t max_store_crop_size; uint32_t max_store_crop_size;
uint32_t max_store_back_size; uint32_t max_store_back_size;
std::atomic_bool replay_flag{ false }; std::atomic_bool replay_flag{ false };
...@@ -57,7 +62,16 @@ private: ...@@ -57,7 +62,16 @@ private:
bool send_err_flag {false}; bool send_err_flag {false};
std::shared_ptr<CropThread> crop_thread{ nullptr }; std::shared_ptr<CropThread> crop_thread{ nullptr };
std::shared_ptr<VideoScaleThread> scale_thread{ nullptr };
SafeMap<qint64, std::shared_ptr<VideoFrameWithMask>> storeCropMap; SafeMap<qint64, std::shared_ptr<VideoFrameWithMask>> storeCropMap;
SafeMap<qint64, std::shared_ptr<VideoFrameWithMask>> storeScaleMap;
SampleQueue<std::shared_ptr<videoFrameData>> needCropQueue; SampleQueue<std::shared_ptr<videoFrameData>> needCropQueue;
std::thread send_frame_thread; std::thread send_frame_thread;
BMDDisplayMode output_display_mode;
BmdVideoParams output_params;
BmdVideoParams input_params;
std::atomic_bool scale_flag{ false };
}; };
\ No newline at end of file
#pragma once
#include <QThread>
#include "Utils/SampleQueue.h"
/// QThread subclass that accepts video frames (addVideoFrame) and scale
/// requests (addScaleMsg) and reports frames to a Listener.
///
/// Only the declaration lives here; the queueing, scaling and buffer
/// management are implemented in the corresponding source file.
/// NOTE(review): the Listener pointer and the two raw byte buffers carry no
/// ownership information in this header - presumably the listener is borrowed
/// and the buffers are owned by this object; confirm in the implementation.
class VideoScaleThread : public QThread
{
    Q_OBJECT
public:
    /// Callback interface implemented by the consumers of this thread.
    class Listener
    {
    public:
        // Polymorphic interface: give it a virtual destructor so destroying
        // an implementer through a Listener* is well defined.
        virtual ~Listener() = default;

        /// Invoked with a frame produced by the scale thread.
        /// (The spelling "Viedo" is kept: implementers override this name.)
        virtual void OnRecvScaleViedoFrame(std::shared_ptr<VideoFrameWithMask>) = 0;
    };

    /// @param listener receiver of OnRecvScaleViedoFrame callbacks.
    VideoScaleThread(Listener* listener);
    ~VideoScaleThread();

    /// Hands a frame to the thread.
    void addVideoFrame(std::shared_ptr<VideoFrameWithMask> frame);
    /// Hands a new target-size request to the thread.
    void addScaleMsg(const ScaleMsg& msg);

protected:
    /// QThread entry point.
    void run() override;

private:
    void scale();

    SampleQueue<std::shared_ptr<VideoFrameWithMask>> video_queue;
    SampleQueue<ScaleMsg> scale_msg_queue;
    Listener* p_listener{nullptr};
    // Intermediate byte buffers; named after the UYVY -> I422 conversion and
    // the I422 scaling step they presumably back.
    uint8_t* uyvy_to_i422{ nullptr };
    uint8_t* i422_scale{nullptr};
    // Dimensions associated with the I422 buffers; default to the 4K constants.
    qint32 i422_width{K4WIDTH};
    qint32 i422_height{K4HEIGHT};
    ScaleMsg scale_msg;
};
\ No newline at end of file
...@@ -30,6 +30,12 @@ ...@@ -30,6 +30,12 @@
#define K4WIDTH 3840 #define K4WIDTH 3840
#define K4HEIGHT 2160 #define K4HEIGHT 2160
#define HDWIDHT 1920
#define HDHEIGHT 1080
#define FRAME50P 50
#define FRAME50I 25
#define FRAME60P 60
enum ReplayStatus enum ReplayStatus
{ {
...@@ -75,6 +81,14 @@ enum CropDirection ...@@ -75,6 +81,14 @@ enum CropDirection
CD_RIGHT_LOW, CD_RIGHT_LOW,
}; };
/// Output mode selector; DeckLinkOutputDevice::StartPlayback switches on
/// Settings::OutputPlayMode using these values.
enum OutputFormat
{
    OF_SAME_INPUT = 0,  // output uses the display mode passed to StartPlayback
    OF_1080_50I   = 1,  // bmdModeHD1080i50
    OF_1080_50P   = 2,  // bmdModeHD1080p50
    OF_2160_50P   = 3   // bmdMode4K2160p50
};
long GetRowBytesFromPixelFormat(long width, BMDPixelFormat pixelFormat); long GetRowBytesFromPixelFormat(long width, BMDPixelFormat pixelFormat);
static QString GetCurrDateTimeStr() static QString GetCurrDateTimeStr()
...@@ -105,6 +119,73 @@ struct SendSdiParams ...@@ -105,6 +119,73 @@ struct SendSdiParams
qint64 start_time; qint64 start_time;
}; };
/// Width, height and frame-rate triple describing a video mode.
/// All fields start at 0; BmdDisplayModeToBmdVideoParams fills them from a
/// BMDDisplayMode using the *WIDTH / *HEIGHT / FRAME* constants.
struct BmdVideoParams
{
    qint32 width = 0;   // e.g. HDWIDHT or K4WIDTH
    qint32 height = 0;  // e.g. HDHEIGHT or K4HEIGHT
    qint32 frame = 0;   // one of the FRAME50P / FRAME50I / FRAME60P constants
};
//void OutputFmtToOutParams(BMDDisplayMode mode, OutputParams& params)
//{
// switch (mode)
// {
// case bmdModeHD1080p50:
// params.width = HDWIDHT;
// params.height = HDHEIGHT;
// params.frame = FRAME50P;
// break;
// case bmdModeHD1080i50:
// params.width = HDWIDHT;
// params.height = HDHEIGHT;
// params.frame = FRAME50I;
// break;
// case bmdMode4K2160p50:
// params.width = K4WIDTH;
// params.height = K4HEIGHT;
// params.frame = FRAME50P;
// break;
// case bmdMode4K2160p5994:
// case bmdMode4K2160p60:
// params.width = K4WIDTH;
// params.height = K4HEIGHT;
// params.frame = FRAME60P;
// break;
// default:
// break;
// }
//}
/// Fills `params` with the width, height and frame constant that correspond
/// to `mode`.
///
/// Handled modes: HD1080p50, HD1080i50, 4K2160p50, 4K2160p5994 and 4K2160p60
/// (the last two both map to FRAME60P). For any other mode `params` is left
/// exactly as the caller passed it in.
static void BmdDisplayModeToBmdVideoParams(BMDDisplayMode mode, BmdVideoParams& params)
{
    // Single place that writes the three fields together.
    const auto assign = [&params](const int w, const int h, const int fps)
    {
        params.width = w;
        params.height = h;
        params.frame = fps;
    };

    switch (mode)
    {
    case bmdModeHD1080p50:
        assign(HDWIDHT, HDHEIGHT, FRAME50P);
        return;
    case bmdModeHD1080i50:
        assign(HDWIDHT, HDHEIGHT, FRAME50I);
        return;
    case bmdMode4K2160p50:
        assign(K4WIDTH, K4HEIGHT, FRAME50P);
        return;
    case bmdMode4K2160p5994:
    case bmdMode4K2160p60:
        assign(K4WIDTH, K4HEIGHT, FRAME60P);
        return;
    default:
        // Unrecognised mode: deliberately leave params untouched.
        return;
    }
}
struct HDRMetadata { struct HDRMetadata {
INT64 EOTF; INT64 EOTF;
...@@ -125,6 +206,12 @@ struct HDRMetadata { ...@@ -125,6 +206,12 @@ struct HDRMetadata {
enum class EOTF { SDR = 0, HDR = 1, PQ = 2, HLG = 3 }; enum class EOTF { SDR = 0, HDR = 1, PQ = 2, HLG = 3 };
/// Target size request passed to VideoScaleThread::addScaleMsg.
/// Both dimensions default to 0.
struct ScaleMsg
{
    int32_t scale_width = 0;
    int32_t scale_height = 0;
};
class CropMessage class CropMessage
{ {
public: public:
......
...@@ -70,6 +70,7 @@ public: ...@@ -70,6 +70,7 @@ public:
static CropMessage CropMsg; static CropMessage CropMsg;
/*static uint8_t* PicData; /*static uint8_t* PicData;
static std::atomic_bool PicFlag;*/ static std::atomic_bool PicFlag;*/
}; };
......
...@@ -16,6 +16,50 @@ protected: ...@@ -16,6 +16,50 @@ protected:
size_t h; size_t h;
}; };
public: public:
static void UYVYToARGB4KWithMatrix(uint8_t* src, const size_t& src_step, uint8_t* dst, const size_t& dst_step, const uint32_t& width, const uint32_t& height,const BMDColorspace& space)
{
uint8_t* src1 = src;
uint8_t* src2 = src1 + src_step;
uint8_t* src3 = src2 + src_step;
uint8_t* src4 = src3 + src_step;
uint8_t* dst1 = dst;
uint8_t* dst2 = dst1 + dst_step;
uint8_t* dst3 = dst2 + dst_step;
uint8_t* dst4 = dst3 + dst_step;
{
switch (space)
{
case bmdColorspaceRec601:
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvI601Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvI601Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvI601Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvI601Constants, width, height);
break;
case bmdColorspaceRec709:
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvH709Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvH709Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvH709Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuvH709Constants, width, height);
break;
case bmdColorspaceRec2020:
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuv2020Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuv2020Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuv2020Constants, width, height);
libyuv::UYVYToARGBMatrix(src1, width << 1, dst1, width << 2, &libyuv::kYuv2020Constants, width, height);
break;
default:
break;
}
/*libyuv::UYVYToARGB(src1, width << 1, dst1, width << 2, width, height);
libyuv::UYVYToARGB(src2, width << 1, dst2, width << 2, width, height);
libyuv::UYVYToARGB(src3, width << 1, dst3, width << 2, width, height);
libyuv::UYVYToARGB(src4, width << 1, dst4, width << 2, width, height);*/
}
}
static void UYVYToARGB4K(uint8_t* src,const size_t& src_step,uint8_t* dst,const size_t& dst_step,const uint32_t& width,const uint32_t& height) static void UYVYToARGB4K(uint8_t* src,const size_t& src_step,uint8_t* dst,const size_t& dst_step,const uint32_t& width,const uint32_t& height)
{ {
uint8_t* src1 = src; uint8_t* src1 = src;
...@@ -159,6 +203,104 @@ public: ...@@ -159,6 +203,104 @@ public:
} }
// Scales an I422 image by splitting it into four quadrants and scaling each
// quadrant independently with libyuv::I422Scale.
//
// src   - source buffer laid out as a Y plane of src_w*src_h bytes, followed
//         by a U plane and a V plane of (src_w*src_h)/2 bytes each.
// src_w, src_h - source dimensions; luma stride is src_w, chroma stride
//         src_w >> 1.
// dst   - destination buffer with the same Y/U/V layout for dst_w x dst_h.
// dst_w, dst_h - destination dimensions; every quadrant is written as
//         (dst_w >> 1) x (dst_h >> 1).
// omp   - non-zero: the four quadrants are scaled inside an OpenMP parallel
//         for with the thread count set to 4; zero: four sequential calls.
// filter - cast to libyuv::FilterMode and forwarded unchanged (default 2).
//
// NOTE(review): plane sizes are stored in uint32_t, so src_w*src_h /
// dst_w*dst_h are assumed to fit in 32 bits. Odd source dimensions are
// handled by widening the right/bottom source quadrants by one; the
// destination quadrants get no such remainder, so odd dst_w/dst_h presumably
// leave the last column/row unwritten - confirm callers pass even sizes.
static void I422Scale(uint8_t* src, const size_t& src_w, const size_t& src_h, uint8_t* dst, const size_t& dst_w, const size_t& dst_h, const int32_t& omp, int32_t filter = 2)
{
// Quadrant dimensions (integer halves of the full image).
size_t half_src_w = (src_w >> 1);
size_t half_src_h = (src_h >> 1);
size_t half_dst_w = (dst_w >> 1);
size_t half_dst_h = (dst_h >> 1);
// Source plane sizes: U and V are each half the size of Y.
uint32_t src_y_size = src_w * src_h;
uint32_t src_u_size = (src_w * src_h >> 1);
// 1 when the source height/width is odd, otherwise 0.
size_t src_mode_h = src_h % 2;
size_t src_mode_w = src_w % 2;
uint8_t* src_y = src;
uint8_t* src_u = src_y + src_y_size;
uint8_t* src_v = src_u + src_u_size;
// Start of each source quadrant: 1 = top-left, 2 = top-right,
// 3 = bottom-left, 4 = bottom-right. Chroma offsets are half the luma ones.
uint8_t* src_y_1 = src_y;
uint8_t* src_y_2 = src_y + half_src_w;
uint8_t* src_y_3 = src_y + src_w * half_src_h;
uint8_t* src_y_4 = src_y + src_w * half_src_h + half_src_w;
uint8_t* src_u_1 = src_u;
uint8_t* src_u_2 = src_u + (half_src_w >> 1);
uint8_t* src_u_3 = src_u + (src_w * half_src_h >> 1);
uint8_t* src_u_4 = src_u + (src_w * half_src_h >> 1) + (half_src_w >> 1);
uint8_t* src_v_1 = src_v;
uint8_t* src_v_2 = src_v + (half_src_w >> 1);
uint8_t* src_v_3 = src_v + (src_w * half_src_h >> 1);
uint8_t* src_v_4 = src_v + (half_src_w >> 1) + (src_w * half_src_h >> 1);
// Destination plane sizes and quadrant starts, computed the same way.
uint32_t dst_y_size = dst_w * dst_h;
uint32_t dst_u_size = (dst_w * dst_h >> 1);
uint8_t* dst_y = dst;
uint8_t* dst_u = dst_y + dst_y_size;
uint8_t* dst_v = dst_u + dst_u_size;
uint8_t* dst_y_1 = dst_y;
uint8_t* dst_y_2 = dst_y + half_dst_w;
uint8_t* dst_y_3 = dst_y + dst_w * half_dst_h;
uint8_t* dst_y_4 = dst_y + dst_w * half_dst_h + half_dst_w;
uint8_t* dst_u_1 = dst_u;
uint8_t* dst_u_2 = dst_u + (half_dst_w >> 1);
uint8_t* dst_u_3 = dst_u + (dst_w * half_dst_h >> 1);
uint8_t* dst_u_4 = dst_u + (dst_w * half_dst_h >> 1) + (half_dst_w >> 1);
uint8_t* dst_v_1 = dst_v;
uint8_t* dst_v_2 = dst_v + (half_dst_w >> 1);
uint8_t* dst_v_3 = dst_v + (dst_w * half_dst_h >> 1);
uint8_t* dst_v_4 = dst_v + (half_dst_w >> 1) + (dst_w * half_dst_h >> 1);
// Per-quadrant source width/height; the odd remainder goes to the right
// column (w) and the bottom row (h).
std::vector<WH> src_wh_vec = { { half_src_w, half_src_h },{half_src_w + src_mode_w,half_src_h},{half_src_w,half_src_h + src_mode_h},{half_src_w + src_mode_w,half_src_h + src_mode_h} };
if (omp)
{
// Tables indexed by quadrant so the parallel loop can pick its planes.
uint8_t* src_data[4][3] = { {src_y_1,src_u_1,src_v_1},{src_y_2,src_u_2,src_v_2},{src_y_3,src_u_3,src_v_3},{src_y_4,src_u_4,src_v_4} };
uint8_t* dst_data[4][3] = { {dst_y_1,dst_u_1,dst_v_1},{dst_y_2,dst_u_2,dst_v_2},{dst_y_3,dst_u_3,dst_v_3},{dst_y_4,dst_u_4,dst_v_4} };
int thread_num = 4;
omp_set_num_threads(thread_num);
// One loop iteration per quadrant.
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < thread_num; i++)
{
// These locals shadow the outer src_y/src_u/... on purpose: inside
// the loop they refer to quadrant i only.
uint8_t* src_y = src_data[i][0];
uint8_t* src_u = src_data[i][1];
uint8_t* src_v = src_data[i][2];
uint8_t* dst_y = dst_data[i][0];
uint8_t* dst_u = dst_data[i][1];
uint8_t* dst_v = dst_data[i][2];
// Strides stay those of the full image; only the start pointer and the
// width/height describe the quadrant.
libyuv::I422Scale(src_y, src_w, src_u, src_w >> 1, src_v, src_w >> 1, src_wh_vec[i].w, src_wh_vec[i].h,
dst_y, dst_w, dst_u, dst_w >> 1, dst_v, dst_w >> 1, half_dst_w, half_dst_h, (libyuv::FilterMode)filter);
}
}
}
else
{
// Sequential path: same four calls, one quadrant after another.
{
libyuv::I422Scale(src_y_1, src_w, src_u_1, src_w >> 1, src_v_1, src_w >> 1, src_wh_vec[0].w, src_wh_vec[0].h,
dst_y_1, dst_w, dst_u_1, dst_w >> 1, dst_v_1, dst_w >> 1, half_dst_w, half_dst_h, (libyuv::FilterMode)filter);
libyuv::I422Scale(src_y_2, src_w, src_u_2, src_w >> 1, src_v_2, src_w >> 1, src_wh_vec[1].w, src_wh_vec[1].h,
dst_y_2, dst_w, dst_u_2, dst_w >> 1, dst_v_2, dst_w >> 1, half_dst_w, half_dst_h, (libyuv::FilterMode)filter);
libyuv::I422Scale(src_y_3, src_w, src_u_3, src_w >> 1, src_v_3, src_w >> 1, src_wh_vec[2].w, src_wh_vec[2].h,
dst_y_3, dst_w, dst_u_3, dst_w >> 1, dst_v_3, dst_w >> 1, half_dst_w, half_dst_h, (libyuv::FilterMode)filter);
libyuv::I422Scale(src_y_4, src_w, src_u_4, src_w >> 1, src_v_4, src_w >> 1, src_wh_vec[3].w, src_wh_vec[3].h,
dst_y_4, dst_w, dst_u_4, dst_w >> 1, dst_v_4, dst_w >> 1, half_dst_w, half_dst_h, (libyuv::FilterMode)filter);
}
}
}
static void I422Scale4K(uint8_t* src, const size_t& src_w, const size_t& src_h, uint8_t* dst, const size_t& dst_w, const size_t& dst_h, const int32_t& omp, int32_t filter = 2) static void I422Scale4K(uint8_t* src, const size_t& src_w, const size_t& src_h, uint8_t* dst, const size_t& dst_w, const size_t& dst_h, const int32_t& omp, int32_t filter = 2)
{ {
size_t half_src_w = (src_w >> 1); size_t half_src_w = (src_w >> 1);
......
...@@ -217,6 +217,7 @@ HRESULT DeckLinkInputDevice::VideoInputFormatChanged(BMDVideoInputFormatChangedE ...@@ -217,6 +217,7 @@ HRESULT DeckLinkInputDevice::VideoInputFormatChanged(BMDVideoInputFormatChangedE
else else
// Invalid color depth for YUV // Invalid color depth for YUV
return E_INVALIDARG; return E_INVALIDARG;
pixelFormat = bmdFormat8BitYUV;
} }
else else
// Unexpected detected video input format flags // Unexpected detected video input format flags
...@@ -230,7 +231,7 @@ HRESULT DeckLinkInputDevice::VideoInputFormatChanged(BMDVideoInputFormatChangedE ...@@ -230,7 +231,7 @@ HRESULT DeckLinkInputDevice::VideoInputFormatChanged(BMDVideoInputFormatChangedE
// Set the video input mode // Set the video input mode
//pixelFormat = bmdFormat8BitYUV; //pixelFormat = bmdFormat8BitYUV;
result = DeckLinkInput->EnableVideoInput(displayMode, pixelFormat, bmdVideoInputEnableFormatDetection); result = DeckLinkInput->EnableVideoInput(displayMode, pixelFormat, bmdVideoInputFlagDefault);
if (result == S_OK) if (result == S_OK)
// Start the capture // Start the capture
...@@ -297,7 +298,7 @@ bool DeckLinkInputDevice::StartCapture(BMDDisplayMode displayMode, IDeckLinkScre ...@@ -297,7 +298,7 @@ bool DeckLinkInputDevice::StartCapture(BMDDisplayMode displayMode, IDeckLinkScre
DeckLinkInput->SetCallback(this); DeckLinkInput->SetCallback(this);
// Set the video input mode // Set the video input mode
if (DeckLinkInput->EnableVideoInput(bmdMode4K2160p50, bmdFormat8BitYUV, bmdVideoInputFlagDefault) != S_OK) if (DeckLinkInput->EnableVideoInput(displayMode, bmdFormat8BitYUV, videoInputFlags) != S_OK)
return false; return false;
if (DeckLinkInput->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, Settings::AudioChannel) != S_OK) if (DeckLinkInput->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, Settings::AudioChannel) != S_OK)
......
...@@ -158,7 +158,7 @@ void DeckLinkInputPage::StartCapture() ...@@ -158,7 +158,7 @@ void DeckLinkInputPage::StartCapture()
if (!SelectedDevice) if (!SelectedDevice)
return; return;
BMDDisplayMode displayMode = bmdModeUnknown; BMDDisplayMode displayMode = bmdModeNTSC;
bool applyDetectedInputMode = AutoDetectCheckBox->isChecked(); bool applyDetectedInputMode = AutoDetectCheckBox->isChecked();
displayMode = (BMDDisplayMode)VideoFormatCombo->currentData().value<unsigned int>(); displayMode = (BMDDisplayMode)VideoFormatCombo->currentData().value<unsigned int>();
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
//extern int SdiOutWaitNums; //extern int SdiOutWaitNums;
//extern std::map<qint32, qint32> map_output_delay; //extern std::map<qint32, qint32> map_output_delay;
#define OUTPUT_1080 1 //#define OUTPUT_1080 1
#define AUDIOMAXSIZE (OutputDeleyTime + 3) * 50 #define AUDIOMAXSIZE (OutputDeleyTime + 3) * 50
#define TESTWRITEFILE 0 #define TESTWRITEFILE 0
#define USETHREADS 1 #define USETHREADS 1
...@@ -148,25 +148,38 @@ HRESULT DeckLinkOutputDevice::ScheduledPlaybackHasStopped() ...@@ -148,25 +148,38 @@ HRESULT DeckLinkOutputDevice::ScheduledPlaybackHasStopped()
return S_OK; return S_OK;
} }
// Copies this device's stored input and output video parameters into the
// caller-supplied objects. The members are filled in by StartPlayback via
// BmdDisplayModeToBmdVideoParams, so the values reflect the most recent
// StartPlayback call.
// The parameters shadow the members of the same name, hence the explicit
// `this->`.
// NOTE(review): no locking is visible here - confirm whether this can race
// with StartPlayback on another thread.
void DeckLinkOutputDevice::GetInOutVideoParams(BmdVideoParams& input_params, BmdVideoParams& output_params)
{
input_params = this->input_params;
output_params = this->output_params;
}
bool DeckLinkOutputDevice::StartPlayback(BMDDisplayMode displayMode, bool enable3D, BMDPixelFormat pixelFormat, bool requireReferenceLocked, IDeckLinkScreenPreviewCallback* screenPreviewCallback) bool DeckLinkOutputDevice::StartPlayback(BMDDisplayMode displayMode, bool enable3D, BMDPixelFormat pixelFormat, bool requireReferenceLocked, IDeckLinkScreenPreviewCallback* screenPreviewCallback)
{ {
BMDDisplayMode outputDisplayMode; //BMDDisplayMode outputDisplayMode;
#if OUTPUT_1080
switch (Settings::OutputPlayMode) switch (Settings::OutputPlayMode)
{ {
case 1: case OF_SAME_INPUT:
outputDisplayMode = displayMode;
break;
case OF_1080_50I:
outputDisplayMode = bmdModeHD1080i50; outputDisplayMode = bmdModeHD1080i50;
break; break;
case OF_1080_50P:
outputDisplayMode = bmdModeHD1080p50;
break;
case OF_2160_50P:
outputDisplayMode = bmdMode4K2160p50;
break;
default: default:
//outputDisplayMode = bmdModeHD1080p25; //outputDisplayMode = bmdModeHD1080p25;
outputDisplayMode = displayMode; outputDisplayMode = displayMode;
break; break;
} }
#else
outputDisplayMode = BMDDisplayMode::bmdModeHD720p50;
#endif
BmdDisplayModeToBmdVideoParams(outputDisplayMode, output_params);
BmdDisplayModeToBmdVideoParams(displayMode, input_params);
// Pass through RP188 timecode and VANC from input frame. VITC timecode is forwarded with VANC // Pass through RP188 timecode and VANC from input frame. VITC timecode is forwarded with VANC
BMDVideoOutputFlags outputFlags = (BMDVideoOutputFlags)(bmdVideoOutputRP188 | bmdVideoOutputVANC); BMDVideoOutputFlags outputFlags = (BMDVideoOutputFlags)(bmdVideoOutputRP188 | bmdVideoOutputVANC);
...@@ -188,10 +201,10 @@ bool DeckLinkOutputDevice::StartPlayback(BMDDisplayMode displayMode, bool enable ...@@ -188,10 +201,10 @@ bool DeckLinkOutputDevice::StartPlayback(BMDDisplayMode displayMode, bool enable
return false; return false;
} }
if (outputDisplayMode == bmdModeHD1080i50) /*if (outputDisplayMode == bmdModeHD1080i50)
qDebug() << "output display HD1080i50"; qDebug() << "output display HD1080i50";
else if (outputDisplayMode == bmdModeHD1080p25) else if (outputDisplayMode == bmdModeHD1080p25)
qDebug() << "output display HD1080p25"; qDebug() << "output display HD1080p25";*/
if (deckLinkOutput->GetDisplayMode(outputDisplayMode, deckLinkDisplayMode.ReleaseAndGetAddressOf()) != S_OK) if (deckLinkOutput->GetDisplayMode(outputDisplayMode, deckLinkDisplayMode.ReleaseAndGetAddressOf()) != S_OK)
{ {
......
...@@ -309,6 +309,7 @@ void DeckLinkOutputPage::RequestedDeviceGranted(ComPtr<IDeckLink>& device) ...@@ -309,6 +309,7 @@ void DeckLinkOutputPage::RequestedDeviceGranted(ComPtr<IDeckLink>& device)
//connect(this, &DeckLinkOutputPage::PushMask, ProcessMask.get(), &ProcessMaskThread::addMaskBuffer, Qt::DirectConnection); //connect(this, &DeckLinkOutputPage::PushMask, ProcessMask.get(), &ProcessMaskThread::addMaskBuffer, Qt::DirectConnection);
connect(ProcessMask.get(), &ProcessMaskThread::PushFrame, SelectedDevice.Get(), &DeckLinkOutputDevice::AddVideoFrameMask, Qt::DirectConnection); connect(ProcessMask.get(), &ProcessMaskThread::PushFrame, SelectedDevice.Get(), &DeckLinkOutputDevice::AddVideoFrameMask, Qt::DirectConnection);
connect(ProcessMask.get(), &ProcessMaskThread::PushFrameToReplay, BindingInputPage->GetReplay(), &ReplayThread::addCropFrame, Qt::DirectConnection); connect(ProcessMask.get(), &ProcessMaskThread::PushFrameToReplay, BindingInputPage->GetReplay(), &ReplayThread::addCropFrame, Qt::DirectConnection);
connect(ProcessMask.get(), &ProcessMaskThread::PushScaleFrameToReplay, BindingInputPage->GetReplay(), &ReplayThread::addScaleFrame, Qt::DirectConnection);
if (Zoom) if (Zoom)
{ {
...@@ -352,11 +353,11 @@ void DeckLinkOutputPage::RequestedDeviceGranted(ComPtr<IDeckLink>& device) ...@@ -352,11 +353,11 @@ void DeckLinkOutputPage::RequestedDeviceGranted(ComPtr<IDeckLink>& device)
NDIOutput->start(); NDIOutput->start();
} }
StartOutput(); StartOutput();
#if USE_TIMEPLUS //#if USE_TIMEPLUS
SelectedDevice->StartPlayback(bmdMode4K2160p50, false, bmdFormat8BitYUV, false, PreviewView->GetDelegate()); // SelectedDevice->StartPlayback(bmdMode4K2160p50, false, bmdFormat8BitYUV, false, PreviewView->GetDelegate());
#elif USE_H2V //#elif USE_H2V
SelectedDevice->StartPlayback(bmdModeHD1080p50, false, bmdFormat10BitYUV, false, PreviewView->GetDelegate()); // SelectedDevice->StartPlayback(bmdModeHD1080p50, false, bmdFormat10BitYUV, false, PreviewView->GetDelegate());
#endif // USE_TIMEPLUS //#endif // USE_TIMEPLUS
...@@ -488,10 +489,16 @@ void DeckLinkOutputPage::FormatChanged(BMDDisplayMode displayMode) ...@@ -488,10 +489,16 @@ void DeckLinkOutputPage::FormatChanged(BMDDisplayMode displayMode)
if (firstFmtChanged) firstFmtChanged = false; if (firstFmtChanged) firstFmtChanged = false;
DisplayMode = displayMode; DisplayMode = displayMode;
//BMDPixelFormat pixelFormat = bmdFormat10BitYUV; //BMDPixelFormat pixelFormat = bmdFormat10BitYUV;
PixelFormat = bmdFormat10BitYUV; PixelFormat = bmdFormat8BitYUV;
SelectedDevice->StartPlayback(DisplayMode, false, PixelFormat, false, PreviewView->GetDelegate()); SelectedDevice->StartPlayback(DisplayMode, false, PixelFormat, false, PreviewView->GetDelegate());
BmdVideoParams input_params, output_params;
SelectedDevice->GetInOutVideoParams(input_params, output_params);
if (ProcessMask) ProcessMask->SetBmdInOutVideoParams(input_params,output_params);
if (BindingInputPage->GetReplay()) BindingInputPage->GetReplay()->SetBmdInOutVideoParams(input_params, output_params);
int indexToSelect = DeviceListCombo->findData(QVariant::fromValue((void*)SelectedDevice->GetDeckLinkInstance().Get())); int indexToSelect = DeviceListCombo->findData(QVariant::fromValue((void*)SelectedDevice->GetDeckLinkInstance().Get()));
bool active = SelectedDevice->isPlaybackActive(); bool active = SelectedDevice->isPlaybackActive();
QToolBox* toolBox = findParent<QToolBox>(this); QToolBox* toolBox = findParent<QToolBox>(this);
......
...@@ -24,16 +24,17 @@ void COpenFile::doOpenFile() ...@@ -24,16 +24,17 @@ void COpenFile::doOpenFile()
qDebug() << "read pic fail........." << "\n"; qDebug() << "read pic fail........." << "\n";
return; return;
} }
if (bk_mat.cols != K4WIDTH || bk_mat.rows != K4HEIGHT) return; //if (bk_mat.cols != K4WIDTH || bk_mat.rows != K4HEIGHT) return;
qint32 w = bk_mat.cols, h = bk_mat.rows;
//cv::cvtColor(bk_mat, bk_mat, cv::COLOR_RGB2RGBA); //cv::cvtColor(bk_mat, bk_mat, cv::COLOR_RGB2RGBA);
emit tellFilePath(path); emit tellFilePath(path);
qint32 size = K4WIDTH * K4HEIGHT << 1; qint32 size = w * h << 1;
uint8_t* data = new uint8_t[K4WIDTH * K4HEIGHT << 1]; uint8_t* data = new uint8_t[w * h << 1];
cv::cvtColor(bk_mat, bk_mat, cv::COLOR_BGR2BGRA); cv::cvtColor(bk_mat, bk_mat, cv::COLOR_BGR2BGRA);
Yuv4k::ARGBToUYVY4K(bk_mat.data, K4WIDTH * K4HEIGHT, data, K4WIDTH * K4HEIGHT >> 1, K4WIDTH >> 1, K4HEIGHT >> 1); Yuv4k::ARGBToUYVY4K(bk_mat.data, w * h, data, w * h >> 1, w >> 1, h >> 1);
std::shared_ptr<VideoFrameWithMask> frame = std::make_shared<VideoFrameWithMask>(K4WIDTH,K4HEIGHT, data,bmdFormat8BitYUV, BS_IDEL, kDefaultHLGBT2020HDRMetadata); std::shared_ptr<VideoFrameWithMask> frame = std::make_shared<VideoFrameWithMask>(w,h, data,bmdFormat8BitYUV, BS_IDEL, kDefaultHLGBT2020HDRMetadata);
emit SendPicData(frame); emit SendPicData(frame);
} }
} }
\ No newline at end of file
...@@ -220,6 +220,16 @@ void NDIOutputThread::run() ...@@ -220,6 +220,16 @@ void NDIOutputThread::run()
VideoMaskQueue.Pop(frame_mask); VideoMaskQueue.Pop(frame_mask);
if (!frame_mask || (!frame_mask->data_ && !frame_mask->pImage && !frame_mask->pImage->data)) continue; if (!frame_mask || (!frame_mask->data_ && !frame_mask->pImage && !frame_mask->pImage->data)) continue;
if (frame_mask->flag_ == BS_START)
{
if (send_one_frame_flag)
{
send_one_frame_flag = false;
send_count = 0;
end_blend_frame_ = nullptr;
}
}
if (Settings::NdiOneFrameDuration > 0 && frame_mask->flag_ == BS_END) if (Settings::NdiOneFrameDuration > 0 && frame_mask->flag_ == BS_END)
{ {
end_blend_frame_ = frame_mask; end_blend_frame_ = frame_mask;
...@@ -252,6 +262,9 @@ void NDIOutputThread::run() ...@@ -252,6 +262,9 @@ void NDIOutputThread::run()
if (frame_mask->data_) Frame.p_data = frame_mask->data_; if (frame_mask->data_) Frame.p_data = frame_mask->data_;
else Frame.p_data = frame_mask->pImage->uyvy_data; else Frame.p_data = frame_mask->pImage->uyvy_data;
} }
Frame.xres = frame_mask->width_;
Frame.yres = frame_mask->height_;
Frame.line_stride_in_bytes = Frame.xres * 2;
Frame.timestamp = frame_mask->timestamp_; Frame.timestamp = frame_mask->timestamp_;
Frame.timecode = frame_mask->timestamp_ * 1000; Frame.timecode = frame_mask->timestamp_ * 1000;
NDIlib_send_send_video_v2(Instance, &Frame); NDIlib_send_send_video_v2(Instance, &Frame);
......
...@@ -5,11 +5,18 @@ ...@@ -5,11 +5,18 @@
#include "Utils/Memory4k.h" #include "Utils/Memory4k.h"
#include <omp.h> #include <omp.h>
#include "Utils/Settings.h" #include "Utils/Settings.h"
#include "libyuv/convert_argb.h"
#include "libyuv/row.h"
#pragma intrinsic(memcpy) #pragma intrinsic(memcpy)
//extern int RecordFlag; //extern int RecordFlag;
//extern int OpenOMP; //extern int OpenOMP;
//extern "C"
//{
// extern const struct libyuv::YuvConstants libyuv::kYuv2020Constants;
//}
static int64_t GetCurrTimeMS() static int64_t GetCurrTimeMS()
{ {
...@@ -155,7 +162,8 @@ void CaptureThread::run() ...@@ -155,7 +162,8 @@ void CaptureThread::run()
if (!Settings::OpenOMP) if (!Settings::OpenOMP)
{ {
size_t yuv_size = width_ * height_ << 1; size_t yuv_size = width_ * height_ << 1;
Yuv4k::UYVYToARGB4K(videoFrame->uyvy_data, yuv_size, videoFrame->data, once_size, width_, height_); //Yuv4k::UYVYToARGB4K(videoFrame->uyvy_data, yuv_size, videoFrame->data, once_size, width_, height_);
Yuv4k::UYVYToARGB4KWithMatrix(videoFrame->uyvy_data, yuv_size, videoFrame->data, once_size, width_, height_,videoFrame->meta_.colorspace);
} }
else else
{ {
...@@ -175,7 +183,24 @@ void CaptureThread::run() ...@@ -175,7 +183,24 @@ void CaptureThread::run()
auto dst = videoFrame->data + i * once_size; auto dst = videoFrame->data + i * once_size;
auto src = videoFrame->uyvy_data + i * yuv_size; auto src = videoFrame->uyvy_data + i * yuv_size;
// Convert this UYVY slice to ARGB using the YUV->RGB matrix that matches
// the colorspace reported in the frame metadata.
switch (videoFrame->meta_.colorspace)
{
case bmdColorspaceRec601:
	libyuv::UYVYToARGBMatrix(src, width_ << 1, dst, width_ << 2, &libyuv::kYuvI601Constants, width_, height_);
	break;
case bmdColorspaceRec709:
	// BT.709 needs its own coefficients; decoding it with the BT.601
	// table (as before) produces a visible color shift on HD sources.
	libyuv::UYVYToARGBMatrix(src, width_ << 1, dst, width_ << 2, &libyuv::kYuvH709Constants, width_, height_);
	break;
case bmdColorspaceRec2020:
	libyuv::UYVYToARGBMatrix(src, width_ << 1, dst, width_ << 2, &libyuv::kYuv2020Constants, width_, height_);
	break;
default:
	// Unknown colorspace: fall back to libyuv's default UYVY conversion.
	libyuv::UYVYToARGB(src, width_ << 1, dst, width_ << 2, width_, height_);
	break;
}
//libyuv::UYVYToARGBMatrix(src, width_ << 1, dst, width_ << 2, &libyuv::kYuvI601Constants, width_, height_);
//libyuv::UYVYToARGB(src, width_ << 1, dst, width_ << 2, width_, height_);
//qDebug() << "get omp thread id:" << omp_get_thread_num() << "\n"; //qDebug() << "get omp thread id:" << omp_get_thread_num() << "\n";
//memcpy(dst, src, once_size); //memcpy(dst, src, once_size);
} }
......
...@@ -103,10 +103,10 @@ void CropThread::CropScale() ...@@ -103,10 +103,10 @@ void CropThread::CropScale()
{ {
} }
else if (p_frame->fmt_ == bmdFormat8BitYUV && (p_frame->data_ || (p_frame->pImage && p_frame->pImage->data))) else if (p_frame->fmt_ == bmdFormat8BitYUV && (p_frame->data_ /*|| (p_frame->pImage && p_frame->pImage->data)*/))
{ {
uyvy_data = (p_frame->data_ ? p_frame->data_ : p_frame->pImage->data); //uyvy_data = (p_frame->data_ ? p_frame->data_ : p_frame->pImage->data);
uyvy_data = p_frame->data_;
Yuv4k::UYVYCopy(uyvy_data, crop_msg.crop_x, crop_msg.crop_y, p_frame->width_, p_frame->height_, uyvy_crop_data, crop_msg.crop_w, crop_msg.crop_h); Yuv4k::UYVYCopy(uyvy_data, crop_msg.crop_x, crop_msg.crop_y, p_frame->width_, p_frame->height_, uyvy_crop_data, crop_msg.crop_w, crop_msg.crop_h);
Yuv4k::UYVYToI422(uyvy_crop_data, i422_crop_data, crop_msg.crop_w, crop_msg.crop_h); Yuv4k::UYVYToI422(uyvy_crop_data, i422_crop_data, crop_msg.crop_w, crop_msg.crop_h);
Yuv4k::I422Scale4K(i422_crop_data, crop_msg.crop_w, crop_msg.crop_h, i422_4k_data, p_frame->width_, p_frame->height_, Settings::ZoomUseOmp); Yuv4k::I422Scale4K(i422_crop_data, crop_msg.crop_w, crop_msg.crop_h, i422_4k_data, p_frame->width_, p_frame->height_, Settings::ZoomUseOmp);
......
...@@ -135,9 +135,27 @@ CropThread* ProcessMaskThread::GetCropThread() ...@@ -135,9 +135,27 @@ CropThread* ProcessMaskThread::GetCropThread()
return crop_thread.get(); return crop_thread.get();
} }
// Records the current input/output video parameters and enables scaling
// whenever the output resolution differs from the input resolution.
//
// input  - parameters of the captured signal.
// output - parameters of the playback signal.
//
// The scale thread is created and started on first use. The requested
// output size is sent on every call, not only on creation, so that a later
// output-format change re-targets an already running scaler instead of
// leaving it on the previous resolution.
void ProcessMaskThread::SetBmdInOutVideoParams(const BmdVideoParams& input, const BmdVideoParams& output)
{
	input_params = input;
	output_params = output;

	const bool needs_scale = (input_params.width != output_params.width || input_params.height != output_params.height);
	if (!needs_scale)
	{
		scale_flag = false;
		return;
	}

	scale_flag = true;
	const bool first_use = !scale_thread;
	if (first_use) scale_thread = std::make_shared<VideoScaleThread>(this);

	// Queue the target size before the thread starts so the first frame
	// already sees it.
	ScaleMsg msg{ output.width,output.height };
	scale_thread->addScaleMsg(msg);

	if (first_use) scale_thread->start();
}
void ProcessMaskThread::ClearQueue() void ProcessMaskThread::ClearQueue()
{ {
if(taskImageQueue.Size()) taskImageQueue.Reset(); if(taskImageQueue.Size()) taskImageQueue.Reset();
if (!replay_flag_clear) replay_flag_clear = true;
} }
void ProcessMaskThread::run() void ProcessMaskThread::run()
...@@ -242,6 +260,18 @@ void ProcessMaskThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> fra ...@@ -242,6 +260,18 @@ void ProcessMaskThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> fra
} }
} }
// Receives a frame from the scale thread. Every non-null frame is emitted
// through PushFrame; frames that are not replay frames and are flagged
// BS_IDEL are additionally emitted through PushScaleFrameToReplay.
void ProcessMaskThread::OnRecvScaleViedoFrame(std::shared_ptr<VideoFrameWithMask> frame)
{
	if (!frame) return;

	emit PushFrame(frame);

	const bool forward_to_replay = (!frame->replay_flag && frame->flag_ == BS_IDEL);
	if (forward_to_replay) emit PushScaleFrameToReplay(frame);
}
void ProcessMaskThread::CropScaleWithUYVY() void ProcessMaskThread::CropScaleWithUYVY()
{ {
//while (true) //while (true)
...@@ -339,6 +369,21 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -339,6 +369,21 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
uint8_t* alpha = NULL; uint8_t* alpha = NULL;
bool err = false; bool err = false;
if (replay_flag_clear)
{
if (dynamic_flag)
{
dynamic_flag = false;
dynamic_out_num = 0;
}
status = BS_IDEL;
memset(tmp_alpha, 0, width * height);
last_masked = nullptr;
if(masked_map.size()) masked_map.clear();
memset(bk_argb, 0, width * height << 2);
replay_flag_clear = false;
}
if (mask_flag && mask_buffer) if (mask_flag && mask_buffer)
{ {
x = mask_buffer->upper_left_point.x; x = mask_buffer->upper_left_point.x;
...@@ -403,6 +448,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -403,6 +448,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
if (dynamic_flag) if (dynamic_flag)
{ {
dynamic_flag = false; dynamic_flag = false;
dynamic_out_num = 0;
status = BS_IDEL; status = BS_IDEL;
memset(tmp_alpha, 0, width * height); memset(tmp_alpha, 0, width * height);
last_masked = nullptr; last_masked = nullptr;
...@@ -491,7 +537,9 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -491,7 +537,9 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
libyuv::ARGBAttenuate(crop_buffer_argb, crop_width << 2, crop_buffer_argb, crop_width << 2, crop_width, crop_height); libyuv::ARGBAttenuate(crop_buffer_argb, crop_width << 2, crop_buffer_argb, crop_width << 2, crop_width, crop_height);
libyuv::ARGBBlend(crop_buffer_argb, crop_width << 2, bk_argb + offset, width << 2, bk_argb + offset, width << 2, crop_width, crop_height); libyuv::ARGBBlend(crop_buffer_argb, crop_width << 2, bk_argb + offset, width << 2, bk_argb + offset, width << 2, crop_width, crop_height);
Rect cross_rect; Rect cross_rect;
//outputAlpha2(buffer, last_masked, cross_rect); //outputAlpha2(buffer, last_masked, cross_rect);
outputAlphaRect(buffer, last_rect, cross_rect); outputAlphaRect(buffer, last_rect, cross_rect);
...@@ -590,6 +638,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -590,6 +638,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
libyuv::ARGBCopy(src_bgra + offset_b, width << 2, tmp_bgra, width << 2, mask_rect.width, mask_rect.height); libyuv::ARGBCopy(src_bgra + offset_b, width << 2, tmp_bgra, width << 2, mask_rect.width, mask_rect.height);
libyuv::ARGBBlend(bk_argb + offset_b, width << 2, tmp_bgra, width << 2, tmp_bgra, width << 2, mask_rect.width, mask_rect.height); libyuv::ARGBBlend(bk_argb + offset_b, width << 2, tmp_bgra, width << 2, tmp_bgra, width << 2, mask_rect.width, mask_rect.height);
qint32 x_ = ((mask_rect.x + 1) >> 1 << 1); qint32 x_ = ((mask_rect.x + 1) >> 1 << 1);
qint32 width_ = (x_ == mask_rect.x ? (mask_rect.width >> 1 << 1) : ((mask_rect.width - 1) >> 1 << 1)); qint32 width_ = (x_ == mask_rect.x ? (mask_rect.width >> 1 << 1) : ((mask_rect.width - 1) >> 1 << 1));
qint32 height_ = (mask_rect.height); qint32 height_ = (mask_rect.height);
...@@ -627,6 +676,10 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -627,6 +676,10 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
video_frame->replay_flag = replay_flag; video_frame->replay_flag = replay_flag;
//std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, status, start_time); //std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, status, start_time);
if (Settings::CropFlag && /*(crop_msg.crop_x >= 0 || crop_msg.crop_y >= 0) &&*/ crop_thread) crop_thread->addVideoFrame(video_frame); if (Settings::CropFlag && /*(crop_msg.crop_x >= 0 || crop_msg.crop_y >= 0) &&*/ crop_thread) crop_thread->addVideoFrame(video_frame);
else if (scale_flag && scale_thread) {
//if (!scale_thread) scale_thread = std::make_shared<VideoScaleThread>(this);
scale_thread->addVideoFrame(video_frame);
}
else else
{ {
emit PushFrame(video_frame); emit PushFrame(video_frame);
...@@ -676,7 +729,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -676,7 +729,7 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
else else
{ {
//emit PushFrame(video_frame); //emit PushFrame(video_frame);
if (Settings::CropFlag && /*(crop_msg.crop_x >=0 || crop_msg.crop_y >= 0) &&*/ crop_thread) if ((Settings::CropFlag && crop_thread) /*|| (scale_flag && scale_thread)*/)
{ {
//auto t2 = TimeMilliSecond(); //auto t2 = TimeMilliSecond();
/*size_t dst_argb_size = width * height << 2; /*size_t dst_argb_size = width * height << 2;
...@@ -713,9 +766,15 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>& ...@@ -713,9 +766,15 @@ void ProcessMaskThread::workMaskWithUYVY(const std::shared_ptr<videoFrameData>&
std::shared_ptr<VideoFrameWithMask> video_frame_i = std::make_shared<VideoFrameWithMask>(width, height, timestamp, pImage->sequenceNum, dst_uyvy, bmdFormat8BitYUV, pImage->flag_, pImage->meta_); std::shared_ptr<VideoFrameWithMask> video_frame_i = std::make_shared<VideoFrameWithMask>(width, height, timestamp, pImage->sequenceNum, dst_uyvy, bmdFormat8BitYUV, pImage->flag_, pImage->meta_);
//std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, status); //std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, status);
if(crop_thread) crop_thread->addVideoFrame(video_frame_i); if (crop_thread) crop_thread->addVideoFrame(video_frame_i);
//if (scale_thread) scale_thread->addVideoFrame(video_frame_i);
//qDebug() << "memcpy duration:" << TimeMilliSecond() - t2 << "\n"; //qDebug() << "memcpy duration:" << TimeMilliSecond() - t2 << "\n";
} }
else if (scale_flag && scale_thread) {
std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, pImage->flag_);
scale_thread->addVideoFrame(video_frame);
}
else else
{ {
std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, pImage->flag_); std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(pImage, pImage->flag_);
......
...@@ -11,6 +11,7 @@ ReplayThread::ReplayThread() ...@@ -11,6 +11,7 @@ ReplayThread::ReplayThread()
max_store_size = Settings::ReplayStoreTime * Settings::FrameRate / 1000; max_store_size = Settings::ReplayStoreTime * Settings::FrameRate / 1000;
max_store_back_size = Settings::ReplayBackStoreTime * Settings::FrameRate / 1000; max_store_back_size = Settings::ReplayBackStoreTime * Settings::FrameRate / 1000;
max_store_crop_size = Settings::ReplayCropStoreTime * Settings::FrameRate / 1000; max_store_crop_size = Settings::ReplayCropStoreTime * Settings::FrameRate / 1000;
max_store_scale_size = max_store_crop_size;
interval = 1000 / Settings::FrameRate; interval = 1000 / Settings::FrameRate;
} }
...@@ -33,6 +34,19 @@ void ReplayThread::addCropFrame(std::shared_ptr<VideoFrameWithMask> frame) ...@@ -33,6 +34,19 @@ void ReplayThread::addCropFrame(std::shared_ptr<VideoFrameWithMask> frame)
} }
// Stores a scaled frame in storeScaleMap, keyed by its timestamp.
// Frames are only inserted while the replay status is RS_IDEL or RS_END;
// the size cap is applied on every call with a usable frame.
void ReplayThread::addScaleFrame(std::shared_ptr<VideoFrameWithMask> frame)
{
	// A usable frame has either its own buffer or a source image attached.
	const bool usable = frame && (frame->data_ || frame->pImage);
	if (!usable) return;

	const bool accepting = (replay_params.status == RS_IDEL || replay_params.status == RS_END);
	if (accepting) storeScaleMap.Insert(frame->timestamp_, frame);

	// Remove one entry once the map grows past max_store_scale_size.
	if (storeScaleMap.Size() > max_store_scale_size) storeScaleMap.Pop();
}
void ReplayThread::addFrame(std::shared_ptr<videoFrameData> frameData) void ReplayThread::addFrame(std::shared_ptr<videoFrameData> frameData)
{ {
if(frameData && frameData->data) if(frameData && frameData->data)
...@@ -128,6 +142,23 @@ CropThread* ReplayThread::GetCropThread() ...@@ -128,6 +142,23 @@ CropThread* ReplayThread::GetCropThread()
return crop_thread.get(); return crop_thread.get();
} }
// Records the current input/output video parameters and enables scaling
// whenever the output resolution differs from the input resolution.
//
// input  - parameters of the captured signal.
// output - parameters of the playback signal.
//
// The scale thread is created and started on first use. The requested
// output size is sent on every call, not only on creation, so that a later
// output-format change re-targets an already running scaler instead of
// leaving it on the previous resolution.
void ReplayThread::SetBmdInOutVideoParams(const BmdVideoParams& input, const BmdVideoParams& output)
{
	input_params = input;
	output_params = output;

	const bool needs_scale = (input_params.width != output_params.width || input_params.height != output_params.height);
	if (!needs_scale)
	{
		scale_flag = false;
		return;
	}

	scale_flag = true;
	const bool first_use = !scale_thread;
	if (first_use) scale_thread = std::make_shared<VideoScaleThread>(this);

	// Queue the target size before the thread starts so the first frame
	// already sees it.
	ScaleMsg msg{ output.width,output.height };
	scale_thread->addScaleMsg(msg);

	if (first_use) scale_thread->start();
}
void ReplayThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_frame) void ReplayThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_frame)
{ {
if (crop_frame) if (crop_frame)
...@@ -137,6 +168,15 @@ void ReplayThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_fra ...@@ -137,6 +168,15 @@ void ReplayThread::OnRecvViedoFrame(std::shared_ptr<VideoFrameWithMask> crop_fra
} }
} }
// Receives a frame from the scale thread: caches it by timestamp and
// appends it to the SDI replay output list. Null frames are ignored.
void ReplayThread::OnRecvScaleViedoFrame(std::shared_ptr<VideoFrameWithMask> scale_frame)
{
	if (!scale_frame) return;

	// NOTE(review): this caches into storeCropMap, while addScaleFrame()
	// fills storeScaleMap - possibly a copy-paste from the crop callback;
	// confirm which map is intended.
	storeCropMap.EnsureInsert(scale_frame->timestamp_, scale_frame);
	replayVideoSdiVec.push_back(scale_frame);
}
void ReplayThread::OnChange() void ReplayThread::OnChange()
{ {
...@@ -239,6 +279,7 @@ void ReplayThread::run() ...@@ -239,6 +279,7 @@ void ReplayThread::run()
} }
qint64 rp_begin = in_tm - dy_in_count; qint64 rp_begin = in_tm - dy_in_count;
if (dy_out_count <= 0) dy_out_count = 1;
qint64 rp_end = out_tm + dy_out_count; qint64 rp_end = out_tm + dy_out_count;
auto begin_tm = storeVideoMap.begin()->first; auto begin_tm = storeVideoMap.begin()->first;
...@@ -296,6 +337,7 @@ void ReplayThread::run() ...@@ -296,6 +337,7 @@ void ReplayThread::run()
for (auto itor = itor_begin;; itor++) for (auto itor = itor_begin;; itor++)
{ {
if (itor == in_itor) if (itor == in_itor)
//if (itor == ndi_itor)
{ {
break; break;
} }
...@@ -318,6 +360,19 @@ void ReplayThread::run() ...@@ -318,6 +360,19 @@ void ReplayThread::run()
needCropQueue.Push(frame); needCropQueue.Push(frame);
} }
} }
else if (scale_flag)
{
std::shared_ptr<VideoFrameWithMask> scale_frame = nullptr;
if(storeScaleMap.Find(itor->first, scale_frame))
{
replayVideoSdiVec.push_back(scale_frame);
}
else
{
std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(frame, frame->flag_);
scale_thread->addVideoFrame(video_frame);
}
}
else else
{ {
std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(frame, frame->flag_); std::shared_ptr<VideoFrameWithMask> video_frame = std::make_shared<VideoFrameWithMask>(frame, frame->flag_);
......
#include "Threads/VideoScaleThread.h"
#include "Utils/yuv4k.h"
#include "Utils/Settings.h"
// Stores the listener and, if no UYVY->I422 scratch buffer exists yet,
// allocates one of (i422_width * i422_height * 2) bytes.
VideoScaleThread::VideoScaleThread(Listener* listener) :p_listener(listener)
{
	if (uyvy_to_i422) return;
	uyvy_to_i422 = new uint8_t[i422_width * i422_height << 1];
}
// Releases the two scratch buffers owned by the scaler.
// Both are allocated with new[] in this file, so they must be released
// with delete[]; scalar delete on an array is undefined behaviour.
// delete[] on a null pointer is a no-op, so no null checks are needed.
VideoScaleThread::~VideoScaleThread()
{
	delete[] uyvy_to_i422;
	delete[] i422_scale;
}
// Queues a frame for the scale loop; null frames are dropped.
void VideoScaleThread::addVideoFrame(std::shared_ptr<VideoFrameWithMask> frame)
{
	if (!frame) return;
	video_queue.Push(frame);
}
// Queues a scale request; scale() reads the queue via PopLast() the next
// time it processes a frame.
void VideoScaleThread::addScaleMsg(const ScaleMsg& msg)
{
scale_msg_queue.Push(msg);
}
// Thread entry point: hands control to the scale loop.
void VideoScaleThread::run()
{
scale();
}
void VideoScaleThread::scale()
{
while (true)
{
std::shared_ptr<VideoFrameWithMask> p_frame = nullptr;
if (video_queue.WaitFor(p_frame))
{
if (scale_msg_queue.Size())
{
ScaleMsg tmp_msg;
scale_msg_queue.PopLast(tmp_msg);
/*if (!crop_msg.equal(tmp_msg) && p_listener)
{
p_listener->OnChange();
}*/
if (tmp_msg.scale_width && tmp_msg.scale_height)
{
if (tmp_msg.scale_width != scale_msg.scale_width || tmp_msg.scale_height != scale_msg.scale_height)
{
if (i422_scale) delete i422_scale;
i422_scale = new uint8_t[tmp_msg.scale_width * tmp_msg.scale_height << 1];
}
scale_msg = tmp_msg;
}
}
if (p_frame)
{
uint8_t* uyvy_data = NULL;
if (p_frame->fmt_ == bmdFormat8BitBGRA && p_frame->data_)
{
}
else if (p_frame->fmt_ == bmdFormat8BitYUV && (p_frame->data_ || (p_frame->pImage && p_frame->pImage->uyvy_data)))
{
uyvy_data = (p_frame->data_ ? p_frame->data_ : p_frame->pImage->uyvy_data);
//uyvy_data = p_frame->data_;
auto src_w = p_frame->width_;
auto src_h = p_frame->height_;
if (src_w * src_h > i422_width * i422_height)
{
delete uyvy_to_i422;
uyvy_to_i422 = new uint8_t[src_w * src_h << 1];
i422_width = src_w;
i422_height = src_h;
}
Yuv4k::UYVYToI422(uyvy_data, uyvy_to_i422, src_w, src_h);
Yuv4k::I422Scale(uyvy_to_i422, src_w, src_h, i422_scale, scale_msg.scale_width, scale_msg.scale_height, Settings::ZoomUseOmp);
uint8_t* scale_uyvy = new uint8_t[scale_msg.scale_width * scale_msg.scale_height << 1];
Yuv4k::I422ToUYVY(i422_scale, scale_uyvy, scale_msg.scale_width, scale_msg.scale_height, 0);
p_frame->width_ = scale_msg.scale_width;
p_frame->height_ = scale_msg.scale_height;
p_frame->size_ = scale_msg.scale_width * scale_msg.scale_height << 1;
if (p_frame->data_) delete p_frame->data_;
p_frame->data_ = scale_uyvy;
}
if (p_listener) p_listener->OnRecvScaleViedoFrame(p_frame);
}
}
}
}
\ No newline at end of file
...@@ -161,6 +161,8 @@ MomentaMedia::MomentaMedia(QWidget *parent) ...@@ -161,6 +161,8 @@ MomentaMedia::MomentaMedia(QWidget *parent)
OutputModeCombo->addItem(QString::fromWCharArray(L"跟随输入格式")); OutputModeCombo->addItem(QString::fromWCharArray(L"跟随输入格式"));
OutputModeCombo->addItem("1080 50I"); OutputModeCombo->addItem("1080 50I");
OutputModeCombo->addItem("1080 50P");
OutputModeCombo->addItem("2160 50P");
OutputModeCombo->setCurrentIndex(mode); OutputModeCombo->setCurrentIndex(mode);
connect(OutputModeCombo, QOverload<int>::of(&QComboBox::currentIndexChanged), this, &MomentaMedia::OutputModeChanged); connect(OutputModeCombo, QOverload<int>::of(&QComboBox::currentIndexChanged), this, &MomentaMedia::OutputModeChanged);
......
No preview for this file type
No preview for this file type
No preview for this file type
...@@ -29,7 +29,7 @@ CROP_X=3200 ...@@ -29,7 +29,7 @@ CROP_X=3200
CROP_Y=1800 CROP_Y=1800
CROP_DIRECTION=4 CROP_DIRECTION=4
USE_PIC_FLAG=1 USE_PIC_FLAG=1
OUTPUT_PLAY_MODE=0 OUTPUT_PLAY_MODE=2
AUDIO_CHANNEL=2 AUDIO_CHANNEL=2
ASPEC_DEN=9 ASPEC_DEN=9
ASPEC_NUM=16 ASPEC_NUM=16
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment