Commit 9eb8ec43 by wangguotao

更新到v2.1.1

parent 063d74b4
objct name changed "deviceOutputPage3"
objct name changed "deviceOutputPage4"
available device "DeckLink 8K Pro (1)"
available device "DeckLink 8K Pro (2)"
"2024-05-16 14:18:31.039" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
available device "DeckLink 8K Pro (3)"
"2024-05-16 14:18:31.059" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
available device "DeckLink 8K Pro (4)"
"2024-05-16 14:18:31.097" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.117" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.137" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.157" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.177" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.197" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.217" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:31.275" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:18:32.299" decklink input fps 50
"2024-05-16 14:18:33.319" decklink input fps 51
"2024-05-16 14:18:33.385" decklink output fps 51 , qsize 0
"2024-05-16 14:18:34.319" decklink input fps 50
"2024-05-16 14:18:34.405" decklink output fps 51 , qsize 0
"2024-05-16 14:18:35.339" decklink input fps 51
"2024-05-16 14:18:35.405" decklink output fps 50 , qsize 0
"2024-05-16 14:18:36.339" decklink input fps 50
"2024-05-16 14:18:36.404" decklink output fps 50 , qsize 0
//=====================================================================
//
// FastMemcpy.c - skywind3000@163.com, 2015
//
// feature:
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc5.1)
//
//=====================================================================
#ifndef __FAST_MEMCPY_H__
#define __FAST_MEMCPY_H__
#include <stddef.h>
#include <stdint.h>
#include <emmintrin.h>
//---------------------------------------------------------------------
// force inline for compilers
//---------------------------------------------------------------------
#ifndef INLINE
#ifdef __GNUC__
#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
#define INLINE __inline__ __attribute__((always_inline))
#else
#define INLINE __inline__
#endif
#elif defined(_MSC_VER)
#define INLINE __forceinline
#elif (defined(__BORLANDC__) || defined(__WATCOMC__))
#define INLINE __inline
#else
#define INLINE
#endif
#endif
//---------------------------------------------------------------------
// fast copy for different sizes
//---------------------------------------------------------------------
static INLINE void memcpy_sse2_16(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
}
static INLINE void memcpy_sse2_32(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
}
static INLINE void memcpy_sse2_64(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
}
static INLINE void memcpy_sse2_128(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
__m128i m4 = _mm_loadu_si128(((const __m128i*)src) + 4);
__m128i m5 = _mm_loadu_si128(((const __m128i*)src) + 5);
__m128i m6 = _mm_loadu_si128(((const __m128i*)src) + 6);
__m128i m7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
_mm_storeu_si128(((__m128i*)dst) + 4, m4);
_mm_storeu_si128(((__m128i*)dst) + 5, m5);
_mm_storeu_si128(((__m128i*)dst) + 6, m6);
_mm_storeu_si128(((__m128i*)dst) + 7, m7);
}
//---------------------------------------------------------------------
// tiny memory copy with jump table optimized
//---------------------------------------------------------------------
static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) {
unsigned char *dd = ((unsigned char*)dst) + size;
const unsigned char *ss = ((const unsigned char*)src) + size;
switch (size) {
case 64:
memcpy_sse2_64(dd - 64, ss - 64);
case 0:
break;
case 65:
memcpy_sse2_64(dd - 65, ss - 65);
case 1:
dd[-1] = ss[-1];
break;
case 66:
memcpy_sse2_64(dd - 66, ss - 66);
case 2:
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 67:
memcpy_sse2_64(dd - 67, ss - 67);
case 3:
*((uint16_t*)(dd - 3)) = *((uint16_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 68:
memcpy_sse2_64(dd - 68, ss - 68);
case 4:
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 69:
memcpy_sse2_64(dd - 69, ss - 69);
case 5:
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 70:
memcpy_sse2_64(dd - 70, ss - 70);
case 6:
*((uint32_t*)(dd - 6)) = *((uint32_t*)(ss - 6));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 71:
memcpy_sse2_64(dd - 71, ss - 71);
case 7:
*((uint32_t*)(dd - 7)) = *((uint32_t*)(ss - 7));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 72:
memcpy_sse2_64(dd - 72, ss - 72);
case 8:
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 73:
memcpy_sse2_64(dd - 73, ss - 73);
case 9:
*((uint64_t*)(dd - 9)) = *((uint64_t*)(ss - 9));
dd[-1] = ss[-1];
break;
case 74:
memcpy_sse2_64(dd - 74, ss - 74);
case 10:
*((uint64_t*)(dd - 10)) = *((uint64_t*)(ss - 10));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 75:
memcpy_sse2_64(dd - 75, ss - 75);
case 11:
*((uint64_t*)(dd - 11)) = *((uint64_t*)(ss - 11));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 76:
memcpy_sse2_64(dd - 76, ss - 76);
case 12:
*((uint64_t*)(dd - 12)) = *((uint64_t*)(ss - 12));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 77:
memcpy_sse2_64(dd - 77, ss - 77);
case 13:
*((uint64_t*)(dd - 13)) = *((uint64_t*)(ss - 13));
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 78:
memcpy_sse2_64(dd - 78, ss - 78);
case 14:
*((uint64_t*)(dd - 14)) = *((uint64_t*)(ss - 14));
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 79:
memcpy_sse2_64(dd - 79, ss - 79);
case 15:
*((uint64_t*)(dd - 15)) = *((uint64_t*)(ss - 15));
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 80:
memcpy_sse2_64(dd - 80, ss - 80);
case 16:
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 81:
memcpy_sse2_64(dd - 81, ss - 81);
case 17:
memcpy_sse2_16(dd - 17, ss - 17);
dd[-1] = ss[-1];
break;
case 82:
memcpy_sse2_64(dd - 82, ss - 82);
case 18:
memcpy_sse2_16(dd - 18, ss - 18);
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 83:
memcpy_sse2_64(dd - 83, ss - 83);
case 19:
memcpy_sse2_16(dd - 19, ss - 19);
*((uint16_t*)(dd - 3)) = *((uint16_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 84:
memcpy_sse2_64(dd - 84, ss - 84);
case 20:
memcpy_sse2_16(dd - 20, ss - 20);
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 85:
memcpy_sse2_64(dd - 85, ss - 85);
case 21:
memcpy_sse2_16(dd - 21, ss - 21);
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 86:
memcpy_sse2_64(dd - 86, ss - 86);
case 22:
memcpy_sse2_16(dd - 22, ss - 22);
*((uint32_t*)(dd - 6)) = *((uint32_t*)(ss - 6));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 87:
memcpy_sse2_64(dd - 87, ss - 87);
case 23:
memcpy_sse2_16(dd - 23, ss - 23);
*((uint32_t*)(dd - 7)) = *((uint32_t*)(ss - 7));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 88:
memcpy_sse2_64(dd - 88, ss - 88);
case 24:
memcpy_sse2_16(dd - 24, ss - 24);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 89:
memcpy_sse2_64(dd - 89, ss - 89);
case 25:
memcpy_sse2_16(dd - 25, ss - 25);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 90:
memcpy_sse2_64(dd - 90, ss - 90);
case 26:
memcpy_sse2_16(dd - 26, ss - 26);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 91:
memcpy_sse2_64(dd - 91, ss - 91);
case 27:
memcpy_sse2_16(dd - 27, ss - 27);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 92:
memcpy_sse2_64(dd - 92, ss - 92);
case 28:
memcpy_sse2_16(dd - 28, ss - 28);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 93:
memcpy_sse2_64(dd - 93, ss - 93);
case 29:
memcpy_sse2_16(dd - 29, ss - 29);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 94:
memcpy_sse2_64(dd - 94, ss - 94);
case 30:
memcpy_sse2_16(dd - 30, ss - 30);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 95:
memcpy_sse2_64(dd - 95, ss - 95);
case 31:
memcpy_sse2_16(dd - 31, ss - 31);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 96:
memcpy_sse2_64(dd - 96, ss - 96);
case 32:
memcpy_sse2_32(dd - 32, ss - 32);
break;
case 97:
memcpy_sse2_64(dd - 97, ss - 97);
case 33:
memcpy_sse2_32(dd - 33, ss - 33);
dd[-1] = ss[-1];
break;
case 98:
memcpy_sse2_64(dd - 98, ss - 98);
case 34:
memcpy_sse2_32(dd - 34, ss - 34);
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 99:
memcpy_sse2_64(dd - 99, ss - 99);
case 35:
memcpy_sse2_32(dd - 35, ss - 35);
*((uint16_t*)(dd - 3)) = *((uint16_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 100:
memcpy_sse2_64(dd - 100, ss - 100);
case 36:
memcpy_sse2_32(dd - 36, ss - 36);
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 101:
memcpy_sse2_64(dd - 101, ss - 101);
case 37:
memcpy_sse2_32(dd - 37, ss - 37);
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 102:
memcpy_sse2_64(dd - 102, ss - 102);
case 38:
memcpy_sse2_32(dd - 38, ss - 38);
*((uint32_t*)(dd - 6)) = *((uint32_t*)(ss - 6));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 103:
memcpy_sse2_64(dd - 103, ss - 103);
case 39:
memcpy_sse2_32(dd - 39, ss - 39);
*((uint32_t*)(dd - 7)) = *((uint32_t*)(ss - 7));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 104:
memcpy_sse2_64(dd - 104, ss - 104);
case 40:
memcpy_sse2_32(dd - 40, ss - 40);
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 105:
memcpy_sse2_64(dd - 105, ss - 105);
case 41:
memcpy_sse2_32(dd - 41, ss - 41);
*((uint64_t*)(dd - 9)) = *((uint64_t*)(ss - 9));
dd[-1] = ss[-1];
break;
case 106:
memcpy_sse2_64(dd - 106, ss - 106);
case 42:
memcpy_sse2_32(dd - 42, ss - 42);
*((uint64_t*)(dd - 10)) = *((uint64_t*)(ss - 10));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 107:
memcpy_sse2_64(dd - 107, ss - 107);
case 43:
memcpy_sse2_32(dd - 43, ss - 43);
*((uint64_t*)(dd - 11)) = *((uint64_t*)(ss - 11));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 108:
memcpy_sse2_64(dd - 108, ss - 108);
case 44:
memcpy_sse2_32(dd - 44, ss - 44);
*((uint64_t*)(dd - 12)) = *((uint64_t*)(ss - 12));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 109:
memcpy_sse2_64(dd - 109, ss - 109);
case 45:
memcpy_sse2_32(dd - 45, ss - 45);
*((uint64_t*)(dd - 13)) = *((uint64_t*)(ss - 13));
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 110:
memcpy_sse2_64(dd - 110, ss - 110);
case 46:
memcpy_sse2_32(dd - 46, ss - 46);
*((uint64_t*)(dd - 14)) = *((uint64_t*)(ss - 14));
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 111:
memcpy_sse2_64(dd - 111, ss - 111);
case 47:
memcpy_sse2_32(dd - 47, ss - 47);
*((uint64_t*)(dd - 15)) = *((uint64_t*)(ss - 15));
*((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8));
break;
case 112:
memcpy_sse2_64(dd - 112, ss - 112);
case 48:
memcpy_sse2_32(dd - 48, ss - 48);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 113:
memcpy_sse2_64(dd - 113, ss - 113);
case 49:
memcpy_sse2_32(dd - 49, ss - 49);
memcpy_sse2_16(dd - 17, ss - 17);
dd[-1] = ss[-1];
break;
case 114:
memcpy_sse2_64(dd - 114, ss - 114);
case 50:
memcpy_sse2_32(dd - 50, ss - 50);
memcpy_sse2_16(dd - 18, ss - 18);
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 115:
memcpy_sse2_64(dd - 115, ss - 115);
case 51:
memcpy_sse2_32(dd - 51, ss - 51);
memcpy_sse2_16(dd - 19, ss - 19);
*((uint16_t*)(dd - 3)) = *((uint16_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 116:
memcpy_sse2_64(dd - 116, ss - 116);
case 52:
memcpy_sse2_32(dd - 52, ss - 52);
memcpy_sse2_16(dd - 20, ss - 20);
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 117:
memcpy_sse2_64(dd - 117, ss - 117);
case 53:
memcpy_sse2_32(dd - 53, ss - 53);
memcpy_sse2_16(dd - 21, ss - 21);
*((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 118:
memcpy_sse2_64(dd - 118, ss - 118);
case 54:
memcpy_sse2_32(dd - 54, ss - 54);
memcpy_sse2_16(dd - 22, ss - 22);
*((uint32_t*)(dd - 6)) = *((uint32_t*)(ss - 6));
*((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2));
break;
case 119:
memcpy_sse2_64(dd - 119, ss - 119);
case 55:
memcpy_sse2_32(dd - 55, ss - 55);
memcpy_sse2_16(dd - 23, ss - 23);
*((uint32_t*)(dd - 7)) = *((uint32_t*)(ss - 7));
*((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4));
break;
case 120:
memcpy_sse2_64(dd - 120, ss - 120);
case 56:
memcpy_sse2_32(dd - 56, ss - 56);
memcpy_sse2_16(dd - 24, ss - 24);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 121:
memcpy_sse2_64(dd - 121, ss - 121);
case 57:
memcpy_sse2_32(dd - 57, ss - 57);
memcpy_sse2_16(dd - 25, ss - 25);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 122:
memcpy_sse2_64(dd - 122, ss - 122);
case 58:
memcpy_sse2_32(dd - 58, ss - 58);
memcpy_sse2_16(dd - 26, ss - 26);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 123:
memcpy_sse2_64(dd - 123, ss - 123);
case 59:
memcpy_sse2_32(dd - 59, ss - 59);
memcpy_sse2_16(dd - 27, ss - 27);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 124:
memcpy_sse2_64(dd - 124, ss - 124);
case 60:
memcpy_sse2_32(dd - 60, ss - 60);
memcpy_sse2_16(dd - 28, ss - 28);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 125:
memcpy_sse2_64(dd - 125, ss - 125);
case 61:
memcpy_sse2_32(dd - 61, ss - 61);
memcpy_sse2_16(dd - 29, ss - 29);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 126:
memcpy_sse2_64(dd - 126, ss - 126);
case 62:
memcpy_sse2_32(dd - 62, ss - 62);
memcpy_sse2_16(dd - 30, ss - 30);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 127:
memcpy_sse2_64(dd - 127, ss - 127);
case 63:
memcpy_sse2_32(dd - 63, ss - 63);
memcpy_sse2_16(dd - 31, ss - 31);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 128:
memcpy_sse2_128(dd - 128, ss - 128);
break;
}
return dst;
}
//---------------------------------------------------------------------
// main routine
//---------------------------------------------------------------------
static void* memcpy_fast(void *destination, const void *source, size_t size)
{
unsigned char *dst = (unsigned char*)destination;
const unsigned char *src = (const unsigned char*)source;
static size_t cachesize = 0x200000; // L2-cache size
size_t padding;
// small memory copy
if (size <= 128) {
return memcpy_tiny(dst, src, size);
}
// align destination to 16 bytes boundary
padding = (16 - (((size_t)dst) & 15)) & 15;
if (padding > 0) {
__m128i head = _mm_loadu_si128((const __m128i*)src);
_mm_storeu_si128((__m128i*)dst, head);
dst += padding;
src += padding;
size -= padding;
}
// medium size copy
if (size <= cachesize) {
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
for (; size >= 128; size -= 128) {
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_store_si128((((__m128i*)dst) + 0), c0);
_mm_store_si128((((__m128i*)dst) + 1), c1);
_mm_store_si128((((__m128i*)dst) + 2), c2);
_mm_store_si128((((__m128i*)dst) + 3), c3);
_mm_store_si128((((__m128i*)dst) + 4), c4);
_mm_store_si128((((__m128i*)dst) + 5), c5);
_mm_store_si128((((__m128i*)dst) + 6), c6);
_mm_store_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
else { // big memory copy
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
_mm_prefetch((const char*)(src), _MM_HINT_NTA);
if ((((size_t)src) & 15) == 0) { // source aligned
for (; size >= 128; size -= 128) {
c0 = _mm_load_si128(((const __m128i*)src) + 0);
c1 = _mm_load_si128(((const __m128i*)src) + 1);
c2 = _mm_load_si128(((const __m128i*)src) + 2);
c3 = _mm_load_si128(((const __m128i*)src) + 3);
c4 = _mm_load_si128(((const __m128i*)src) + 4);
c5 = _mm_load_si128(((const __m128i*)src) + 5);
c6 = _mm_load_si128(((const __m128i*)src) + 6);
c7 = _mm_load_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_stream_si128((((__m128i*)dst) + 0), c0);
_mm_stream_si128((((__m128i*)dst) + 1), c1);
_mm_stream_si128((((__m128i*)dst) + 2), c2);
_mm_stream_si128((((__m128i*)dst) + 3), c3);
_mm_stream_si128((((__m128i*)dst) + 4), c4);
_mm_stream_si128((((__m128i*)dst) + 5), c5);
_mm_stream_si128((((__m128i*)dst) + 6), c6);
_mm_stream_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
else { // source unaligned
for (; size >= 128; size -= 128) {
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_stream_si128((((__m128i*)dst) + 0), c0);
_mm_stream_si128((((__m128i*)dst) + 1), c1);
_mm_stream_si128((((__m128i*)dst) + 2), c2);
_mm_stream_si128((((__m128i*)dst) + 3), c3);
_mm_stream_si128((((__m128i*)dst) + 4), c4);
_mm_stream_si128((((__m128i*)dst) + 5), c5);
_mm_stream_si128((((__m128i*)dst) + 6), c6);
_mm_stream_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
_mm_sfence();
}
memcpy_tiny(dst, src, size);
return destination;
}
#endif
//=====================================================================
//
// FastMemcpy.c - skywind3000@163.com, 2015
//
// feature:
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc5.1)
//
//=====================================================================
#ifndef __FAST_MEMCPY_H__
#define __FAST_MEMCPY_H__
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
//---------------------------------------------------------------------
// force inline for compilers
//---------------------------------------------------------------------
#ifndef INLINE
#ifdef __GNUC__
#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
#define INLINE __inline__ __attribute__((always_inline))
#else
#define INLINE __inline__
#endif
#elif defined(_MSC_VER)
#define INLINE __forceinline
#elif (defined(__BORLANDC__) || defined(__WATCOMC__))
#define INLINE __inline
#else
#define INLINE
#endif
#endif
//---------------------------------------------------------------------
// fast copy for different sizes
//---------------------------------------------------------------------
static INLINE void memcpy_avx_16(void *dst, const void *src) {
#if 1
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
#else
*((uint64_t*)((char*)dst + 0)) = *((uint64_t*)((const char*)src + 0));
*((uint64_t*)((char*)dst + 8)) = *((uint64_t*)((const char*)src + 8));
#endif
}
static INLINE void memcpy_avx_32(void *dst, const void *src) {
__m256i m0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
_mm256_storeu_si256(((__m256i*)dst) + 0, m0);
}
static INLINE void memcpy_avx_64(void *dst, const void *src) {
__m256i m0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
__m256i m1 = _mm256_loadu_si256(((const __m256i*)src) + 1);
_mm256_storeu_si256(((__m256i*)dst) + 0, m0);
_mm256_storeu_si256(((__m256i*)dst) + 1, m1);
}
static INLINE void memcpy_avx_128(void *dst, const void *src) {
__m256i m0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
__m256i m1 = _mm256_loadu_si256(((const __m256i*)src) + 1);
__m256i m2 = _mm256_loadu_si256(((const __m256i*)src) + 2);
__m256i m3 = _mm256_loadu_si256(((const __m256i*)src) + 3);
_mm256_storeu_si256(((__m256i*)dst) + 0, m0);
_mm256_storeu_si256(((__m256i*)dst) + 1, m1);
_mm256_storeu_si256(((__m256i*)dst) + 2, m2);
_mm256_storeu_si256(((__m256i*)dst) + 3, m3);
}
static INLINE void memcpy_avx_256(void *dst, const void *src) {
__m256i m0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
__m256i m1 = _mm256_loadu_si256(((const __m256i*)src) + 1);
__m256i m2 = _mm256_loadu_si256(((const __m256i*)src) + 2);
__m256i m3 = _mm256_loadu_si256(((const __m256i*)src) + 3);
__m256i m4 = _mm256_loadu_si256(((const __m256i*)src) + 4);
__m256i m5 = _mm256_loadu_si256(((const __m256i*)src) + 5);
__m256i m6 = _mm256_loadu_si256(((const __m256i*)src) + 6);
__m256i m7 = _mm256_loadu_si256(((const __m256i*)src) + 7);
_mm256_storeu_si256(((__m256i*)dst) + 0, m0);
_mm256_storeu_si256(((__m256i*)dst) + 1, m1);
_mm256_storeu_si256(((__m256i*)dst) + 2, m2);
_mm256_storeu_si256(((__m256i*)dst) + 3, m3);
_mm256_storeu_si256(((__m256i*)dst) + 4, m4);
_mm256_storeu_si256(((__m256i*)dst) + 5, m5);
_mm256_storeu_si256(((__m256i*)dst) + 6, m6);
_mm256_storeu_si256(((__m256i*)dst) + 7, m7);
}
//---------------------------------------------------------------------
// tiny memory copy with jump table optimized
//---------------------------------------------------------------------
static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) {
unsigned char *dd = ((unsigned char*)dst) + size;
const unsigned char *ss = ((const unsigned char*)src) + size;
switch (size) {
case 128: memcpy_avx_128(dd - 128, ss - 128);
case 0: break;
case 129: memcpy_avx_128(dd - 129, ss - 129);
case 1: dd[-1] = ss[-1]; break;
case 130: memcpy_avx_128(dd - 130, ss - 130);
case 2: *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 131: memcpy_avx_128(dd - 131, ss - 131);
case 3: *((uint16_t*)(dd - 3)) = *((uint16_t*)(ss - 3)); dd[-1] = ss[-1]; break;
case 132: memcpy_avx_128(dd - 132, ss - 132);
case 4: *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 133: memcpy_avx_128(dd - 133, ss - 133);
case 5: *((uint32_t*)(dd - 5)) = *((uint32_t*)(ss - 5)); dd[-1] = ss[-1]; break;
case 134: memcpy_avx_128(dd - 134, ss - 134);
case 6: *((uint32_t*)(dd - 6)) = *((uint32_t*)(ss - 6)); *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 135: memcpy_avx_128(dd - 135, ss - 135);
case 7: *((uint32_t*)(dd - 7)) = *((uint32_t*)(ss - 7)); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 136: memcpy_avx_128(dd - 136, ss - 136);
case 8: *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 137: memcpy_avx_128(dd - 137, ss - 137);
case 9: *((uint64_t*)(dd - 9)) = *((uint64_t*)(ss - 9)); dd[-1] = ss[-1]; break;
case 138: memcpy_avx_128(dd - 138, ss - 138);
case 10: *((uint64_t*)(dd - 10)) = *((uint64_t*)(ss - 10)); *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 139: memcpy_avx_128(dd - 139, ss - 139);
case 11: *((uint64_t*)(dd - 11)) = *((uint64_t*)(ss - 11)); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 140: memcpy_avx_128(dd - 140, ss - 140);
case 12: *((uint64_t*)(dd - 12)) = *((uint64_t*)(ss - 12)); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 141: memcpy_avx_128(dd - 141, ss - 141);
case 13: *((uint64_t*)(dd - 13)) = *((uint64_t*)(ss - 13)); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 142: memcpy_avx_128(dd - 142, ss - 142);
case 14: *((uint64_t*)(dd - 14)) = *((uint64_t*)(ss - 14)); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 143: memcpy_avx_128(dd - 143, ss - 143);
case 15: *((uint64_t*)(dd - 15)) = *((uint64_t*)(ss - 15)); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 144: memcpy_avx_128(dd - 144, ss - 144);
case 16: memcpy_avx_16(dd - 16, ss - 16); break;
case 145: memcpy_avx_128(dd - 145, ss - 145);
case 17: memcpy_avx_16(dd - 17, ss - 17); dd[-1] = ss[-1]; break;
case 146: memcpy_avx_128(dd - 146, ss - 146);
case 18: memcpy_avx_16(dd - 18, ss - 18); *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 147: memcpy_avx_128(dd - 147, ss - 147);
case 19: memcpy_avx_16(dd - 19, ss - 19); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 148: memcpy_avx_128(dd - 148, ss - 148);
case 20: memcpy_avx_16(dd - 20, ss - 20); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 149: memcpy_avx_128(dd - 149, ss - 149);
case 21: memcpy_avx_16(dd - 21, ss - 21); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 150: memcpy_avx_128(dd - 150, ss - 150);
case 22: memcpy_avx_16(dd - 22, ss - 22); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 151: memcpy_avx_128(dd - 151, ss - 151);
case 23: memcpy_avx_16(dd - 23, ss - 23); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 152: memcpy_avx_128(dd - 152, ss - 152);
case 24: memcpy_avx_16(dd - 24, ss - 24); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 153: memcpy_avx_128(dd - 153, ss - 153);
case 25: memcpy_avx_16(dd - 25, ss - 25); memcpy_avx_16(dd - 16, ss - 16); break;
case 154: memcpy_avx_128(dd - 154, ss - 154);
case 26: memcpy_avx_16(dd - 26, ss - 26); memcpy_avx_16(dd - 16, ss - 16); break;
case 155: memcpy_avx_128(dd - 155, ss - 155);
case 27: memcpy_avx_16(dd - 27, ss - 27); memcpy_avx_16(dd - 16, ss - 16); break;
case 156: memcpy_avx_128(dd - 156, ss - 156);
case 28: memcpy_avx_16(dd - 28, ss - 28); memcpy_avx_16(dd - 16, ss - 16); break;
case 157: memcpy_avx_128(dd - 157, ss - 157);
case 29: memcpy_avx_16(dd - 29, ss - 29); memcpy_avx_16(dd - 16, ss - 16); break;
case 158: memcpy_avx_128(dd - 158, ss - 158);
case 30: memcpy_avx_16(dd - 30, ss - 30); memcpy_avx_16(dd - 16, ss - 16); break;
case 159: memcpy_avx_128(dd - 159, ss - 159);
case 31: memcpy_avx_16(dd - 31, ss - 31); memcpy_avx_16(dd - 16, ss - 16); break;
case 160: memcpy_avx_128(dd - 160, ss - 160);
case 32: memcpy_avx_32(dd - 32, ss - 32); break;
case 161: memcpy_avx_128(dd - 161, ss - 161);
case 33: memcpy_avx_32(dd - 33, ss - 33); dd[-1] = ss[-1]; break;
case 162: memcpy_avx_128(dd - 162, ss - 162);
case 34: memcpy_avx_32(dd - 34, ss - 34); *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 163: memcpy_avx_128(dd - 163, ss - 163);
case 35: memcpy_avx_32(dd - 35, ss - 35); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 164: memcpy_avx_128(dd - 164, ss - 164);
case 36: memcpy_avx_32(dd - 36, ss - 36); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 165: memcpy_avx_128(dd - 165, ss - 165);
case 37: memcpy_avx_32(dd - 37, ss - 37); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 166: memcpy_avx_128(dd - 166, ss - 166);
case 38: memcpy_avx_32(dd - 38, ss - 38); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 167: memcpy_avx_128(dd - 167, ss - 167);
case 39: memcpy_avx_32(dd - 39, ss - 39); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 168: memcpy_avx_128(dd - 168, ss - 168);
case 40: memcpy_avx_32(dd - 40, ss - 40); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 169: memcpy_avx_128(dd - 169, ss - 169);
case 41: memcpy_avx_32(dd - 41, ss - 41); memcpy_avx_16(dd - 16, ss - 16); break;
case 170: memcpy_avx_128(dd - 170, ss - 170);
case 42: memcpy_avx_32(dd - 42, ss - 42); memcpy_avx_16(dd - 16, ss - 16); break;
case 171: memcpy_avx_128(dd - 171, ss - 171);
case 43: memcpy_avx_32(dd - 43, ss - 43); memcpy_avx_16(dd - 16, ss - 16); break;
case 172: memcpy_avx_128(dd - 172, ss - 172);
case 44: memcpy_avx_32(dd - 44, ss - 44); memcpy_avx_16(dd - 16, ss - 16); break;
case 173: memcpy_avx_128(dd - 173, ss - 173);
case 45: memcpy_avx_32(dd - 45, ss - 45); memcpy_avx_16(dd - 16, ss - 16); break;
case 174: memcpy_avx_128(dd - 174, ss - 174);
case 46: memcpy_avx_32(dd - 46, ss - 46); memcpy_avx_16(dd - 16, ss - 16); break;
case 175: memcpy_avx_128(dd - 175, ss - 175);
case 47: memcpy_avx_32(dd - 47, ss - 47); memcpy_avx_16(dd - 16, ss - 16); break;
case 176: memcpy_avx_128(dd - 176, ss - 176);
case 48: memcpy_avx_32(dd - 48, ss - 48); memcpy_avx_16(dd - 16, ss - 16); break;
case 177: memcpy_avx_128(dd - 177, ss - 177);
case 49: memcpy_avx_32(dd - 49, ss - 49); memcpy_avx_32(dd - 32, ss - 32); break;
case 178: memcpy_avx_128(dd - 178, ss - 178);
case 50: memcpy_avx_32(dd - 50, ss - 50); memcpy_avx_32(dd - 32, ss - 32); break;
case 179: memcpy_avx_128(dd - 179, ss - 179);
case 51: memcpy_avx_32(dd - 51, ss - 51); memcpy_avx_32(dd - 32, ss - 32); break;
case 180: memcpy_avx_128(dd - 180, ss - 180);
case 52: memcpy_avx_32(dd - 52, ss - 52); memcpy_avx_32(dd - 32, ss - 32); break;
case 181: memcpy_avx_128(dd - 181, ss - 181);
case 53: memcpy_avx_32(dd - 53, ss - 53); memcpy_avx_32(dd - 32, ss - 32); break;
case 182: memcpy_avx_128(dd - 182, ss - 182);
case 54: memcpy_avx_32(dd - 54, ss - 54); memcpy_avx_32(dd - 32, ss - 32); break;
case 183: memcpy_avx_128(dd - 183, ss - 183);
case 55: memcpy_avx_32(dd - 55, ss - 55); memcpy_avx_32(dd - 32, ss - 32); break;
case 184: memcpy_avx_128(dd - 184, ss - 184);
case 56: memcpy_avx_32(dd - 56, ss - 56); memcpy_avx_32(dd - 32, ss - 32); break;
case 185: memcpy_avx_128(dd - 185, ss - 185);
case 57: memcpy_avx_32(dd - 57, ss - 57); memcpy_avx_32(dd - 32, ss - 32); break;
case 186: memcpy_avx_128(dd - 186, ss - 186);
case 58: memcpy_avx_32(dd - 58, ss - 58); memcpy_avx_32(dd - 32, ss - 32); break;
case 187: memcpy_avx_128(dd - 187, ss - 187);
case 59: memcpy_avx_32(dd - 59, ss - 59); memcpy_avx_32(dd - 32, ss - 32); break;
case 188: memcpy_avx_128(dd - 188, ss - 188);
case 60: memcpy_avx_32(dd - 60, ss - 60); memcpy_avx_32(dd - 32, ss - 32); break;
case 189: memcpy_avx_128(dd - 189, ss - 189);
case 61: memcpy_avx_32(dd - 61, ss - 61); memcpy_avx_32(dd - 32, ss - 32); break;
case 190: memcpy_avx_128(dd - 190, ss - 190);
case 62: memcpy_avx_32(dd - 62, ss - 62); memcpy_avx_32(dd - 32, ss - 32); break;
case 191: memcpy_avx_128(dd - 191, ss - 191);
case 63: memcpy_avx_32(dd - 63, ss - 63); memcpy_avx_32(dd - 32, ss - 32); break;
case 192: memcpy_avx_128(dd - 192, ss - 192);
case 64: memcpy_avx_64(dd - 64, ss - 64); break;
case 193: memcpy_avx_128(dd - 193, ss - 193);
case 65: memcpy_avx_64(dd - 65, ss - 65); dd[-1] = ss[-1]; break;
case 194: memcpy_avx_128(dd - 194, ss - 194);
case 66: memcpy_avx_64(dd - 66, ss - 66); *((uint16_t*)(dd - 2)) = *((uint16_t*)(ss - 2)); break;
case 195: memcpy_avx_128(dd - 195, ss - 195);
case 67: memcpy_avx_64(dd - 67, ss - 67); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 196: memcpy_avx_128(dd - 196, ss - 196);
case 68: memcpy_avx_64(dd - 68, ss - 68); *((uint32_t*)(dd - 4)) = *((uint32_t*)(ss - 4)); break;
case 197: memcpy_avx_128(dd - 197, ss - 197);
case 69: memcpy_avx_64(dd - 69, ss - 69); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 198: memcpy_avx_128(dd - 198, ss - 198);
case 70: memcpy_avx_64(dd - 70, ss - 70); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 199: memcpy_avx_128(dd - 199, ss - 199);
case 71: memcpy_avx_64(dd - 71, ss - 71); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 200: memcpy_avx_128(dd - 200, ss - 200);
case 72: memcpy_avx_64(dd - 72, ss - 72); *((uint64_t*)(dd - 8)) = *((uint64_t*)(ss - 8)); break;
case 201: memcpy_avx_128(dd - 201, ss - 201);
case 73: memcpy_avx_64(dd - 73, ss - 73); memcpy_avx_16(dd - 16, ss - 16); break;
case 202: memcpy_avx_128(dd - 202, ss - 202);
case 74: memcpy_avx_64(dd - 74, ss - 74); memcpy_avx_16(dd - 16, ss - 16); break;
case 203: memcpy_avx_128(dd - 203, ss - 203);
case 75: memcpy_avx_64(dd - 75, ss - 75); memcpy_avx_16(dd - 16, ss - 16); break;
case 204: memcpy_avx_128(dd - 204, ss - 204);
case 76: memcpy_avx_64(dd - 76, ss - 76); memcpy_avx_16(dd - 16, ss - 16); break;
case 205: memcpy_avx_128(dd - 205, ss - 205);
case 77: memcpy_avx_64(dd - 77, ss - 77); memcpy_avx_16(dd - 16, ss - 16); break;
case 206: memcpy_avx_128(dd - 206, ss - 206);
case 78: memcpy_avx_64(dd - 78, ss - 78); memcpy_avx_16(dd - 16, ss - 16); break;
case 207: memcpy_avx_128(dd - 207, ss - 207);
case 79: memcpy_avx_64(dd - 79, ss - 79); memcpy_avx_16(dd - 16, ss - 16); break;
case 208: memcpy_avx_128(dd - 208, ss - 208);
case 80: memcpy_avx_64(dd - 80, ss - 80); memcpy_avx_16(dd - 16, ss - 16); break;
case 209: memcpy_avx_128(dd - 209, ss - 209);
case 81: memcpy_avx_64(dd - 81, ss - 81); memcpy_avx_32(dd - 32, ss - 32); break;
case 210: memcpy_avx_128(dd - 210, ss - 210);
case 82: memcpy_avx_64(dd - 82, ss - 82); memcpy_avx_32(dd - 32, ss - 32); break;
case 211: memcpy_avx_128(dd - 211, ss - 211);
case 83: memcpy_avx_64(dd - 83, ss - 83); memcpy_avx_32(dd - 32, ss - 32); break;
case 212: memcpy_avx_128(dd - 212, ss - 212);
case 84: memcpy_avx_64(dd - 84, ss - 84); memcpy_avx_32(dd - 32, ss - 32); break;
case 213: memcpy_avx_128(dd - 213, ss - 213);
case 85: memcpy_avx_64(dd - 85, ss - 85); memcpy_avx_32(dd - 32, ss - 32); break;
case 214: memcpy_avx_128(dd - 214, ss - 214);
case 86: memcpy_avx_64(dd - 86, ss - 86); memcpy_avx_32(dd - 32, ss - 32); break;
case 215: memcpy_avx_128(dd - 215, ss - 215);
case 87: memcpy_avx_64(dd - 87, ss - 87); memcpy_avx_32(dd - 32, ss - 32); break;
case 216: memcpy_avx_128(dd - 216, ss - 216);
case 88: memcpy_avx_64(dd - 88, ss - 88); memcpy_avx_32(dd - 32, ss - 32); break;
case 217: memcpy_avx_128(dd - 217, ss - 217);
case 89: memcpy_avx_64(dd - 89, ss - 89); memcpy_avx_32(dd - 32, ss - 32); break;
case 218: memcpy_avx_128(dd - 218, ss - 218);
case 90: memcpy_avx_64(dd - 90, ss - 90); memcpy_avx_32(dd - 32, ss - 32); break;
case 219: memcpy_avx_128(dd - 219, ss - 219);
case 91: memcpy_avx_64(dd - 91, ss - 91); memcpy_avx_32(dd - 32, ss - 32); break;
case 220: memcpy_avx_128(dd - 220, ss - 220);
case 92: memcpy_avx_64(dd - 92, ss - 92); memcpy_avx_32(dd - 32, ss - 32); break;
case 221: memcpy_avx_128(dd - 221, ss - 221);
case 93: memcpy_avx_64(dd - 93, ss - 93); memcpy_avx_32(dd - 32, ss - 32); break;
case 222: memcpy_avx_128(dd - 222, ss - 222);
case 94: memcpy_avx_64(dd - 94, ss - 94); memcpy_avx_32(dd - 32, ss - 32); break;
case 223: memcpy_avx_128(dd - 223, ss - 223);
case 95: memcpy_avx_64(dd - 95, ss - 95); memcpy_avx_32(dd - 32, ss - 32); break;
case 224: memcpy_avx_128(dd - 224, ss - 224);
case 96: memcpy_avx_64(dd - 96, ss - 96); memcpy_avx_32(dd - 32, ss - 32); break;
case 225: memcpy_avx_128(dd - 225, ss - 225);
case 97: memcpy_avx_64(dd - 97, ss - 97); memcpy_avx_64(dd - 64, ss - 64); break;
case 226: memcpy_avx_128(dd - 226, ss - 226);
case 98: memcpy_avx_64(dd - 98, ss - 98); memcpy_avx_64(dd - 64, ss - 64); break;
case 227: memcpy_avx_128(dd - 227, ss - 227);
case 99: memcpy_avx_64(dd - 99, ss - 99); memcpy_avx_64(dd - 64, ss - 64); break;
case 228: memcpy_avx_128(dd - 228, ss - 228);
case 100: memcpy_avx_64(dd - 100, ss - 100); memcpy_avx_64(dd - 64, ss - 64); break;
case 229: memcpy_avx_128(dd - 229, ss - 229);
case 101: memcpy_avx_64(dd - 101, ss - 101); memcpy_avx_64(dd - 64, ss - 64); break;
case 230: memcpy_avx_128(dd - 230, ss - 230);
case 102: memcpy_avx_64(dd - 102, ss - 102); memcpy_avx_64(dd - 64, ss - 64); break;
case 231: memcpy_avx_128(dd - 231, ss - 231);
case 103: memcpy_avx_64(dd - 103, ss - 103); memcpy_avx_64(dd - 64, ss - 64); break;
case 232: memcpy_avx_128(dd - 232, ss - 232);
case 104: memcpy_avx_64(dd - 104, ss - 104); memcpy_avx_64(dd - 64, ss - 64); break;
case 233: memcpy_avx_128(dd - 233, ss - 233);
case 105: memcpy_avx_64(dd - 105, ss - 105); memcpy_avx_64(dd - 64, ss - 64); break;
case 234: memcpy_avx_128(dd - 234, ss - 234);
case 106: memcpy_avx_64(dd - 106, ss - 106); memcpy_avx_64(dd - 64, ss - 64); break;
case 235: memcpy_avx_128(dd - 235, ss - 235);
case 107: memcpy_avx_64(dd - 107, ss - 107); memcpy_avx_64(dd - 64, ss - 64); break;
case 236: memcpy_avx_128(dd - 236, ss - 236);
case 108: memcpy_avx_64(dd - 108, ss - 108); memcpy_avx_64(dd - 64, ss - 64); break;
case 237: memcpy_avx_128(dd - 237, ss - 237);
case 109: memcpy_avx_64(dd - 109, ss - 109); memcpy_avx_64(dd - 64, ss - 64); break;
case 238: memcpy_avx_128(dd - 238, ss - 238);
case 110: memcpy_avx_64(dd - 110, ss - 110); memcpy_avx_64(dd - 64, ss - 64); break;
case 239: memcpy_avx_128(dd - 239, ss - 239);
case 111: memcpy_avx_64(dd - 111, ss - 111); memcpy_avx_64(dd - 64, ss - 64); break;
case 240: memcpy_avx_128(dd - 240, ss - 240);
case 112: memcpy_avx_64(dd - 112, ss - 112); memcpy_avx_64(dd - 64, ss - 64); break;
case 241: memcpy_avx_128(dd - 241, ss - 241);
case 113: memcpy_avx_64(dd - 113, ss - 113); memcpy_avx_64(dd - 64, ss - 64); break;
case 242: memcpy_avx_128(dd - 242, ss - 242);
case 114: memcpy_avx_64(dd - 114, ss - 114); memcpy_avx_64(dd - 64, ss - 64); break;
case 243: memcpy_avx_128(dd - 243, ss - 243);
case 115: memcpy_avx_64(dd - 115, ss - 115); memcpy_avx_64(dd - 64, ss - 64); break;
case 244: memcpy_avx_128(dd - 244, ss - 244);
case 116: memcpy_avx_64(dd - 116, ss - 116); memcpy_avx_64(dd - 64, ss - 64); break;
case 245: memcpy_avx_128(dd - 245, ss - 245);
case 117: memcpy_avx_64(dd - 117, ss - 117); memcpy_avx_64(dd - 64, ss - 64); break;
case 246: memcpy_avx_128(dd - 246, ss - 246);
case 118: memcpy_avx_64(dd - 118, ss - 118); memcpy_avx_64(dd - 64, ss - 64); break;
case 247: memcpy_avx_128(dd - 247, ss - 247);
case 119: memcpy_avx_64(dd - 119, ss - 119); memcpy_avx_64(dd - 64, ss - 64); break;
case 248: memcpy_avx_128(dd - 248, ss - 248);
case 120: memcpy_avx_64(dd - 120, ss - 120); memcpy_avx_64(dd - 64, ss - 64); break;
case 249: memcpy_avx_128(dd - 249, ss - 249);
case 121: memcpy_avx_64(dd - 121, ss - 121); memcpy_avx_64(dd - 64, ss - 64); break;
case 250: memcpy_avx_128(dd - 250, ss - 250);
case 122: memcpy_avx_64(dd - 122, ss - 122); memcpy_avx_64(dd - 64, ss - 64); break;
case 251: memcpy_avx_128(dd - 251, ss - 251);
case 123: memcpy_avx_64(dd - 123, ss - 123); memcpy_avx_64(dd - 64, ss - 64); break;
case 252: memcpy_avx_128(dd - 252, ss - 252);
case 124: memcpy_avx_64(dd - 124, ss - 124); memcpy_avx_64(dd - 64, ss - 64); break;
case 253: memcpy_avx_128(dd - 253, ss - 253);
case 125: memcpy_avx_64(dd - 125, ss - 125); memcpy_avx_64(dd - 64, ss - 64); break;
case 254: memcpy_avx_128(dd - 254, ss - 254);
case 126: memcpy_avx_64(dd - 126, ss - 126); memcpy_avx_64(dd - 64, ss - 64); break;
case 255: memcpy_avx_128(dd - 255, ss - 255);
case 127: memcpy_avx_64(dd - 127, ss - 127); memcpy_avx_64(dd - 64, ss - 64); break;
case 256: memcpy_avx_256(dd - 256, ss - 256); break;
}
return dst;
}
//---------------------------------------------------------------------
// main routine
//---------------------------------------------------------------------
static void* memcpy_fast(void *destination, const void *source, size_t size)
{
unsigned char *dst = (unsigned char*)destination;
const unsigned char *src = (const unsigned char*)source;
static size_t cachesize = 0x200000; // L3-cache size
size_t padding;
// small memory copy
if (size <= 256) {
memcpy_tiny(dst, src, size);
_mm256_zeroupper();
return destination;
}
// align destination to 16 bytes boundary
padding = (32 - (((size_t)dst) & 31)) & 31;
#if 0
if (padding > 0) {
__m256i head = _mm256_loadu_si256((const __m256i*)src);
_mm256_storeu_si256((__m256i*)dst, head);
dst += padding;
src += padding;
size -= padding;
}
#else
__m256i head = _mm256_loadu_si256((const __m256i*)src);
_mm256_storeu_si256((__m256i*)dst, head);
dst += padding;
src += padding;
size -= padding;
#endif
// medium size copy
if (size <= cachesize) {
__m256i c0, c1, c2, c3, c4, c5, c6, c7;
for (; size >= 256; size -= 256) {
c0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
c1 = _mm256_loadu_si256(((const __m256i*)src) + 1);
c2 = _mm256_loadu_si256(((const __m256i*)src) + 2);
c3 = _mm256_loadu_si256(((const __m256i*)src) + 3);
c4 = _mm256_loadu_si256(((const __m256i*)src) + 4);
c5 = _mm256_loadu_si256(((const __m256i*)src) + 5);
c6 = _mm256_loadu_si256(((const __m256i*)src) + 6);
c7 = _mm256_loadu_si256(((const __m256i*)src) + 7);
_mm_prefetch((const char*)(src + 512), _MM_HINT_NTA);
src += 256;
_mm256_storeu_si256((((__m256i*)dst) + 0), c0);
_mm256_storeu_si256((((__m256i*)dst) + 1), c1);
_mm256_storeu_si256((((__m256i*)dst) + 2), c2);
_mm256_storeu_si256((((__m256i*)dst) + 3), c3);
_mm256_storeu_si256((((__m256i*)dst) + 4), c4);
_mm256_storeu_si256((((__m256i*)dst) + 5), c5);
_mm256_storeu_si256((((__m256i*)dst) + 6), c6);
_mm256_storeu_si256((((__m256i*)dst) + 7), c7);
dst += 256;
}
}
else { // big memory copy
__m256i c0, c1, c2, c3, c4, c5, c6, c7;
/* __m256i c0, c1, c2, c3, c4, c5, c6, c7; */
_mm_prefetch((const char*)(src), _MM_HINT_NTA);
if ((((size_t)src) & 31) == 0) { // source aligned
for (; size >= 256; size -= 256) {
c0 = _mm256_load_si256(((const __m256i*)src) + 0);
c1 = _mm256_load_si256(((const __m256i*)src) + 1);
c2 = _mm256_load_si256(((const __m256i*)src) + 2);
c3 = _mm256_load_si256(((const __m256i*)src) + 3);
c4 = _mm256_load_si256(((const __m256i*)src) + 4);
c5 = _mm256_load_si256(((const __m256i*)src) + 5);
c6 = _mm256_load_si256(((const __m256i*)src) + 6);
c7 = _mm256_load_si256(((const __m256i*)src) + 7);
_mm_prefetch((const char*)(src + 512), _MM_HINT_NTA);
src += 256;
_mm256_stream_si256((((__m256i*)dst) + 0), c0);
_mm256_stream_si256((((__m256i*)dst) + 1), c1);
_mm256_stream_si256((((__m256i*)dst) + 2), c2);
_mm256_stream_si256((((__m256i*)dst) + 3), c3);
_mm256_stream_si256((((__m256i*)dst) + 4), c4);
_mm256_stream_si256((((__m256i*)dst) + 5), c5);
_mm256_stream_si256((((__m256i*)dst) + 6), c6);
_mm256_stream_si256((((__m256i*)dst) + 7), c7);
dst += 256;
}
}
else { // source unaligned
for (; size >= 256; size -= 256) {
c0 = _mm256_loadu_si256(((const __m256i*)src) + 0);
c1 = _mm256_loadu_si256(((const __m256i*)src) + 1);
c2 = _mm256_loadu_si256(((const __m256i*)src) + 2);
c3 = _mm256_loadu_si256(((const __m256i*)src) + 3);
c4 = _mm256_loadu_si256(((const __m256i*)src) + 4);
c5 = _mm256_loadu_si256(((const __m256i*)src) + 5);
c6 = _mm256_loadu_si256(((const __m256i*)src) + 6);
c7 = _mm256_loadu_si256(((const __m256i*)src) + 7);
_mm_prefetch((const char*)(src + 512), _MM_HINT_NTA);
src += 256;
_mm256_stream_si256((((__m256i*)dst) + 0), c0);
_mm256_stream_si256((((__m256i*)dst) + 1), c1);
_mm256_stream_si256((((__m256i*)dst) + 2), c2);
_mm256_stream_si256((((__m256i*)dst) + 3), c3);
_mm256_stream_si256((((__m256i*)dst) + 4), c4);
_mm256_stream_si256((((__m256i*)dst) + 5), c5);
_mm256_stream_si256((((__m256i*)dst) + 6), c6);
_mm256_stream_si256((((__m256i*)dst) + 7), c7);
dst += 256;
}
}
_mm_sfence();
}
memcpy_tiny(dst, src, size);
_mm256_zeroupper();
return destination;
}
#endif
......@@ -18,8 +18,8 @@ public:
static int32_t RecordStoreDuration ;
static int32_t RecordFlag;
static int32_t OpenOMP ;
static int32_t TimeoutFrames ;
static int32_t SdiOutWaitNums;
static int32_t TimeoutFrames ; //mask 贴图的超时时间 针对如果未收到算法给的结束帧情况使用
static int32_t SecondSdiOutWaitNums; //sdi输出的二次确认功能保留贴图开始的前多少帧
static bool HaveBlackDataFlag;
static int32_t DrawFlag;
......@@ -40,7 +40,7 @@ public:
static uint32_t ReplayForward;
static uint32_t ReplayDeley;
static uint32_t SDIOutputWaitNums;
static uint32_t RTSDIOutputWaitNums; //实时输出的缓存帧大小 一般5帧
static int32_t CropFlag;
static int32_t CropX;
static int32_t CropY;
......
......@@ -423,7 +423,7 @@ void DeckLinkInputPage::ObjectNameChanged(const QString& newName)
//connect(Capture.get(), SIGNAL(PushFrame(std::shared_ptr<videoFrameData>)), NDIOutput.get(), SLOT(AddFrame(std::shared_ptr<videoFrameData>)));
connect(Replay.get(), SIGNAL(PushFrame(std::shared_ptr<videoFrameData>)), NDIOutput.get(), SLOT(AddFrame(std::shared_ptr<videoFrameData>)), Qt::DirectConnection);
connect(Capture.get(), SIGNAL(PushFrameToReplay(std::shared_ptr<videoFrameData>)), Replay.get(), SLOT(addFrame(std::shared_ptr<videoFrameData>))/*, Qt::DirectConnection*/);
connect(Capture.get(), SIGNAL(PushFrameToReplay(std::shared_ptr<videoFrameData>)), Replay.get(), SLOT(addFrame(std::shared_ptr<videoFrameData>)), Qt::DirectConnection);
connect(udpServer.get(), SIGNAL(SendMsg(QByteArray)),this, SLOT(RecvMsg(QByteArray)));
//NDIOutput->moveToThread(Capture.get());
//connect(SelectedDevice.Get(), SIGNAL(ArrivedFrame(ComPtr<IDeckLinkVideoInputFrame>)), NDIOutput.get(), SLOT(AddFrame(ComPtr<IDeckLinkVideoInputFrame>)));
......
......@@ -245,13 +245,13 @@ bool DeckLinkOutputDevice::StartPlayback(BMDDisplayMode displayMode, bool enable
/*scheduleVideoFramesThread = std::thread(&DeckLinkOutputDevice::scheduleVideoFramesFunc, this);
scheduleAudioFramesThread = std::thread(&DeckLinkOutputDevice::scheduleAudioFramesFuncDeley, this);*/
if (Settings::SdiOutWaitNums)
if (Settings::SecondSdiOutWaitNums)
{
scheduleVideoFramesThread = std::thread(&DeckLinkOutputDevice::scheduleVideoFramesWaitFunc, this);
}
else
{
if(Settings::SDIOutputWaitNums) scheduleVideoFramesThread = std::thread(&DeckLinkOutputDevice::scheduleVideoFramesRealTimeFunc, this);
if(Settings::RTSDIOutputWaitNums) scheduleVideoFramesThread = std::thread(&DeckLinkOutputDevice::scheduleVideoFramesRealTimeFunc, this);
else scheduleVideoFramesThread = std::thread(&DeckLinkOutputDevice::scheduleVideoFramesNoDeleyFunc, this);
}
......@@ -503,10 +503,10 @@ bool DeckLinkOutputDevice::getReferenceSignalMode(BMDDisplayMode* mode)
while (true)
{
if (state == PlaybackState::Stopping) break;
std::shared_ptr<VideoFrameWithMask> outputFrame;
std::shared_ptr<VideoFrameWithMask> outputFrame = nullptr;
std::vector<std::shared_ptr<VideoFrameWithMask>> frames;
if (outputMaskVideoFrameQueue.WaitFor(Settings::SdiOutWaitNums))
if (outputMaskVideoFrameQueue.WaitFor(Settings::SecondSdiOutWaitNums))
{
outputMaskVideoFrameQueue.Pop(outputFrame);
outputMaskVideoFrameDeque.Pop();
......@@ -839,7 +839,7 @@ bool DeckLinkOutputDevice::getReferenceSignalMode(BMDDisplayMode* mode)
//bool send_zoom_frame_flag = false;
std::shared_ptr<VideoFrameWithMask> wait_zoom_frame = nullptr;
HRESULT ret = S_OK;
int wait_size = Settings::SDIOutputWaitNums;
int wait_size = Settings::RTSDIOutputWaitNums;
while (true)
{
......@@ -1544,7 +1544,7 @@ void DeckLinkOutputDevice::AddZoomFrame(std::shared_ptr<VideoFrameWithMask> fram
{
if (frame && (frame->data_ || (frame->pImage && (frame->pImage->data || frame->pImage->uyvy_data))))
{
if(Settings::SdiOutWaitNums)
if(Settings::SecondSdiOutWaitNums)
{
auto start_time = frame->start_tm_;
//std::lock_guard<std::mutex> locker(sdi_clear_mutex);
......@@ -1572,7 +1572,7 @@ void DeckLinkOutputDevice::AddZoomFrame(std::shared_ptr<VideoFrameWithMask> fram
void DeckLinkOutputDevice::AddVideoFrameMask(std::shared_ptr<VideoFrameWithMask> frame)
{
//auto t1 = TimeMilliSecond();
if (outputMaskVideoFrameQueue.Size() > (50 * 3 + Settings::SdiOutWaitNums) || output_video_frame_map.Size() > (50 * 3 + Settings::SdiOutWaitNums))
if (outputMaskVideoFrameQueue.Size() > (50 * 3 + Settings::SecondSdiOutWaitNums) || output_video_frame_map.Size() > (50 * 3 + Settings::SecondSdiOutWaitNums))
{
if(outputMaskVideoFrameQueue.Size()) outputMaskVideoFrameQueue.Reset();
if (output_video_frame_map.Size()) output_video_frame_map.Clear();
......@@ -1586,31 +1586,23 @@ void DeckLinkOutputDevice::AddVideoFrameMask(std::shared_ptr<VideoFrameWithMask>
//qDebug() << "AddVideoFrameMask:" << "timestamp:" << frame->sequenceNum_ << GetCurrDateTimeStr() << "\n";
}
if (Settings::SDIOutputWaitNums)
{
if (frame->fmt_ == bmdFormat8BitYUV)
{
output_video_frame_map.Insert(frame->timestamp_, frame);
}
else if (frame->fmt_ == bmdFormat8BitBGRA)
{
if (p_dispatch_queue)
if (Settings::SecondSdiOutWaitNums)
{
DispatchQueue* dispath = (DispatchQueue*)p_dispatch_queue;
dispath->dispatch(std::bind(&DeckLinkOutputDevice::BGRAToUYVY, this, std::placeholders::_1), frame);
}
}
outputMaskVideoFrameQueue.Push(frame);
outputMaskVideoFrameDeque.PushBack(frame);
}
else
{
if (frame->fmt_ == bmdFormat8BitYUV)
{
outputMaskVideoFrameQueue.Push(frame);
if (Settings::SdiOutWaitNums) outputMaskVideoFrameDeque.PushBack(frame);
if(Settings::RTSDIOutputWaitNums) output_video_frame_map.Insert(frame->timestamp_, frame);
else outputMaskVideoFrameQueue.Push(frame);
}
}
else if (frame->fmt_ == bmdFormat8BitBGRA)
{
if (Settings::SdiOutWaitNums)
if (Settings::SecondSdiOutWaitNums)
{
if (frame->flag_ == BS_START)
{
......@@ -1618,10 +1610,8 @@ void DeckLinkOutputDevice::AddVideoFrameMask(std::shared_ptr<VideoFrameWithMask>
else
{
auto tmp_map = new std::map<qint64, std::shared_ptr<VideoFrameWithMask>>();
/*auto tmp_tm = frame->timestamp_ - SdiOutWaitNums;
tmp_tm = (tmp_tm < 0 ? 0 : tmp_tm);*/
int size = outputMaskVideoFrameDeque.Size();
int nums = (size > Settings::SdiOutWaitNums ? Settings::SdiOutWaitNums : size);
int nums = (size > Settings::SecondSdiOutWaitNums ? Settings::SecondSdiOutWaitNums : size);
std::vector<std::shared_ptr<VideoFrameWithMask>> tmp_vec;
outputMaskVideoFrameDeque.Put(nums, tmp_vec);
for (auto data : tmp_vec)
......@@ -1637,26 +1627,83 @@ void DeckLinkOutputDevice::AddVideoFrameMask(std::shared_ptr<VideoFrameWithMask>
}
}
}
/*while (masked_map.size() > 2)
{
auto itor = masked_map.begin();
if (itor->second)
{
itor->second->clear();
delete itor->second;
}
masked_map.erase(itor->first);
}*/
if (p_dispatch_queue)
{
DispatchQueue* dispath = (DispatchQueue*)p_dispatch_queue;
dispath->dispatch(std::bind(&DeckLinkOutputDevice::BGRAToUYVY, this, std::placeholders::_1), frame);
}
}
}
//if (Settings::SDIOutputWaitNums)
//{
// if (frame->fmt_ == bmdFormat8BitYUV)
// {
// output_video_frame_map.Insert(frame->timestamp_, frame);
// }
// else if (frame->fmt_ == bmdFormat8BitBGRA)
// {
// if (p_dispatch_queue)
// {
// DispatchQueue* dispath = (DispatchQueue*)p_dispatch_queue;
// dispath->dispatch(std::bind(&DeckLinkOutputDevice::BGRAToUYVY, this, std::placeholders::_1), frame);
// }
// }
//}
//else
//{
// if (frame->fmt_ == bmdFormat8BitYUV)
// {
// outputMaskVideoFrameQueue.Push(frame);
// if (Settings::SdiOutWaitNums) outputMaskVideoFrameDeque.PushBack(frame);
// }
// else if (frame->fmt_ == bmdFormat8BitBGRA)
// {
// if (Settings::SdiOutWaitNums)
// {
// if (frame->flag_ == BS_START)
// {
// if (masked_map.find(frame->timestamp_) != masked_map.end())return;
// else
// {
// auto tmp_map = new std::map<qint64, std::shared_ptr<VideoFrameWithMask>>();
// /*auto tmp_tm = frame->timestamp_ - SdiOutWaitNums;
// tmp_tm = (tmp_tm < 0 ? 0 : tmp_tm);*/
// int size = outputMaskVideoFrameDeque.Size();
// int nums = (size > Settings::SdiOutWaitNums ? Settings::SdiOutWaitNums : size);
// std::vector<std::shared_ptr<VideoFrameWithMask>> tmp_vec;
// outputMaskVideoFrameDeque.Put(nums, tmp_vec);
// for (auto data : tmp_vec)
// {
// if (data)
// {
// data->start_tm_ = frame->timestamp_;
// tmp_map->insert({ data->timestamp_,data });
// }
// }
// masked_map.insert({ frame->timestamp_,tmp_map });
// }
// }
// }
// /*while (masked_map.size() > 2)
// {
// auto itor = masked_map.begin();
// if (itor->second)
// {
// itor->second->clear();
// delete itor->second;
// }
// masked_map.erase(itor->first);
// }*/
// if (p_dispatch_queue)
// {
// DispatchQueue* dispath = (DispatchQueue*)p_dispatch_queue;
// dispath->dispatch(std::bind(&DeckLinkOutputDevice::BGRAToUYVY, this, std::placeholders::_1), frame);
// }
// }
//}
}
#else
......@@ -1774,7 +1821,7 @@ void DeckLinkOutputDevice::BGRAToUYVY(const std::shared_ptr<VideoFrameWithMask>&
std::make_shared<VideoFrameWithMask>(src_w,src_h,tm,image->sequenceNum_,uyvy,bmdFormat8BitYUV,image->flag_,image->meta_, start_time);
//sort_map_.insert({ image->timestamp_ ,video_frame });
if(Settings::SdiOutWaitNums)
if(Settings::SecondSdiOutWaitNums)
{
std::lock_guard<std::mutex> locker(sdi_clear_mutex);
auto itor = masked_map.find(start_time);
......@@ -1786,7 +1833,7 @@ void DeckLinkOutputDevice::BGRAToUYVY(const std::shared_ptr<VideoFrameWithMask>&
}
else
{
if (Settings::SDIOutputWaitNums)
if (Settings::RTSDIOutputWaitNums)
{
output_video_frame_map.Insert(video_frame->timestamp_, video_frame);
}
......
......@@ -233,7 +233,7 @@ void NDIOutputThread::run()
}
else if (timePlusFlag)
{
if (VideoMaskQueue.WaitFor(Settings::SDIOutputWaitNums))
if (VideoMaskQueue.WaitFor(Settings::RTSDIOutputWaitNums))
{
//if (!frame_mask ||(frame_mask->mask_flag && !frame_mask->data_)) continue;
VideoMaskQueue.Pop(frame_mask);
......
......@@ -117,7 +117,8 @@ void ProcessMaskThread::process()
std::shared_ptr<videoFrameData> image = nullptr;
if (taskImageQueue.WaitFor(min_size - 1) /*&& taskImageQueue.Size() >= min_size*/)//有横屏数据了
{
if (taskImageQueue.Front(image))
/*auto ret = taskImageQueue.Front(image);
if (ret)
{
taskImageQueue.Pop();
if (!image || !image->data)
......@@ -125,6 +126,10 @@ void ProcessMaskThread::process()
continue;
}
}
else continue;*/
taskImageQueue.Pop(image);
if (!image || !image->data) continue;
timestamp = (image->replaySeq ? image->replaySeq : image->timestamp);
//auto sequence = image->sequenceNum;
//seqnum = sequence;
......@@ -446,7 +451,7 @@ void ProcessMaskThread::workMaskModify(const std::shared_ptr<videoFrameData>& pI
{
status = BS_START;
start_time = timestamp;
QString msg = "{\"type\":\"MaskMsg\",\"data\":{\"status\":" + QString::number(status) + ",\"timestamp\":" + QString::number(timestamp) + "}}";
QString msg = "{\"signal\":\"MaskMsg\",\"data\":{\"status\":" + QString::number(status) + ",\"timestamp\":" + QString::number(timestamp) + "}}";
UdpSend::GetInstance().SendUdpMsg(msg, Settings::UIIpAddr, QString::number(Settings::UIUdpPort));
//qDebug() << "start copy crop pic.......\n";
}
......@@ -457,7 +462,7 @@ void ProcessMaskThread::workMaskModify(const std::shared_ptr<videoFrameData>& pI
if (status == BS_END)
{
//send msg to ui with udp-protocol
QString msg = "{\"type\":\"MaskMsg\",\"data\":{\"status\":" + QString::number(status) + ",\"timestamp\":" + QString::number(timestamp) + "}}";
QString msg = "{\"signal\":\"MaskMsg\",\"data\":{\"status\":" + QString::number(status) + ",\"timestamp\":" + QString::number(timestamp) + "}}";
UdpSend::GetInstance().SendUdpMsg(msg, Settings::UIIpAddr, QString::number(Settings::UIUdpPort));
}
}
......
......@@ -114,7 +114,7 @@ void ReplayThread::run()
qint32 errNo = (itor_end == storeVideoMap.end() ? 2 : (itor_begin == storeVideoMap.end() ? 1 : 3));
QString msg = "{\"type\":\"ReplayResp\",\"data\":{\"inTime\":" + QString::number(replay_params.start_time) +",\"outTime\":" + QString::number(replay_params.end_time) +
QString msg = "{\"signal\":\"ReplayResp\",\"data\":{\"inTime\":" + QString::number(replay_params.start_time) +",\"outTime\":" + QString::number(replay_params.end_time) +
",\"status\":0,\"errNo\":" + QString::number(errNo) +"}}";
UdpSend::GetInstance().SendUdpMsg(msg, Settings::UIIpAddr, QString::number(Settings::UIUdpPort));
qDebug() << "replay fail,errno:"<<errNo << "\n";
......@@ -165,7 +165,7 @@ void ReplayThread::run()
std::shared_ptr<videoFrameData> frame = nullptr;
frame = (resend_frame ? (resend_start ? resend_frame : replayVideoVec[replay_position] ) : replayVideoVec[replay_position]);
frame->replaySeq = current_seq;
emit PushFrame(frame);
if(frame) emit PushFrame(frame);
if ((!resend_start || !resend_frame) && replay_position < (replayVideoVec.size() - 1)) replay_position++;
if (resend_frame)
......
......@@ -21,7 +21,7 @@ std::map<qint32, qint32> map_scale_mode;
int main_ver = 2;
int mid_ver = 1;
int small_ver = 0;
int small_ver = 1;
//int ReplayStoreTime = 10000;//单位ms
//int FrontDeleyTime = 3000;//单位ms
......@@ -778,12 +778,12 @@ void MomentaMedia::ReadSettings()
Settings::TimeoutFrames = settings.value("TIMEOUTFRAMES").toInt();
if (Settings::TimeoutFrames <= 0) Settings::TimeoutFrames = 50;
Settings::SdiOutWaitNums = settings.value("DELEYSDINUMS",100).toInt();
Settings::SecondSdiOutWaitNums = settings.value("SECONDDELEYSDINUMS",100).toInt();
Settings::ReplayForward = settings.value("REPLAY_START_FORWARD", 5).toInt();
Settings::ReplayDeley = settings.value("REPLAY_END_DELEY", 5).toInt();
Settings::SDIOutputWaitNums = settings.value("SDIOUTPUTWAITNUMS", 5).toInt();
Settings::RTSDIOutputWaitNums = settings.value("RTSDIOUTPUTWAITNUMS", 5).toInt();
Settings::CropFlag = settings.value("CROPRECORD", 0).toInt();
Settings::CropX = settings.value("CROP_X", 0).toInt();
......
......@@ -15,7 +15,7 @@ int32_t Settings::RecordStoreDuration = 10000;
int32_t Settings::RecordFlag = 0;
int32_t Settings::OpenOMP = 1;
int32_t Settings::TimeoutFrames = 50;
int32_t Settings::SdiOutWaitNums = 100;
int32_t Settings::SecondSdiOutWaitNums = 100;
bool Settings::HaveBlackDataFlag = false;
int32_t Settings::DrawFlag = 0;
......@@ -37,7 +37,7 @@ uint32_t Settings::ZoomOutWaitCnt = 0;
uint32_t Settings::ReplayForward = 0;
uint32_t Settings::ReplayDeley = 0;
uint32_t Settings::SDIOutputWaitNums = 0;
uint32_t Settings::RTSDIOutputWaitNums = 0;
int32_t Settings::CropFlag = 0;
int32_t Settings::CropX = 0;
......
No preview for this file type
No preview for this file type
......@@ -4,13 +4,14 @@ available device "DeckLink 8K Pro (1)"
available device "DeckLink 8K Pro (2)"
available device "DeckLink 8K Pro (3)"
available device "DeckLink 8K Pro (4)"
"2024-05-16 14:30:21.752" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.811" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.831" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.851" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.871" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.891" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.911" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.931" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:21.989" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-16 14:30:23.034" decklink input fps 51
"2024-05-17 19:02:03.216" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.236" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.256" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.276" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.296" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.316" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.336" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:03.394" index: 0 DeckLinkInputDevice get video frame No input source 80000000 ------------
"2024-05-17 19:02:04.438" decklink input fps 51
"2024-05-17 19:02:05.439" decklink input fps 50
"2024-05-17 19:02:06.439" decklink input fps 50
......@@ -42,13 +42,13 @@ SDIONEFRAMEDURATION=50
RECORDFLAG=1
OPENOMP=1
TIMEOUTFRAMES=250
DELEYSDINUMS=0
SECONDDELEYSDINUMS=100
REPLAY_START_FORWARD=50
REPLAY_END_DELEY=50
SDIOUTPUTWAITNUMS=5
CROPRECORD=0c
RTSDIOUTPUTWAITNUMS=5
CROPRECORD=0
CROP_X=400
CROP_Y=1800
CROP_DIRECTION=3
UI_UDP_PORT=8100
UI_IP_ADDR=192.168.31.83
UI_IP_ADDR=127.0.0.1
......@@ -42,10 +42,10 @@ SDIONEFRAMEDURATION=50
RECORDFLAG=1
OPENOMP=1
TIMEOUTFRAMES=250
DELEYSDINUMS=0
SECONDDELEYSDINUMS=100
REPLAY_START_FORWARD=50
REPLAY_END_DELEY=50
SDIOUTPUTWAITNUMS=5
RTSDIOUTPUTWAITNUMS=5
CROPRECORD=0
CROP_X=400
CROP_Y=1800
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment