.gear/rules | 2 + .../tags/b5deaf3ce90c8f23c2e406cea65fa1699705c233 | 13 ++++ .gear/tags/list | 1 + bootstrap | 2 +- configure.ac | 4 +- libdvbcsa.spec | 78 ++++++++++++++++++++++ src/dvbcsa/dvbcsa.h | 12 +++- src/dvbcsa_algo.c | 2 +- src/dvbcsa_bs_algo.c | 2 +- src/dvbcsa_bs_altivec.h | 1 - src/dvbcsa_bs_block.c | 40 ++++++----- src/dvbcsa_bs_mmx.h | 3 +- src/dvbcsa_bs_sse.h | 21 +++--- src/dvbcsa_bs_stream.c | 18 ++--- src/dvbcsa_bs_uint32.h | 1 - src/dvbcsa_bs_uint64.h | 1 - test/testbsops.c | 7 -- 17 files changed, 152 insertions(+), 56 deletions(-) diff --git a/.gear/rules b/.gear/rules new file mode 100644 index 0000000..4fcfd99 --- /dev/null +++ b/.gear/rules @@ -0,0 +1,2 @@ +tar: @version@:. +diff: @version@:. . diff --git a/.gear/tags/b5deaf3ce90c8f23c2e406cea65fa1699705c233 b/.gear/tags/b5deaf3ce90c8f23c2e406cea65fa1699705c233 new file mode 100644 index 0000000..edab98b --- /dev/null +++ b/.gear/tags/b5deaf3ce90c8f23c2e406cea65fa1699705c233 @@ -0,0 +1,13 @@ +object 1fd44b1e296f63e30b8082d2669d32d6d49c9a3f +type commit +tag 1.1.0 +tagger Alexei Takaseev 1398704524 +0900 + +1.1.0 +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1 + +iEYEABECAAYFAlNeiYwACgkQ/bd21Jt2ne8WpwCgg5/RDa8QQcmaIx4sKQQcDlIR +cMAAniDg27dOl7JaXpvbxJeka7rrgwuF +=OUS6 +-----END PGP SIGNATURE----- diff --git a/.gear/tags/list b/.gear/tags/list new file mode 100644 index 0000000..9986114 --- /dev/null +++ b/.gear/tags/list @@ -0,0 +1 @@ +b5deaf3ce90c8f23c2e406cea65fa1699705c233 1.1.0 diff --git a/bootstrap b/bootstrap index 61ddb46..d06106b 100755 --- a/bootstrap +++ b/bootstrap @@ -32,7 +32,7 @@ aclocalflags="`sed -ne 's/^[ \t]*ACLOCAL_AMFLAGS[ \t]*=//p' Makefile.am`" # Check for automake amvers="no" -for v in "-1.11" "-1.10" "110" "-1.9" "19" "-1.8" "18" "-1.7" "17" "-1.6" "16" "-1.5" "15"; do +for v in "-1.14" "-1.11" "-1.10" "110" "-1.9" "19" "-1.8" "18" "-1.7" "17" "-1.6" "16" "-1.5" "15"; do if automake${v} --version >/dev/null 2>&1; then amvers="${v}" break diff --git a/configure.ac 
b/configure.ac index 742f631..ed8a1ad 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ AC_ARG_ENABLE(debug, AC_HELP_STRING(--enable-debug, [Enable debug]), enable_debu if test "$enable_debug" = "yes" ; then GCC_CFLAGS="$CFLAGS -g -DDVBCSA_DEBUG -D_XOPEN_SOURCE=600" else - GCC_CFLAGS="$CFLAGS -O2 -fexpensive-optimizations -funroll-loops -fomit-frame-pointer -D_XOPEN_SOURCE=600" + GCC_CFLAGS="$CFLAGS -O3 -fomit-frame-pointer -D_XOPEN_SOURCE=600" fi AC_ARG_ENABLE(uint32, AC_HELP_STRING(--enable-uint32, [Use native 32 bits integers for bitslice]), enable_uint32=$enableval, enable_uint32=no) @@ -16,7 +16,7 @@ AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for bitslice]), sse2 AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec for bitslice]), altivec_debug=$enableval, enable_altivec=no) AM_INIT_AUTOMAKE(libdvbcsa, 1.1.0) -AM_CONFIG_HEADER(config.h) +AC_CONFIG_HEADERS(config.h) AC_PROG_CC diff --git a/libdvbcsa.spec b/libdvbcsa.spec new file mode 100644 index 0000000..85b1d29 --- /dev/null +++ b/libdvbcsa.spec @@ -0,0 +1,78 @@ +Name: libdvbcsa +Version: 1.1.0 +Release: alt4 + +Summary: DVB Common Scrambling Algorithm with encryption and decryption capabilities +License: GPLv2 +Group: System/Libraries +Url: http://www.videolan.org/developers/libdvbcsa.html +Packager: Alexei Takaseev + +Source: %name-%version.tar +Patch0: %name-%version-%release.patch + +%description +libdvbcsa is a free implementation of the DVB Common +Scrambling Algorithm - DVB/CSA - with encryption and +decryption capabilities. +Features + * Portability. This library has been successfully + tested on different processors with 32 bits, + 64 bits and 128 bits word width, little-endian + and big-endian bytes ordering. + * Performance. It comes in two flavors: a classical + single packet implementation and a faster parallel + bitslice implementation. + * The parallel implementation can take advantages + of MMX, SSE or Altivec instruction sets. 
Parallel + implementation can process Mpeg TS packets at + 300Mbps or more on recent processors. + * Freedom. libdvbcsa is released under the General + Public License, ensuring it will stay free, and used + only for free software products. + * Simplicity. The API comes with only 5 functions + for the single packet implementation, and 6 functions + for the parallel bitslice implementation. + +%package devel +Summary: Development files for %name +Group: Development/C +Requires: %name = %version-%release + +%description devel +The %name-devel package contains libraries and header files for +developing applications that use %name. + + +%prep +%setup +%patch0 -p1 + +%build +./bootstrap +%configure --enable-sse2 +%make + +%install +%make_install DESTDIR="%buildroot" install + +%files +%doc COPYING README INSTALL AUTHORS NEWS +%_libdir/*.so.* + +%files devel +%_includedir/dvbcsa +%_libdir/libdvbcsa.so + +%changelog +* Sun Mar 01 2015 Alexei Takaseev 1.1.0-alt4 +- Fix C++ compilation using the library + +* Thu Oct 16 2014 Alexei Takaseev 1.1.0-alt3 +- Enable SSE2 + +* Sat Aug 30 2014 Alexei Takaseev 1.1.0-alt2 +- Disable SSE + +* Tue Apr 29 2014 Alexei Takaseev 1.1.0-alt1 +- Initial build for ALT Linux Sisyphus. 
diff --git a/src/dvbcsa/dvbcsa.h b/src/dvbcsa/dvbcsa.h index c8fec59..b4d29d1 100644 --- a/src/dvbcsa/dvbcsa.h +++ b/src/dvbcsa/dvbcsa.h @@ -27,6 +27,10 @@ #ifndef LIBDVBCSA_H_ #define LIBDVBCSA_H_ +#ifdef __cplusplus +extern "C" { +#endif + /* csa control word */ typedef unsigned char dvbcsa_cw_t[8]; @@ -38,7 +42,7 @@ typedef unsigned char dvbcsa_cw_t[8]; typedef struct dvbcsa_key_s dvbcsa_key_t; /** allocate a new csa key context */ -struct dvbcsa_key_s * dvbcsa_key_alloc(); +struct dvbcsa_key_s * dvbcsa_key_alloc(void); /** free a csa key context */ @@ -76,7 +80,7 @@ typedef struct dvbcsa_bs_key_s dvbcsa_bs_key_t; /** allocate a new csa bitslice key context */ -struct dvbcsa_bs_key_s * dvbcsa_bs_key_alloc(); +struct dvbcsa_bs_key_s * dvbcsa_bs_key_alloc(void); /** free a csa bitslice key context */ @@ -108,5 +112,9 @@ void dvbcsa_bs_encrypt(const struct dvbcsa_bs_key_s *key, const struct dvbcsa_bs_batch_s *pcks, unsigned int maxlen); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/dvbcsa_algo.c b/src/dvbcsa_algo.c index e24ed43..6e8de8c 100644 --- a/src/dvbcsa_algo.c +++ b/src/dvbcsa_algo.c @@ -70,7 +70,7 @@ void dvbcsa_encrypt (const struct dvbcsa_key_s *key, uint8_t *data, unsigned int #endif } -struct dvbcsa_key_s * dvbcsa_key_alloc() +struct dvbcsa_key_s * dvbcsa_key_alloc(void) { return malloc(sizeof (struct dvbcsa_key_s)); } diff --git a/src/dvbcsa_bs_algo.c b/src/dvbcsa_bs_algo.c index 7838990..bc8e61f 100644 --- a/src/dvbcsa_bs_algo.c +++ b/src/dvbcsa_bs_algo.c @@ -74,7 +74,7 @@ void dvbcsa_bs_encrypt(const struct dvbcsa_bs_key_s *key, BS_EMPTY (); // restore CPU multimedia state } -struct dvbcsa_bs_key_s * dvbcsa_bs_key_alloc() +struct dvbcsa_bs_key_s * dvbcsa_bs_key_alloc(void) { void *p; diff --git a/src/dvbcsa_bs_altivec.h b/src/dvbcsa_bs_altivec.h index 7a84dfa..8c1b608 100644 --- a/src/dvbcsa_bs_altivec.h +++ b/src/dvbcsa_bs_altivec.h @@ -86,7 +86,6 @@ static DVBCSA_INLINE dvbcsa_bs_word_t get_bs_val8( uint8_t val, dvbcsa_bs_word_t #define 
BS_AND(a, b) vec_and((a), (b)) #define BS_OR(a, b) vec_or((a), (b)) #define BS_XOR(a, b) vec_xor((a), (b)) -#define BS_XOREQ(a, b) { dvbcsa_bs_word_t *_t = &(a); *_t = vec_xor(*_t, (b)); } #define BS_NOT(a) vec_nor((a), (a)) #define SHVAL_1 BS_VAL8(01) diff --git a/src/dvbcsa_bs_block.c b/src/dvbcsa_bs_block.c index a3754fe..58f6ca9 100644 --- a/src/dvbcsa_bs_block.c +++ b/src/dvbcsa_bs_block.c @@ -26,22 +26,12 @@ #include "dvbcsa/dvbcsa.h" #include "dvbcsa_bs.h" -DVBCSA_INLINE static inline void -dvbcsa_bs_block_sbox(dvbcsa_bs_word_t *w) -{ - // table lookup, works one byte at a time - uint8_t *si = (uint8_t *)w; - int i; - - for (i = 0; i < BS_BATCH_BYTES; i++) - si[i] = dvbcsa_block_sbox[si[i]]; -} - +#define BS_XOREQ(a, b) do { dvbcsa_bs_word_t *_t = &(a); *_t = BS_XOR(*_t, (b)); } while (0) DVBCSA_INLINE static inline void dvbcsa_bs_block_decrypt_register (const dvbcsa_bs_word_t *block, dvbcsa_bs_word_t *r) { - int i, g; + int i, j, g; r += 8 * 56; @@ -54,9 +44,17 @@ dvbcsa_bs_block_decrypt_register (const dvbcsa_bs_word_t *block, dvbcsa_bs_word_ for (g = 0; g < 8; g++) { - dvbcsa_bs_word_t sbox_out = BS_XOR(block[i], r6_N[g]); + union { + dvbcsa_bs_word_t so; + uint8_t si[BS_BATCH_BYTES]; + } u; - dvbcsa_bs_block_sbox(&sbox_out); + u.so = BS_XOR(block[i], r6_N[g]); + + for (j = 0; j < BS_BATCH_BYTES; j++) + u.si[j] = dvbcsa_block_sbox[u.si[j]]; + + dvbcsa_bs_word_t sbox_out = u.so; // bit permutation @@ -118,7 +116,7 @@ void dvbcsa_bs_block_decrypt_batch(const struct dvbcsa_bs_key_s *key, DVBCSA_INLINE static inline void dvbcsa_bs_block_encrypt_register (const dvbcsa_bs_word_t *block, dvbcsa_bs_word_t *r) { - int i, g; + int i, j, g; // loop over kk[55]..kk[0] for (i = 0; i < 56; i++) @@ -129,9 +127,17 @@ dvbcsa_bs_block_encrypt_register (const dvbcsa_bs_word_t *block, dvbcsa_bs_word_ for (g = 0; g < 8; g++) { - dvbcsa_bs_word_t sbox_out = BS_XOR(block[i], r7_N[g]); + union { + dvbcsa_bs_word_t so; + uint8_t si[BS_BATCH_BYTES]; + } u; + + u.so = 
BS_XOR(block[i], r7_N[g]); + + for (j = 0; j < BS_BATCH_BYTES; j++) + u.si[j] = dvbcsa_block_sbox[u.si[j]]; - dvbcsa_bs_block_sbox(&sbox_out); + dvbcsa_bs_word_t sbox_out = u.so; // bit permutation diff --git a/src/dvbcsa_bs_mmx.h b/src/dvbcsa_bs_mmx.h index 62171ed..2235ff8 100644 --- a/src/dvbcsa_bs_mmx.h +++ b/src/dvbcsa_bs_mmx.h @@ -42,14 +42,13 @@ typedef __m64 dvbcsa_bs_word_t; #define BS_AND(a, b) _m_pand((a), (b)) #define BS_OR(a, b) _m_por((a), (b)) #define BS_XOR(a, b) _m_pxor ((a), (b)) -#define BS_XOREQ(a, b) { dvbcsa_bs_word_t *_t = &(a); *_t = _m_pxor(*_t, (b)); } #define BS_NOT(a) _m_pxor ((a), BS_VAL8(ff)) #define BS_SHL(a, n) _m_psllqi((a), n) #define BS_SHR(a, n) _m_psrlqi((a), n) #define BS_SHL8(a, n) BS_SHL(a, 8 * (n)) #define BS_SHR8(a, n) BS_SHR(a, 8 * (n)) -#define BS_EXTRACT8(a, n) _mm_cvtsi64_si32(_m_psrlqi((a), 8 * (n))); +#define BS_EXTRACT8(a, n) _mm_cvtsi64_si32(_m_psrlqi((a), 8 * (n))) #define BS_EMPTY() _m_empty() diff --git a/src/dvbcsa_bs_sse.h b/src/dvbcsa_bs_sse.h index 51edbe3..f1b0c79 100644 --- a/src/dvbcsa_bs_sse.h +++ b/src/dvbcsa_bs_sse.h @@ -29,27 +29,26 @@ #include #include -typedef __m128 dvbcsa_bs_word_t; +typedef __m128i dvbcsa_bs_word_t; #define BS_BATCH_SIZE 128 #define BS_BATCH_BYTES 16 -#define BS_VAL(n, m) _mm_castsi128_ps(_mm_set_epi64x(n, m)) +#define BS_VAL(n, m) _mm_set_epi64x(n, m) #define BS_VAL64(n) BS_VAL(0x##n##ULL, 0x##n##ULL) #define BS_VAL32(n) BS_VAL64(n##n) #define BS_VAL16(n) BS_VAL32(n##n) #define BS_VAL8(n) BS_VAL16(n##n) -#define BS_AND(a, b) _mm_and_ps((a), (b)) -#define BS_OR(a, b) _mm_or_ps((a), (b)) -#define BS_XOR(a, b) _mm_xor_ps((a), (b)) -#define BS_XOREQ(a, b) { dvbcsa_bs_word_t *_t = &(a); *_t = _mm_xor_ps(*_t, (b)); } -#define BS_NOT(a) _mm_xor_ps((a), BS_VAL8(ff)) +#define BS_AND(a, b) _mm_and_si128((a), (b)) +#define BS_OR(a, b) _mm_or_si128((a), (b)) +#define BS_XOR(a, b) _mm_xor_si128((a), (b)) +#define BS_NOT(a) _mm_andnot_si128((a), BS_VAL8(ff)) -#define BS_SHL(a, n) 
_mm_castsi128_ps(_mm_slli_epi64(_mm_castps_si128(a), n)) -#define BS_SHR(a, n) _mm_castsi128_ps(_mm_srli_epi64(_mm_castps_si128(a), n)) -#define BS_SHL8(a, n) _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(a), n)) -#define BS_SHR8(a, n) _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a), n)) +#define BS_SHL(a, n) _mm_slli_epi64(a, n) +#define BS_SHR(a, n) _mm_srli_epi64(a, n) +#define BS_SHL8(a, n) _mm_slli_si128(a, n) +#define BS_SHR8(a, n) _mm_srli_si128(a, n) #define BS_EXTRACT8(a, n) ((uint8_t*)&(a))[n] diff --git a/src/dvbcsa_bs_stream.c b/src/dvbcsa_bs_stream.c index f659ad6..7cb7f09 100644 --- a/src/dvbcsa_bs_stream.c +++ b/src/dvbcsa_bs_stream.c @@ -26,7 +26,7 @@ #include "dvbcsa/dvbcsa.h" #include "dvbcsa_bs.h" -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -43,7 +43,7 @@ dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (tmp2, BS_AND (fe, tmp3)); } -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -60,7 +60,7 @@ dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (tmp2, BS_AND (fe, tmp3)); } -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -76,7 +76,7 @@ dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (tmp2, fe); } -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -92,7 +92,7 @@ dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (BS_XOR (*sa, tmp2), fe); } -static void DVBCSA_INLINE 
+static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -109,7 +109,7 @@ dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (tmp2, BS_AND (fe, tmp3)); } -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -126,7 +126,7 @@ dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, *sb = BS_XOR (tmp2, BS_AND (fe, tmp3)); } -static void DVBCSA_INLINE +static void DVBCSA_INLINE inline dvbcsa_bs_stream_sbox7(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb, dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd, dvbcsa_bs_word_t fe, @@ -148,8 +148,8 @@ dvbcsa_bs_stream_cipher_batch(const struct dvbcsa_bs_key_s *key, const struct dvbcsa_bs_batch_s *pcks, unsigned int maxlen) { - dvbcsa_bs_word_t A[32 + 10][4]; // 32 because we will move back (virtual shift register) - dvbcsa_bs_word_t B[32 + 10][4]; // 32 because we will move back (virtual shift register) + dvbcsa_bs_word_t A[10][4]; + dvbcsa_bs_word_t B[10][4]; dvbcsa_bs_word_t X[4]; dvbcsa_bs_word_t Y[4]; dvbcsa_bs_word_t Z[4]; diff --git a/src/dvbcsa_bs_uint32.h b/src/dvbcsa_bs_uint32.h index ae35a36..4aa4efb 100644 --- a/src/dvbcsa_bs_uint32.h +++ b/src/dvbcsa_bs_uint32.h @@ -39,7 +39,6 @@ typedef uint32_t dvbcsa_bs_word_t; #define BS_AND(a, b) ((a) & (b)) #define BS_OR(a, b) ((a) | (b)) #define BS_XOR(a, b) ((a) ^ (b)) -#define BS_XOREQ(a, b) ((a) ^= (b)) #define BS_NOT(a) (~(a)) #define BS_SHL(a, n) ((a) << (n)) diff --git a/src/dvbcsa_bs_uint64.h b/src/dvbcsa_bs_uint64.h index 84ef308..a34b4bc 100644 --- a/src/dvbcsa_bs_uint64.h +++ b/src/dvbcsa_bs_uint64.h @@ -46,7 +46,6 @@ typedef uint64_t dvbcsa_bs_word_t; #define BS_AND(a, b) ((a) & (b)) #define BS_OR(a, b) ((a) | (b)) #define BS_XOR(a, b) ((a) ^ (b)) -#define BS_XOREQ(a, b) ((a) ^= (b)) #define BS_NOT(a) 
(~(a)) #define BS_SHL(a, n) ((a) << (n)) diff --git a/test/testbsops.c b/test/testbsops.c index 8a22aa0..50748ab 100644 --- a/test/testbsops.c +++ b/test/testbsops.c @@ -204,13 +204,6 @@ main (void) c = BS_XOR(a, b); vec_testeq(BS_VAL8(45), c); - /* test XOREQ */ - - a = BS_VAL8(5a); - b = BS_VAL8(1f); - BS_XOREQ(b, a); - vec_testeq(BS_VAL8(45), b); - /* test NOT */ a = BS_VAL8(5a);