M4RI 1.0.1
|
00001 00009 #ifndef PACKEDMATRIX_H 00010 #define PACKEDMATRIX_H 00011 /******************************************************************* 00012 * 00013 * M4RI: Linear Algebra over GF(2) 00014 * 00015 * Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu> 00016 * Copyright (C) 2008-2010 Martin Albrecht <M.R.Albrecht@rhul.ac.uk> 00017 * 00018 * Distributed under the terms of the GNU General Public License (GPL) 00019 * version 2 or higher. 00020 * 00021 * This code is distributed in the hope that it will be useful, 00022 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00023 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00024 * General Public License for more details. 00025 * 00026 * The full text of the GPL is available at: 00027 * 00028 * http://www.gnu.org/licenses/ 00029 * 00030 ********************************************************************/ 00031 00032 #include <math.h> 00033 #include <assert.h> 00034 #include <stdio.h> 00035 00036 #ifdef HAVE_SSE2 00037 #include <emmintrin.h> 00038 #endif 00039 00040 00041 #ifdef HAVE_SSE2 00042 00049 #define SSE2_CUTOFF 20 00050 #endif 00051 00060 #define MZD_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4*CPU_L2_CACHE)))/2,2048) 00061 00062 00069 typedef struct { 00075 mmb_t *blocks; 00076 00081 size_t nrows; 00082 00087 size_t ncols; 00088 00092 size_t width; 00093 00098 size_t offset; 00099 00105 word **rows; 00106 00107 } mzd_t; 00108 00119 mzd_t *mzd_init(const size_t r, const size_t c); 00120 00127 void mzd_free(mzd_t *A); 00128 00129 00151 mzd_t *mzd_init_window(const mzd_t *M, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc); 00152 00159 #define mzd_free_window mzd_free 00160 00169 static inline void mzd_row_swap(mzd_t *M, const size_t rowa, const size_t rowb) { 00170 if(rowa == rowb) 00171 return; 00172 size_t i; 00173 size_t width = M->width - 1; 00174 word *a = M->rows[rowa]; 00175 word *b = M->rows[rowb]; 00176 word tmp; 00177 word mask_begin = RIGHT_BITMASK(RADIX - M->offset); 00178 word mask_end = LEFT_BITMASK( (M->offset + M->ncols)%RADIX ); 00179 00180 if (width != 0) { 00181 tmp = a[0]; 00182 a[0] = (a[0] & ~mask_begin) | (b[0] & mask_begin); 00183 b[0] = (b[0] & ~mask_begin) | (tmp & mask_begin); 00184 00185 for(i = 1; i<width; i++) { 00186 tmp = a[i]; 00187 a[i] = b[i]; 00188 b[i] = tmp; 00189 } 00190 tmp = a[width]; 00191 a[width] = (a[width] & ~mask_end) | (b[width] & mask_end); 00192 b[width] = (b[width] & ~mask_end) | (tmp & mask_end); 00193 00194 } else { 00195 tmp = a[0]; 00196 a[0] = (a[0] & ~mask_begin) | (b[0] & mask_begin & mask_end) | (a[0] & ~mask_end); 00197 b[0] = (b[0] & ~mask_begin) | (tmp & mask_begin & mask_end) | (b[0] & ~mask_end); 00198 } 00199 } 00200 00213 void mzd_copy_row(mzd_t* B, size_t i, const mzd_t* A, size_t j); 00214 00223 void mzd_col_swap(mzd_t *M, const size_t cola, const size_t colb); 00224 00235 static inline void mzd_col_swap_in_rows(mzd_t *M, const size_t cola, const size_t colb, const size_t start_row, const size_t stop_row) { 00236 if (cola == colb) 00237 return; 00238 00239 const size_t _cola = cola + M->offset; 00240 const size_t _colb = colb + M->offset; 00241 00242 const size_t a_word = _cola/RADIX; 00243 const size_t b_word = _colb/RADIX; 00244 const size_t a_bit = _cola%RADIX; 00245 const size_t b_bit = _colb%RADIX; 00246 00247 word a, b, *base; 00248 00249 size_t i; 00250 00251 if(a_word == b_word) { 00252 const word ai = RADIX - a_bit - 1; 00253 const word bi = RADIX - b_bit - 1; 00254 for (i=start_row; i<stop_row; i++) { 00255 base = (M->rows[i] + a_word); 00256 register word b = *base; 00257 register word x = ((b >> ai) ^ (b >> bi)) & 1; // XOR temporary 00258 *base = b ^ ((x << ai) | (x << bi)); 00259 } 00260 return; 00261 } 00262 00263 const word a_bm = (ONE<<(RADIX - (a_bit) - 1)); 00264 const word b_bm = (ONE<<(RADIX - (b_bit) - 1)); 00265 00266 if(a_bit > b_bit) { 00267 const size_t offset = a_bit - b_bit; 00268 for (i=start_row; i<stop_row; i++) { 00269 base = M->rows[i]; 00270 a = *(base + a_word); 00271 b = *(base + b_word); 00272 00273 a ^= (b & b_bm) >> offset; 00274 b ^= (a & a_bm) << offset; 00275 a ^= (b & b_bm) >> offset; 00276 00277 *(base + a_word) = a; 00278 *(base + b_word) = b; 00279 } 00280 } else { 00281 const size_t offset = b_bit - a_bit; 00282 for (i=start_row; i<stop_row; i++) { 00283 base = M->rows[i]; 00284 a = *(base + a_word); 00285 b = *(base + b_word); 00286 00287 a ^= (b & b_bm) << offset; 00288 b ^= (a & a_bm) >> offset; 00289 a ^= (b & b_bm) << offset; 00290 *(base + a_word) = a; 00291 *(base + b_word) = b; 00292 } 00293 } 00294 00295 } 00296 00308 static inline BIT mzd_read_bit(const mzd_t *M, const size_t row, const size_t col ) { 00309 return GET_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX); 00310 } 00311 00324 static inline void mzd_write_bit(mzd_t *M, const size_t row, const size_t col, const BIT value) { 00325 if (value==1) 00326 SET_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX); 00327 else 00328 CLR_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX); 00329 } 00330 00339 void mzd_print(const mzd_t *M); 00340 00347 void mzd_print_tight(const mzd_t *M); 00348 00359 /*void mzd_row_add_offset(mzd_t *M, size_t dstrow, size_t srcrow, size_t coloffset); */ 00360 static inline void mzd_row_add_offset(mzd_t *M, size_t dstrow, size_t srcrow, size_t coloffset) { 00361 coloffset += M->offset; 00362 const size_t startblock= coloffset/RADIX; 00363 size_t wide = M->width - startblock; 00364 word *src = M->rows[srcrow] + startblock; 00365 word *dst = M->rows[dstrow] + startblock; 00366 00367 if(!wide) 00368 return; 00369 00370 word temp = *src++; 00371 if (coloffset%RADIX) 00372 temp = RIGHTMOST_BITS(temp, (RADIX-(coloffset%RADIX)-1)); 00373 *dst++ ^= temp; 00374 wide--; 00375 00376 #ifdef HAVE_SSE2 00377 if (ALIGNMENT(src,16)==8 && wide) { 00378 *dst++ ^= *src++; 00379 wide--; 00380 } 00381 __m128i *__src = (__m128i*)src; 00382 __m128i *__dst = (__m128i*)dst; 00383 const __m128i *eof = (__m128i*)((unsigned long)(src + wide) & ~0xF); 00384 __m128i xmm1; 00385 00386 while(__src < eof) { 00387 xmm1 = _mm_xor_si128(*__dst, *__src++); 00388 *__dst++ = xmm1; 00389 } 00390 src = (word*)__src; 00391 dst = (word*)__dst; 00392 wide = ((sizeof(word)*wide)%16)/sizeof(word); 00393 #endif 00394 size_t i; 00395 for(i=0; i<wide; i++) { 00396 dst[i] ^= src[i]; 00397 } 00398 } 00399 00411 void mzd_row_add(mzd_t *M, const size_t sourcerow, const size_t destrow); 00412 00427 mzd_t *mzd_transpose(mzd_t *DST, const mzd_t *A ); 00428 00443 mzd_t *mzd_mul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B); 00444 00459 mzd_t *mzd_addmul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B); 00460 00472 mzd_t *_mzd_mul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B, const int clear); 00473 00483 mzd_t *_mzd_mul_va(mzd_t *C, const mzd_t *v, const mzd_t *A, const int clear); 00484 00495 void mzd_randomize(mzd_t *M); 00496 00511 void mzd_set_ui(mzd_t *M, const unsigned value); 00512 00528 int mzd_gauss_delayed(mzd_t *M, const size_t startcol, const int full); 00529 00545 int mzd_echelonize_naive(mzd_t *M, const int full); 00546 00556 BIT mzd_equal(const mzd_t *A, const mzd_t *B ); 00557 00571 int mzd_cmp(const mzd_t *A, const mzd_t *B); 00572 00582 mzd_t *mzd_copy(mzd_t *DST, const mzd_t *A); 00583 00604 mzd_t *mzd_concat(mzd_t *C, const mzd_t *A, const mzd_t *B); 00605 00625 mzd_t *mzd_stack(mzd_t *C, const mzd_t *A, const mzd_t *B); 00626 00639 mzd_t *mzd_submatrix(mzd_t *S, const mzd_t *M, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc); 00640 00654 mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t *A, const mzd_t *I); 00655 00667 mzd_t *mzd_add(mzd_t *C, const mzd_t *A, const mzd_t *B); 00668 00679 mzd_t *_mzd_add(mzd_t *C, const mzd_t *A, const mzd_t *B); 00680 00691 #define mzd_sub mzd_add 00692 00703 #define _mzd_sub _mzd_add 00704 00725 void mzd_combine(mzd_t * DST, const size_t row3, const size_t startblock3, 00726 const mzd_t * SC1, const size_t row1, const size_t startblock1, 00727 const mzd_t * SC2, const size_t row2, const size_t startblock2); 00728 00738 static inline word mzd_read_bits(const mzd_t *M, const size_t x, const size_t y, const int n) { 00739 word temp; 00740 00741 /* there are two possible situations. Either all bits are in one 00742 * word or they are spread across two words. */ 00743 00744 if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) { 00745 /* everything happens in one word here */ 00746 temp = M->rows[x][(y+M->offset) / RADIX]; /* get the value */ 00747 temp <<= (y+M->offset)%RADIX; /* clear upper bits */ 00748 temp >>= RADIX - n; /* clear lower bits and move to correct position.*/ 00749 return temp; 00750 00751 } else { 00752 /* two words are affected */ 00753 const size_t block = (y+M->offset) / RADIX; /* correct block */ 00754 const size_t spot = (y+M->offset+n) % RADIX; /* correct offset */ 00755 /* make room by shifting spot times to the right, and add stuff from the second word */ 00756 temp = (M->rows[x][block] << spot) | ( M->rows[x][block + 1] >> (RADIX - spot) ); 00757 return (temp << (RADIX-n)) >> (RADIX-n); /* clear upper bits and return */ 00758 } 00759 } 00760 00761 00772 static inline void mzd_xor_bits(const mzd_t *M, const size_t x, const size_t y, const int n, word values) { 00773 word *temp; 00774 00775 /* there are two possible situations. Either all bits are in one 00776 * word or they are spread across two words. */ 00777 00778 if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) { 00779 /* everything happens in one word here */ 00780 temp = M->rows[x] + (y+M->offset) / RADIX; 00781 *temp ^= values<<(RADIX-((y+M->offset)%RADIX)-n); 00782 00783 } else { 00784 /* two words are affected */ 00785 const size_t block = (y+M->offset) / RADIX; /* correct block */ 00786 const size_t spot = (y+M->offset+n) % RADIX; /* correct offset */ 00787 M->rows[x][block] ^= values >> (spot); 00788 M->rows[x][block + 1] ^= values<<(RADIX-spot); 00789 } 00790 } 00791 00802 static inline void mzd_and_bits(const mzd_t *M, const size_t x, const size_t y, const int n, word values) { 00803 word *temp; 00804 00805 /* there are two possible situations. Either all bits are in one 00806 * word or they are spread across two words. */ 00807 00808 if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) { 00809 /* everything happens in one word here */ 00810 temp = M->rows[x] + (y+M->offset) / RADIX; 00811 *temp &= values<<(RADIX-((y+M->offset)%RADIX)-n); 00812 00813 } else { 00814 /* two words are affected */ 00815 const size_t block = (y+M->offset) / RADIX; /* correct block */ 00816 const size_t spot = (y+M->offset+n) % RADIX; /* correct offset */ 00817 M->rows[x][block] &= values >> (spot); 00818 M->rows[x][block + 1] &= values<<(RADIX-spot); 00819 } 00820 } 00821 00831 static inline void mzd_clear_bits(const mzd_t *M, const size_t x, const size_t y, const int n) { 00832 word temp; 00833 00834 /* there are two possible situations. Either all bits are in one 00835 * word or they are spread across two words. */ 00836 00837 if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) { 00838 /* everything happens in one word here */ 00839 temp = M->rows[x][(y+M->offset) / RADIX]; 00840 temp <<= (y+M->offset)%RADIX; /* clear upper bits */ 00841 temp >>= RADIX-n; /* clear lower bits and move to correct position.*/ 00842 temp <<= RADIX-n - (y+M->offset)%RADIX; 00843 M->rows[x][(y+M->offset) / RADIX] ^= temp; 00844 } else { 00845 /* two words are affected */ 00846 const size_t block = (y+M->offset) / RADIX; /* correct block */ 00847 const size_t spot = (y+M->offset+n) % RADIX; /* correct offset */ 00848 M->rows[x][block] ^= M->rows[x][block] & ((ONE<<(n-spot))-1); 00849 M->rows[x][block+1] ^= (M->rows[x][block+1]>>(RADIX-spot))<<(RADIX-spot); 00850 } 00851 } 00852 00859 int mzd_is_zero(mzd_t *A); 00860 00869 void mzd_row_clear_offset(mzd_t *M, const size_t row, const size_t coloffset); 00870 00887 int mzd_find_pivot(mzd_t *M, size_t start_row, size_t start_col, size_t *r, size_t *c); 00888 00889 00902 double mzd_density(mzd_t *A, int res); 00903 00918 double _mzd_density(mzd_t *A, int res, size_t r, size_t c); 00919 00920 00929 size_t mzd_first_zero_row(mzd_t *A); 00930 00931 #endif //PACKEDMATRIX_H