Algorithm - jjin-choi/study_note GitHub Wiki

ยง Pattern Matching

ref : http://www-igm.univ-mlv.fr/~lecroq/string/

1. Brute Force (BF)

  • x : pattern
  • y : input
  • m : size of pattern
  • n : size of input
  • O( nm )
#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )
void BF(char *x, int m, char *y, int n) {
	int i, j;

	/* Searching */
	for (j = 0; j <= n - m; ++j) {
		for (i = 0; i < m && x[i] == y[i + j]; ++i);
		if (i >= m)
			OUTPUT(j);
	}
}

int main()
{
	char y[] = "how are you hello world hello";
	char x[] = "hello";

	BF( x, strlen(x), y, strlen(y) );
	return 0;
}

2. Karp-Rabin (KR)

  • data structure : hash
  • hash๋ฅผ ์ด์šฉํ•˜๋ฉด์„œ๋„ rehash macro๋ฅผ ๊ตฌํ˜„ํ•˜์—ฌ for loop ๋ฅผ ์‚ฌ์šฉํ•˜์ง€ ์•Š๋„๋ก ํ•œ๋‹ค.
#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )
#define REHASH(a, b, h) ((((h) - (a)*d) << 1) + (b))

void KR(char *x, int m, char *y, int n) {
	int d, hx, hy, i, j;

	/* Preprocessing */
	/* computes d = 2^(m-1) with
	   the left-shift operator */
	for (d = i = 1; i < m; ++i)
		d = (d<<1);

	for (hy = hx = i = 0; i < m; ++i) {
		hx = ((hx<<1) + x[i]);
		hy = ((hy<<1) + y[i]);
	}

	/* Searching */
	j = 0;
	while (j <= n-m) {
		if (hx == hy && memcmp(x, y + j, m) == 0)
			OUTPUT(j);
		hy = REHASH(y[j], y[j + m], hy);
		++j;
	}

}

int main()
{
	char y[] = "how are you hello world hello";
	char x[] = "hello";

	KR( x, strlen(x), y, strlen(y) );
	return 0;
}

3. Shift Or (SO)

  • bit mask / bit shift ์‚ฌ์šฉ

  • ASZIE : 256

  • O(nm) โ†’ O(n+m)

  • S[ASIZE][pattern size] : ์ดˆ๊ธฐํ™” (1111 1111) ํ›„ ๊ฐ ASCII ๊ฐ’์— pattern ์— ์กด์žฌํ•˜๋Š” ์œ„์น˜ ์ €์žฅ

    • pattern: hello
    • S['h'] = 1111 1110
    • S['e'] = 1111 1101
    • S['l'] = 1111 0011
    • S['o'] = 1110 1111
    • lim = 0001 1111 ์—์„œ right shift ํ•œ๋ฒˆ ํ›„ ~() ์„ ๋ถ™์ธ๋‹ค. (lim = ~(lim >>1))
    • j๋Š” 1๋ถ€ํ„ฐ j<<=1 ํ•˜์—ฌ m๋ฒˆ ๋ฐ˜๋ณต
  • searching ์šฉ bit : state

    • state๋Š” lim ๊ณผ ๋น„๊ตํ•˜๊ณ  state<<1์€ S[y[i]] ์™€ ๋น„๊ตํ•œ๋‹ค.
    • state<<1๊ณผ S[y[i]]๋ฅผ OR ์—ฐ์‚ฐ์„ ํ•˜์—ฌ ๊ทธ ๊ฒฐ๊ณผ๋ฅผ state์— ์ €์žฅํ•œ๋‹ค.
    • ์ด ๋•Œ, ๋‘˜๋‹ค 0์ธ ๋ถ€๋ถ„๋งŒ (์ฆ‰ ์ผ์น˜ํ•˜๋Š” ๋ถ€๋ถ„) ๋งŒ 0์ด ๋œ๋‹ค.
    • pattern์— ์—†๋Š” ๊ธ€์ž๋Š” S[' '] ์—์„œ ๋ชจ๋‘ 1๋กœ ์ดˆ๊ธฐํ™” ๋˜์–ด์žˆ๊ธฐ ๋•Œ๋ฌธ์—
    • pattern์— ์—†๋Š” ๊ธ€์ž๋ฅผ ๋งŒ๋‚˜๋ฉด state๋Š” ๋„๋กœ์•„๋ฏธํƒ€๋ถˆ์ด ๋˜์–ด 1111 1111 (~0) ์ด ๋œ๋‹ค.
    • pattern์ด ๋ชจ๋‘ ์ผ์น˜ํ•˜๋Š” ๊ฒฝ์šฐ์—๋งŒ lim > state๊ฐ€ ๋œ๋‹ค.
  • look-up table ์„ ๋งŒ๋“ค์–ด์„œ memory๋ฅผ ๋” ์“ฐ๊ธฐ๋Š” ํ–ˆ์ง€๋งŒ, momory๋ฅผ ์•„๋ผ๊ธฐ ์œ„ํ•ด bit ์‚ฌ์šฉ

  • ์ตœ๋Œ€ ํ•œ๊ณ„ : m์ด WORD (32 bits)๋ณด๋‹ค ํฐ ๊ฒฝ์šฐ pattern์„ ์ €์žฅํ•  ์ˆ˜ ์—†์Œ

    • ํ™•์žฅ bit map์„ ๋งŒ๋“ค๋ฉด ๋˜๊ธด ํ•˜๋‹ค.
  • ํ•˜์ง€๋งŒ ์•„์ง๋„ n ํšŒ์ „์„ ํ•œ๋‹ค. ์ ์–ด๋„ n-m+1 ๋งŒํผ for loop ํ•œ๋‹ค.

#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )
#define ASIZE    256 

int preSo(char *x, int m, unsigned int S[]) { 
	unsigned int j, lim; 
	int i; 
	for (i = 0; i < ASIZE; ++i) 
		S[i] = ~0; 
	for (lim = i = 0, j = 1; i < m; ++i, j <<= 1) { 
		S[x[i]] &= ~j; 
		lim |= j; 
	} 
	lim = ~(lim>>1); 
	return(lim); 
} 
void SO(char *x, int m, char *y, int n) { 
	unsigned int lim, state; 
	unsigned int S[ASIZE]; 
	int j; 

	/* Preprocessing */ 
	lim = preSo(x, m, S); 

	/* Searching */ 
	for (state = ~0, j = 0; j < n; ++j) { 
		state = (state<<1) | S[y[j]]; 
		if (state < lim) 
			OUTPUT(j - m + 1); 
	} 
} 


int main()
{
	char y[] = "how are you hello world hello";
	char x[] = "hello";

	SO( x, strlen(x), y, strlen(y) );
	return 0;
}

4. Morris-Pratt (MP)

  • example
    • pattern : g a g g g a g g
    • mpNext : -1 0 0 1 1 1 2 3 4
    • mpNext์˜ ์ฒ˜์Œ์€ -1 default
    • mpNext ์˜ ์˜๋ฏธ๋Š”, ํ‹€๋ฆฐ ๋ฌธ์ž์—ด์ด ๋ฐœ๊ฒฌ๋œ ์œ„์น˜์˜ ์•ž์„œ ์ผ์น˜๋œ ๋ฌธ์ž์—ด์†์˜ ์ ‘๋‘์‚ฌ์™€ ์ ‘๋ฏธ์‚ฌ์˜ ์ผ์น˜์ˆ˜. ์ฆ‰, shift = i = mpNext[i] ๊ฐ’์ด ๋˜์–ด shift ๋งŒํผ๋งŒ ๋ฐ€์–ด์ฃผ๋ฉด ๋œ๋‹ค.
  • ๋ฐ–์˜ for loop๋ฅผ ์ค„์ผ ์ˆ˜ ์žˆ๋‹ค.
  • ์ฆ‰ ์ ‘๋‘์‚ฌ ์ ‘๋ฏธ์‚ฌ๊ฐ€ ์ผ์น˜ํ•œ ๊ฒƒ์— ๋Œ€ํ•ด์„œ ๋˜ ๋น„๊ต๋ฅผ ํ•˜์ง€ ์•Š์•„๋„ ๋œ๋‹ค.
#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )

void preMp(char *x, int m, int mpNext[]) {
	int i, j;

	i = 0;
	j = mpNext[0] = -1;
	while (i < m) {
		while (j > -1 && x[i] != x[j])
			j = mpNext[j];
		mpNext[++i] = ++j;
	}
}

void MP(char *x, int m, char *y, int n) {
	int i, j, mpNext[256];

	/* Preprocessing */
	preMp(x, m, mpNext);

	/* Searching */
	i = j = 0;
	while (j < n) {
		while (i > -1 && x[i] != y[j])
			i = mpNext[i];
		i++;
		j++;
		if (i >= m) {
			OUTPUT(j - i);
			i = mpNext[i];
		}
	}
}

int main()
{
	//int mpNext[256];
	int i;
	char y[] = "how are you gagggagg world hello";
	char x[] = "gagggagg";

	/*
	preMp( x, strlen(x), mpNext );
	for(i=0; i<(strlen(x)+1); i++ )
		printf("%4d", mpNext[i] );
	printf("\n");
	*/
	MP( x, strlen(x), y, strlen(y));
	return 0;
}

5. Knuth-Morris-Pratt

  • example
    • pattern : g a g g g a g g
    • kmpNext : -1 0 -1 1 1 0 -1 1 4
    • ์ด ์˜ˆ์‹œ์—์„œ๋Š” mpNext ์— ๋น„ํ•ด์„œ 7 ํšŒ์ „์ด ์ค„์–ด๋“ค๊ฒŒ ๋œ๋‹ค. (2๋ฐฐ ์ด์ƒ ๋น ๋ฅด๋‹ค!)
  • ๋ฏธ๋ฆฌ๋น„๊ต๋ฅผ ์ˆ˜ํ–‰ํ•˜์—ฌ x[i] ์™€ x[j]๋ฅผ ๋จผ์ € ๋น„๊ตํ•œ๋‹ค.
#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )
#define ASIZE  256
#define XSIZE  256

void preKmp(char *x, int m, int kmpNext[]) {
	int i, j;

	i = 0;
	j = kmpNext[0] = -1;
	while (i < m) {
		while (j > -1 && x[i] != x[j])
			j = kmpNext[j];
		i++;
		j++;
		if (x[i] == x[j])
			kmpNext[i] = kmpNext[j];
		else
			kmpNext[i] = j;
	}
}


void KMP(char *x, int m, char *y, int n) {
	int i, j, kmpNext[XSIZE];

	/* Preprocessing */
	preKmp(x, m, kmpNext);

	/* Searching */
	i = j = 0;
	while (j < n) {
		while (i > -1 && x[i] != y[j])
			i = kmpNext[i];
		i++;
		j++;
		if (i >= m) {
			OUTPUT(j - i);
			i = kmpNext[i];
		}
	}
}

int main()
{
	int i;
	char y[] = "how are you gagggagg world hello";
	char x[] = "gagggagg";

	KMP( x, strlen(x), y, strlen(y));
	return 0;
}

6. Boyer-Moore

  • ๋น„๊ต๋ฅผ pattern์˜ ๋’ค์—์„œ๋ถ€ํ„ฐ ํ•œ๋‹ค.
  • bad character : bad character ๊ฐ€ ๋‹ค์‹œ ๋งŒ๋‚ ๋•Œ๊นŒ์ง€์˜ ์ตœ์†Œ ๊ธธ์ด
    • ๋งจ ๋’ท์นธ์„ ๊ธฐ์ค€์œผ๋กœ ๋ช‡์นธ ๋–จ์–ด์ ธ ์žˆ๋Š”์ง€ ๊ธฐ๋กํ•œ table์ด bmBc[] ์ด๋‹ค. ๋งŒ์•ฝ pattern์— ๊ฐ’์ด ์—†์œผ๋ฉด ?!?!
#include <stdio.h>
#include <string.h>

#define OUTPUT(x)    printf("%d\n", x )
#define MAX(a,b)  (((a)>(b))?(a):(b))
#define ASIZE  256
#define XSIZE  256

void preBmBc(char *x, int m, int bmBc[]) {
	int i;

	for (i = 0; i < ASIZE; ++i)
		bmBc[i] = m;
	for (i = 0; i < m - 1; ++i)
		bmBc[x[i]] = m - i - 1;
}


void suffixes(char *x, int m, int *suff) {
	int f, g, i;

	suff[m - 1] = m;
	g = m - 1;
	for (i = m - 2; i >= 0; --i) {
		if (i > g && suff[i + m - 1 - f] < i - g)
			suff[i] = suff[i + m - 1 - f];
		else {
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && x[g] == x[g + m - 1 - f])
				--g;
			suff[i] = f - g;
		}
	}
}

void preBmGs(char *x, int m, int bmGs[]) {
	int i, j, suff[XSIZE];

	suffixes(x, m, suff);

	for (i = 0; i < m; ++i)
		bmGs[i] = m;
	j = 0;
	for (i = m - 1; i >= 0; --i)
		if (suff[i] == i + 1)
			for (; j < m - 1 - i; ++j)
				if (bmGs[j] == m)
					bmGs[j] = m - 1 - i;
	for (i = 0; i <= m - 2; ++i)
		bmGs[m - 1 - suff[i]] = m - 1 - i;
}


void BM(char *x, int m, char *y, int n) {
	int i, j, bmGs[XSIZE], bmBc[ASIZE];

	/* Preprocessing */
	preBmGs(x, m, bmGs);
	preBmBc(x, m, bmBc);

	/* Searching */
	j = 0;
	while (j <= n - m) {
		for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
		if (i < 0) {
			OUTPUT(j);
			j += bmGs[0];
		}
		else
			j += MAX(bmGs[i], bmBc[y[i + j]] - m + 1 + i);
	}
}
int main()
{
	int i;
	char y[] = "how are you gagggagg world hello";
	char x[] = "gagggagg";

	BM( x, strlen(x), y, strlen(y));
	return 0;
}

ยง Generic sort

1. generic swap

  • swap ํ•จ์ˆ˜๋Š” call by address๋กœ ๊ตฌํ˜„ํ•˜์—ฌ ์‹ค์ œ ๊ฐ’์ด swap ๋  ์ˆ˜ ์žˆ๋„๋ก ํ•œ๋‹ค.
  • ์ธ์ž๋กœ ๋ฐ›์•„์˜ค๋Š” type์„ ๋ชจ๋‘ ์ง€์›ํ•ด์ค˜์•ผ ํ•˜๋Š”๋ฐ, c์—์„œ๋Š” ํ•จ์ˆ˜ ์˜ค๋ฒ„๋กœ๋”ฉ์„ ์ง€์›ํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ
  • ์ด๋•Œ ์“ธ์ˆ˜ ์žˆ๋Š” technique ์ด void pointer
  • integer 4 byte / double 8 byte / char 4 byte ์ด๊ธฐ ๋•Œ๋ฌธ์— int size ์ธ์ž๋ฅผ ์ถ”๊ฐ€ํ•œ๋‹ค.
void swap (char *a, char *b, int size) 
{
   int i;
   char t;
   for (i=0; i<size; i++) {
      t = *a[i];
      *a[i] = *b[i];
      *b[i] = t;
   }
}

// main
int a, b;
int ad, bd;

swap (&a, &b, sizeof(a));
swap(&ad, &bd, sizeof(ad);
  • caller ์—์„œ warning์ด ๋งŽ์ด ๋ฐœ์ƒํ•˜๊ธฐ ๋•Œ๋ฌธ์— ์ด๋ฅผ ๋ง‰๊ธฐ ์œ„ํ•ด์„œ ๋ช…์‹œ์  casting์„ ํ•ด์ค€๋‹ค.
  • ์ฆ‰ &a, &b ๋กœ ๋„˜๊ฒจ์ฃผ๋Š”๊ฒŒ ์•„๋‹ˆ๋ผ, ํ•จ์ˆ˜์˜ ์ธ์ž type์— ๋งž๊ฒŒ casting ํ•˜์—ฌ ๋„˜๊ฒจ์ค€๋‹ค.
swap( (char*)&a, (char*)&b, sizeof(a) )
  • ํ•˜์ง€๋งŒ ์ด๋ ‡๊ฒŒ ๋˜๋ฉด ์‚ฌ์šฉํ• ๋•Œ๋งˆ๋‹ค ์บ์ŠคํŒ…์„ ํ•ด์ฃผ๋Š๋ผ ์“ฐ๊ธฐ ๋ถˆํŽธํ•ด์ง„๋‹ค.
  • ์ด๋ฅผ ํ•ด๊ฒฐํ•˜๊ธฐ ์œ„ํ•ด void pointer์„ ์‚ฌ์šฉํ•˜์ž !
  • ํ•˜์ง€๋งŒ void pointer์„ ์“ธ๋•Œ๋Š” ์ฒจ์ž๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†๋‹ค. (ex. *a[i])
  • ๋”ฐ๋ผ์„œ ์‹ค์ œ๋กœ ์ฃผ์†Œ์— ์ ‘๊ทผํ•  ๋•Œ casting์„ ํ•˜์—ฌ ์ปดํŒŒ์ผ๋Ÿฌ์—๊ฒŒ ์–ผ๋งˆ๋งŒํผ ์›€์ง์ด๋ฉด ๋ ์ง€๋ฅผ ์•Œ๋ ค์ฃผ์–ด์•ผ ํ•œ๋‹ค.
void swap (void *a, void *b, int size) 
{
   int i;
   char t;
   char *aa = (char*)a;
   char *bb = (char*)b;

   for (i=0; i<size; i++) {
      t = *aa[i];
      *aa[i] = *bb[i];
      *bb[i] = t;
   }
}
void generic_swap(void *a, void *b, int size)
{
   char t;
   do {
      t = *(char *)a;
      *(char *)a++ = *(char *)b;
      *(char *)b++ = t;
   } while (--size >0);
// ์‹ค์ œ generic swap ํ•จ์ˆ˜
// size๋ฅผ 0๊ณผ ๋น„๊ตํ•˜๊ธฐ ๋•Œ๋ฌธ์— (์šฐ๋ฆฌ๊ฐ€ ์ง  ์ฝ”๋“œ๋Š” i๊ณผ size ๋น„๊ต) ์ข€๋” ์œ ๋ฆฌํ•˜๋‹ค
}

2. generic sort

void bubble(int *a, int n)
{
   int i, j;
   for (i=0; i<n-1; i++)
      for (j=0;j<n-1-i; j++)
         if (a[j] > a[j+1])
            generic_swap(a+j, a=j+1, sizeof(a[0]));
}

int main()
{
int a[] = {4, 3, 5, 2, 7, 1, 6};
int i;

bubble(a, 7);
for (i=0; i<7;t++)
   printf("%4d", a[i]);
printf("\n");
return 0;
}
  • ๊ฐ’์ด ํ•„์š”ํ•œ ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ์ผ๋ฐ˜ํ™” ํ•  ์ˆ˜ ์—†๋‹ค.
  • ์ฆ‰, sort์—์„œ๋Š” ๊ฐ’์„ ๋น„๊ตํ•˜๊ธฐ ์œ„ํ•ด ๊ฐ’์ด ํ•„์š”ํ•˜๋ฏ€๋กœ sorting, ๊ฒ€์ƒ‰ ๋“ฑ์€ ์ผ๋ฐ˜ํ™” ํ•  ์ˆ˜ ์—†๋‹ค.
  • void ํฌ์ธํ„ฐ๋งŒ์œผ๋กœ๋Š” ์งค ์ˆ˜ ์—†๋‹ค. โ†’ ์ผ๋ฐ˜ํ™”๋ฅผ ์œ„ํ•œ ! "๋น„๊ต"๋ฅผ ์œ ์ €์—๊ฒŒ ์œ„์ž„ํ•˜์ž.
  • ํ•จ์ˆ˜ ํฌ์ธํ„ฐ๋ฅผ ์‚ฌ์šฉ
int int_cmp(const void* a, const void* b)
{
   return (*(int*)a - *(int*)b) > 0;
}

# ๋ชจ๋“  type ์„ sort ํ•˜๊ธฐ ์œ„ํ•ด์„œ
# ํ•˜๋‚˜์˜ ์›์†Œ์˜ size๋ฅผ size๋กœ ๋„ฃ์–ด์ค€๋‹ค.  
void bubble (void *a, int n, int size, int (*cmp)(const void*, const void*))
{
   ...
   # void pointer ๋ฅผ ์ง์ ‘ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์œผ๋ฏ€๋กœ 
   # pointer ์ˆ˜๋™ ๊ณ„์‚ฐํ•ด์ค€๋‹ค. 
   if (cmp ((char*)a+j*size, (char*)a+(j+1)*size)>0)
   ...
}
  • qsort (ํ€ต ์†ŒํŠธ) ๋„ ํ˜ธํ™˜ ๊ฐ€๋Šฅ

ยง Bit algorithm

1. find first set

  • ๋‹จ์ˆœํžˆ << shift๋ฅผ ์ด์šฉํ•ด ๋น„๊ต๋ฅผ ํ•˜๋ฉด ์ตœ์•…์˜ ๊ฒฝ์šฐ O(n) search ํ•ด์•ผ ํ•œ๋‹ค.
  • __ffs ํ•จ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜๊ฒŒ ๋˜๋ฉด bit๋ฅผ ์ ˆ๋ฐ˜์”ฉ ์ชผ๊ฐœ์„œ bit mask ๋กœ ๋น„๊ต๋ฅผ ํ•˜๋ฏ€๋กœ O(log2n) search ๋งŒ ํ•˜๋ฉด ๋œ๋‹ค.
  • ํ•˜์ง€๋งŒ ์•„๋ž˜ ์ฝ”๋“œ๋Š” ์ตœ์ƒ์œ„ bit๊ฐ€ 0์ด๊ฑฐ๋‚˜ 1์ผ๋•Œ ๊ฒฐ๊ณผ๊ฐ€ ๊ฐ™์•„์ง€๊ฒŒ ๋œ๋‹ค.
  • ๋จผ์ € word๊ฐ€ 0 ์ธ์ง€ ์•„๋‹Œ์ง€๋ถ€ํ„ฐ ์ฒดํฌํ•˜๊ณ  ffs ํ•˜๋ฉด ๋œ๋‹ค.
 // for int (32 bit)   
 14 int __ffs(int word)
 15 {
 16     int num = 0;
 17  
 18     if ( (word & 0xffff) == 0 ) {
 19         // 00000000 00000000 11111111 1111111
 20         // if == 0 --> at least 16 bit
 21         num += 16;
 22         word >>= 16;
 23     }
 24  
 25     if ( (word & 0xff) == 0 ) {
 26         // 00000000 1111111
 27         // if == 0 -->  +8
 28         num += 8;
 29         word >>= 8;
 30     }
 31  
 32     if ( (word & 0xf) == 0 ) {
 33         // 00001111
 34         num += 4;
 35         word >>= 4;
 36     }
 37  
 38     if ( (word & 0x3) == 0 ) {
 39         // 0011
 40         num += 2;
 41         word >>= 2;
 42     }
 43  
 44     if ( (word & 0x1) == 0)
 45         num += 1;
 46  
 47     return num;
 48 }      
if (bitmap != 0)
  __ffs
else
  error !

2. find next bit

  • ํ™•์žฅ bitmap search
int bitmap[4] = {0, };
bitmap[3] |= 1 << 4;
// bitmap[100/32] |= 1 << 100%32;

// ํ•˜๋ฉด 100๋ฒˆ์งธ์— ๋“ค์–ด๊ฐ„๋‹ค. 
// 100 = 32 * 3 + 4
  • example

    • real time (RT) process : 0 ~ 99
    • normal process : 100 ~ 139
    • ๋งŒ์•ฝ normal process ๊ตฌ๊ฐ„์—์„œ๋งŒ search๋ฅผ ํ•˜๊ณ  ์‹ถ๋‹ค๋ฉด, ๋ถ€๋ถ„ bit์—ด search๋ฅผ ์œ„ํ•œ logic ์ด find_next_bit ๋ผ๊ณ  ํ•œ๋‹ค.
  • find_next_bit ( input, ์œ ํšจ bit ์ˆ˜, ์œ ํšจ bit ์‹œ์ž‘์  )

3.hamming weight

  • example
    • input : 0x12345678

    • input : 0001 0010 0011 0100 0101 0110 0111 1000

    • mask1 : 0101 0101 0101 0101 0101 0101 0101 0101

    • (x & m1) + ((x >> 1) & m1) output : 0001 0001 0010 0100 0101 0101 0110 0100

    • compare

      • 00 01 00 10 00 11 01 00 01 01 01 10 01 11 10 00 (input)
      • 00 01 00 01 00 10 01 00 01 01 01 01 01 10 01 00 (output)
      • ์ฆ‰, 2 bit์”ฉ ๋‚˜๋ˆ ์„œ ๋ณด๋ฉด mask1 ์˜ ๊ฒฐ๊ณผ๋Š” 2 bit๋ผ๋ฆฌ์˜ bit์˜ ๊ฐœ์ˆ˜๊ฐ€ ๊ธฐ๋ก๋œ๋‹ค.
    • mask2 : 0011 0011 0011 0011 0011 0011 0011 0011

    • input : 0001 0010 0011 0100 0101 0110 0111 1000

    • out : 0001 0001 0010 0001 0010 0010 0011 0001

      • ์ด๋ฒˆ์—๋Š” 4 bit์”ฉ ๋‚˜๋ˆ ์„œ ๋ณด๋ฉด mask2 ์˜ ๊ฒฐ๊ณผ๋Š” 4 bit๋ผ๋ฆฌ์˜ bit ๊ฐœ์ˆ˜๊ฐ€ ๊ธฐ๋ก๋œ๋‹ค.
  3 const int m1    = 0x55555555; // 01010101 ...
  4 const int m2    = 0x33333333; // 00110011 ...
  5 const int m4    = 0x0f0f0f0f;
  6 const int m8    = 0x00ff00ff;
  7 const int m16   = 0x0000ffff;
  8 const int h01   = 0x01010101;
  9      
 10 int popcount_a(int x)
 11 {    
 12     x = (x & m1) + ((x >> 1) & m1); // 2bit count
 13     x = (x & m2) + ((x >> 2) & m2); // 4bit count 
 14     x = (x & m4) + ((x >> 4) & m4);
 15     x = (x & m8) + ((x >> 8) & m8);  
 16     x = (x & m16) + ((x >> 16) & m8);
 17      
 18     return x;
 19 }    
  • popcount_a ๊ฐ€ O(log2n) ์œผ๋กœ ๊ณ„์‚ฐ์„ ์ค„์˜€์ง€๋งŒ, ์—ฐ์‚ฐ์ž๊ฐ€ ๋งŽ๋‹ค.

    • ์—ฐ์‚ฐ์ž๊ฐ€ ํ•œ ์ค„์— 4๊ฐœ๋‚˜ ์“ฐ์—ฌ์„œ CPU ์†๋„๊ฐ€ ๋Š๋ ค์ง€๊ฒŒ ๋œ๋‹ค.
  • popcount_b

    • 2bit ์˜ ๊ฒฝ์šฐ์˜ ์ˆ˜๋Š” 00 01 10 11 ๋ฐ–์— ์—†๋‹ค.
    • ์ด ๋•Œ์˜ bit count ์„ธ๋ฉด 00 01 01 10 ์ด ๋œ๋‹ค.
    • popcount_b ์—์„œ m1์— ๋Œ€ํ•œ ์‹์€, ์›๋ณธ bit์—์„œ ์•ž์˜ bit์˜ ๋ฐ€์–ด์„œ count ํ•œ๊ฑฐ๋ฅผ ์›๋ณธ์—์„œ ๋บ€๊ฒŒ count ๋ผ๋Š” ๊ฒƒ
      • ์›๋ณธ - ์•ž bit = count
      • 00 - 0 = 00
      • 01 - 0 = 01
      • 10 - 1 = 01
      • 11 - 1 = 10
  • popcount_c

    • 3๋ฒˆ์งธ ๊นŒ์ง€๋Š” 8๋ฒˆ์งธ๊นŒ์ง€ bit์ˆ˜ count (์—ฌ๊ธฐ๊นŒ์ง€๋Š” popcount_b์™€ ๋™์ผ)
    • ex) 0x12345678 -> 0x02030404
    • h01 ์„ ์ด์šฉํ•œ๋‹ค ! (h01 = 0x01010101)
    • h01์„ ๊ณฑํ•˜๋ฉด, 0x02030404 * 0x01010101 = ๊ฒฐ๊ณผ ๋ถ€๋ถ„์—์„œ 0x00 0000 1100 0000 1์ด ์žˆ๋Š” ๋ถ€๋ถ„์ด, 2bit์”ฉ ์ž˜๋ผ๋‚ด์„œ ๋”ํ•œ๊ฒƒ๊ณผ ๋™์ผํ•œ ๊ฒฐ๊ณผ๋ฅผ ์–ป๊ฒŒ ๋œ๋‹ค. ์ด๋ฅผ ๋‚˜์ค‘์— 24๋งŒํผ ๋ฐ€์–ด์ค€ ๊ฒฐ๊ณผ๋ถ€๋ถ„๋งŒ ๊ฐ€์ ธ์˜ค๋ฉด ๋œ๋‹ค.
    • ์š”์ฆ˜ CPU๋Š” ๊ณฑ์„ ๋นจ๋ฆฌํ•  ์ˆ˜ ์žˆ๊ธฐ ๋•Œ๋ฌธ์— ์—ฐ์‚ฐ์„ ์ค„์ผ ์ˆ˜ ์žˆ๋‹ค.
    • open source __sw_hweight32 ๋„ ์ด๊ฑธ ์ด์šฉํ•˜๊ณ  ์žˆ์Œ
 21 int popcount_b(int x)
 22 {
 23     x -= (x >> 1) &m1;
 24     x = (x & m2) + ((x >> 2) & m2);
 25     x = (x + (x >> 4)) & m4;
 26     x += x >> 8;
 27     x += x >> 16;
 28  
 29     return x & 0x7f;
 30 }
 31  
 32 int popcount_c(int x)
 33 {
 34     x -= (x >> 1) &m1;
 35     x = (x & m2) + ((x >> 2) & m2);
 36     x = (x + (x >> 4)) & m4;
 37  
 38     return (x * h01) >> 24;
 39 }
 40  
  • popcount_d
    • x &= x-1;

    • 0001 0010 0011 0100 0101 0110 0111 1000

    • 0001 0010 0011 0100 0101 0110 0111 0111

    • ________________________________________ &

    • 0001 0010 0011 0100 0101 0110 0111 0000 -> count = 1

    • 1์ด ์ง€์›Œ์ง€๋Š” ๋งŒํผ counting ์ด ๋œ๋‹ค.

    • ํ•˜์ง€๋งŒ ์„ฑ๋Šฅ์ด ์ข‹์„๊นŒ? ๋ณดํ†ต์˜ ๊ฒฝ์šฐ ์œ ๋ฆฌํ•œ ๊ฒƒ์ด ์•„๋‹ˆ๋ผ, 1์„ ๊ฐ€์ง„ bit ์ˆ˜๊ฐ€ ํ˜„์ €ํžˆ ์ ์„ ๋•Œ ! ์œ ๋ฆฌํ•˜๋‹ค.

    • 32 bits ์ค‘์—์„œ 2๊ฐœ๋งŒ 1์ผ๋•Œ 2๋ฒˆ๋งŒ์— ์ฐพ์•„๋‚ผ ์ˆ˜ ์žˆ์–ด์„œ ์œ ๋ฆฌํ•˜๋‹ค. log2n ๋ณด๋‹ค ์ ์„๋•Œ ์œ ๋ฆฌํ•˜๋‹ค.

    • ํŠน๋ณ„ํ•œ ๊ฒฝ์šฐ์—๋งŒ performance๊ฐ€ ์ข‹๊ธฐ ๋•Œ๋ฌธ์— ์˜คํ”ˆ์†Œ์Šค์—์„œ๋Š” popcount_c๋ฅผ ์‚ฌ์šฉํ•œ๋‹ค.

 32 int popcount_d(int x)
 33 {
 34     int count;
 35     for (count = 0; x; count++)
 36         x &= x - 1;
 37  
 38     return count;
 39 }
 40  

4. bit reverse

  • lookup table
  • 2^8 = 256 ๊ฐœ (16 x 16 matrix ๋ฅผ ๋งŒ๋“ค์–ด ๋ฏธ๋ฆฌ ๊ณ„์‚ฐํ•ด๋‘”๋‹ค.)
  • 0x12 ๋ฅผ ๋„ฃ์œผ๋ฉด 1ํ–‰ 2์—ด์„ ์ฝ์–ด์˜จ๋‹ค.
    • 0x12 โ†’ 0x21 ์ด ์•„๋‹ˆ๋ผ 0001 0010 โ†’ 0100 1000 ์ฆ‰ 0x48 ์ด ๋œ๋‹ค.
    • lookup table ์„ ๋ฏธ๋ฆฌ ์จ๋‘๊ณ  ์ด์šฉํ•œ๋‹ค.
    • unsigned char (8bit) ์— ๋Œ€ํ•ด์„œ 16x16 = 256 ํฌ๊ธฐ์˜ table์„ ๋งŒ๋“ค์–ด ๋‘๊ณ  ์‚ฌ์šฉํ•œ๋‹ค.
    • ์ตœ๋Œ€ ํ•œ๊ณ„๋Š” 16bit์— ๋Œ€ํ•ด์„œ๋Š” (size 65532) lookup table์„ ๋งŒ๋“ค ์ˆ˜ ์—†๋‹ค.
    • unsigned short (16bit) ๋ฅผ ๊ณ„์‚ฐํ• ๋•Œ๋Š”, 8bit ์”ฉ reverseํ•˜๊ณ  ์•ž ๋’ค 8bit๋ฅผ swap ํ•œ๋‹ค.
    • unsigned int (32bit) ๋„ 16bit์”ฉ reverseํ•˜๊ณ  ์•ž ๋’ค 16bit๋ฅผ swap ํ•œ๋‹ค.
 11 u8 bitrev8(u8 x)
 12 {    
 13     return byte_rev_table(x);
 14 }    
 15      
 16 u16 bitrev16(u16 x)
 17 {    
 18     return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8);
 19 }    
 20      
 21 u32 bitrev32(u32 x)
 22 {    
 23     return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16);  
 24 }    

ยง Data ๋ฌด๊ฒฐ์„ฑ ๊ด€๋ จ

1. parity bit

  • ๋„คํŠธ์›Œํฌ๋ฅผ ํƒ€๊ณ  client ์— ์ „์†ก์ด ๋˜์—ˆ์„ ๋•Œ, ๋ฐ์ดํ„ฐ ์˜ค๋ฅ˜๋ฅผ ๊ฒ€์ฆํ•  ์šฉ๋„๋กœ ์‚ฌ์šฉ
  • 1์˜ ๊ฐœ์ˆ˜๊ฐ€ ํ™€์ˆ˜์ผ๋•Œ๋Š” ๋งจ ์•ž bit์— 1์„ ๋„ฃ๊ณ , 1์˜ ๊ฐœ์ˆ˜๊ฐ€ ์ง์ˆ˜์ผ๋•Œ๋Š” ๋งจ ์•ž bit์— 0์„ ๋„ฃ๋Š”๋‹ค.
    • hweight ์ด์šฉํ•˜์—ฌ 1์˜ ๊ฐœ์ˆ˜ ํŒŒ์•…
    • hweight(data)%2 ์ผ ๊ฒฝ์šฐ data |= 0x80; (1000 0000)
  • parity bit๊ฐ€ ์ ์šฉ๋˜๋ฉด 1์˜ ๊ฐœ์ˆ˜๊ฐ€ ํ•ญ์ƒ ์ง์ˆ˜์ด๊ธฐ ๋•Œ๋ฌธ์—, ๋ฐ›๋Š” ์ชฝ์—์„œ๋„ hweight ์ด์šฉํ•˜์—ฌ 1์˜ ๊ฐœ์ˆ˜ ํŒŒ์•…
  • ์ตœ๋Œ€ ํ•œ๊ณ„ : ์ง์ˆ˜๊ฐœ์˜ bit๊ฐ€ ๋ณ€ํ˜•๋˜๋ฉด error๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†๋‹ค.

2. check sum

  • ์†ก์‹ ๋ถ€์™€ ์ˆ˜์‹ ๋ถ€์˜ ์ฝ”๋“œ๊ฐ€ ๊ฐ™๋‹ค.

์•„์Šคํ‚ค ์ฝ”๋“œ 128 bit ์ค‘์—์„œ 7bit (Ex) a 97 0110 0001 ref ) https://github.com/imguru-mooc/opensource-datastructure-algorithm

โš ๏ธ **GitHub.com Fallback** โš ๏ธ