programming erlang ch15 - andstudy/forge GitHub Wiki

ets 와 dets

비교 ets (Erlang term storage) dets (disk ets)
데이타 위치 메모리 디스크
최대 크기 메모리에 따라 다름 최대 2GB
속도 빠름 느림
지속 시간 휘발성 영속적
테이블 열기 ets:new dets:open_file
삽입 insert(Tablename, X) - X 는 튜플 하나, 혹은 튜플의 리스트
조회 lookup(TableName, Key) - 결과 : Key 와 매치하는 튜플 리스트
제거 ets:delete(TableId) dets:close(TableId)
이름 local global
다른 프로세스와 공유 X ?? O

테이블 유형

  • set

  • ordered set

  • bag

  • duplicate bag

      -module(ets_test).
      -export([start/0]).
      
      %% Runs the insertion demo once for each ETS table type, in the order
      %% set, ordered_set, bag, duplicate_bag. Returns ok.
      start() ->
          TableTypes = [set, ordered_set, bag, duplicate_bag],
          lists:foreach(fun(Type) -> test_ets(Type) end, TableTypes).
      	
      %% Creates a table of type Mode, inserts {a,1}, {b,2}, {a,1}, {a,3} in
      %% that order, prints the table contents next to the mode name and
      %% deletes the table again. Returns true (the result of ets:delete/1).
      test_ets(Mode) ->
          Tab = ets:new(test, [Mode]),
          Rows = [{a, 1}, {b, 2}, {a, 1}, {a, 3}],
          %% Insert one row at a time so the insertion order is explicit.
          lists:foreach(fun(Row) -> ets:insert(Tab, Row) end, Rows),
          Contents = ets:tab2list(Tab),
          %% Study question: how do ~w and ~p differ? (see p.260)
          io:format("~13w => ~p~n", [Mode, Contents]),
          ets:delete(Tab).
    
      lists:foreach(fun(X) -> test_ets(X) end, [set...]).
      ets:new(test, [Mode, {keypos, 2}]).
    

ets 테이블 효율성

해시 테이블, 균형 이진 트리

작은 튜플 vs 큰 바이너리

Trigrams

Trigrams 이 뭘까요? -.-;;;

lib_trigrams.erl

    -module(lib_trigrams).
    -export([for_each_trigram_in_the_english_language/2,
    	 make_tables/0, timer_tests/0,
    	 open/0, close/1, is_word/2,
    	 how_many_trigrams/0, 
    	 make_ets_set/0, make_ets_ordered_set/0, make_mod_set/0,
    	 lookup_all_ets/2, lookup_all_set/2
    	]).
    -import(lists, [reverse/1]).
    
    %% Counts the trigrams, then builds the ordered_set table, the set table
    %% and the `sets`-module set, printing one line per step.
    %%
    %% timer:tc(Module, Function, Arguments) -> {Time, Value} measures the
    %% real time (in microseconds) taken to evaluate
    %% apply(Module, Function, Arguments).
    %%
    %% Every "size" figure is the file size divided by the entry count of the
    %% ordered_set table; every "time/trigram" figure is the elapsed time
    %% divided by the total number of trigrams scanned.
    make_tables() ->
        {CountTime, Total} = timer:tc(?MODULE, how_many_trigrams, []),
        io:format("Counting - No of trigrams=~p time/trigram=~p~n", [Total, CountTime / Total]),
        {OrdTime, Stored} = timer:tc(?MODULE, make_ets_ordered_set, []),
        OrdBytes = filelib:file_size("trigramsOS.tab"),
        io:format("Ets ordered Set size=~p time/trigram=~p~n", [OrdBytes / Stored, OrdTime / Total]),
        {SetTime, _} = timer:tc(?MODULE, make_ets_set, []),
        SetBytes = filelib:file_size("trigramsS.tab"),
        io:format("Ets set size=~p time/trigram=~p~n", [SetBytes / Stored, SetTime / Total]),
        {ModTime, _} = timer:tc(?MODULE, make_mod_set, []),
        ModBytes = filelib:file_size("trigrams.set"),
        io:format("Module sets size=~p time/trigram=~p~n", [ModBytes / Stored, ModTime / Total]).
    	
    %% Builds the trigram table as an ETS ordered_set and dumps it to
    %% "trigramsOS.tab". Returns whatever make_a_set/2 returns.
    make_ets_ordered_set() ->
        make_a_set(ordered_set, "trigramsOS.tab").
    %% Builds the trigram table as an ETS set and dumps it to "trigramsS.tab".
    %% Returns whatever make_a_set/2 returns.
    make_ets_set() ->
        make_a_set(set, "trigramsS.tab").
    
    %% Builds an ETS table of the given Type that holds every trigram as a
    %% one-element tuple {Binary}, dumps the table to FileName and returns the
    %% number of entries stored in it. The in-memory table is deleted before
    %% returning.
    %%
    %% Crashes with badmatch if the table cannot be written to FileName, so a
    %% failed dump is reported here instead of surfacing later as a missing
    %% or stale file.
    make_a_set(Type, FileName) ->
        Tab = ets:new(table, [Type]),
        %% The accumulator is unused: the fun is called only for its side
        %% effect of inserting into Tab.
        F = fun(Str, _) -> ets:insert(Tab, {list_to_binary(Str)}) end,
        for_each_trigram_in_the_english_language(F, 0),
        ok = ets:tab2file(Tab, FileName),
        Size = ets:info(Tab, size),
        ets:delete(Tab),
        Size.
    	
    %% Collects every trigram (as a binary) into a `sets` set and writes the
    %% term_to_binary/1 encoding of that set to "trigrams.set".
    %% Returns the result of file:write_file/2.
    make_mod_set() ->
        AddTrigram = fun(Str, Acc) -> sets:add_element(list_to_binary(Str), Acc) end,
        Trigrams = for_each_trigram_in_the_english_language(AddTrigram, sets:new()),
        file:write_file("trigrams.set", [term_to_binary(Trigrams)]).
    
    %% Times lookups against the ordered_set dump, then the set dump, then the
    %% `sets`-module dump, printing one line for each.
    %% Returns the result of time_lookup_module_sets/0.
    timer_tests() ->
        EtsCases = [{"Ets ordered Set", "trigramsOS.tab"}, {"Ets set", "trigramsS.tab"}],
        lists:foreach(fun({Label, File}) -> time_lookup_ets_set(Label, File) end, EtsCases),
        time_lookup_module_sets().
    	
    %% Loads the table dumped in File, looks up every one of its entries once
    %% and prints the average time per lookup, labelled with Type.
    %% Deletes the loaded table afterwards and returns true.
    time_lookup_ets_set(Type, File) ->
        {ok, Tab} = ets:file2tab(File),
        Rows = ets:tab2list(Tab),
        {Elapsed, _} = timer:tc(?MODULE, lookup_all_ets, [Tab, Rows]),
        PerLookup = Elapsed / length(Rows),
        io:format("~s lookup=~p micro seconds~n", [Type, PerLookup]),
        ets:delete(Tab).
    	
    %% Performs one ets:lookup/2 on Tab for the key of each one-element tuple
    %% {Key} in the list, discarding the results. Returns ok.
    lookup_all_ets(Tab, [{Key} | Rest]) ->
        ets:lookup(Tab, Key),
        lookup_all_ets(Tab, Rest);
    lookup_all_ets(_Tab, []) ->
        ok.
    
    %% Reads the set stored in "trigrams.set", tests membership for every one
    %% of its elements once and prints the average time per lookup.
    %% Returns the result of io:format/2.
    time_lookup_module_sets() ->
        {ok, Encoded} = file:read_file("trigrams.set"),
        Trigrams = binary_to_term(Encoded),
        Members = sets:to_list(Trigrams),
        {Elapsed, _} = timer:tc(?MODULE, lookup_all_set, [Trigrams, Members]),
        io:format("Module set lookup=~p micro seconds~n", [Elapsed / length(Members)]).
    
    %% Calls sets:is_element/2 on Set for every key in the list, discarding
    %% the results. Returns ok.
    lookup_all_set(Set, [Key | Rest]) ->
        sets:is_element(Key, Set),
        lookup_all_set(Set, Rest);
    lookup_all_set(_Set, []) ->
        ok.
    	
    %% Returns the total number of trigrams produced by the word-list
    %% iterator (every occurrence is counted, duplicates included).
    how_many_trigrams() ->
        Count = fun(_Trigram, Seen) -> Seen + 1 end,
        for_each_trigram_in_the_english_language(Count, 0).
    
    %% Folds F over every trigram of every word in the word list.
    %%
    %% Reads the gzip-compressed file "354984si.ngl.gz" (relative path),
    %% unpacks it and calls F(Trigram, Acc) -> NewAcc for each trigram,
    %% starting from A0. Returns the final accumulator.
    for_each_trigram_in_the_english_language(F, A0) ->
        {ok, Compressed} = file:read_file("354984si.ngl.gz"),
        Chars = binary_to_list(zlib:gunzip(Compressed)),
        scan_word_list(Chars, F, A0).
    
    %% Walks the character list word by word, threading the accumulator
    %% through scan_trigrams/3. Each word is scanned with a leading space
    %% prepended here (get_next_word/2 already appends the trailing one).
    scan_word_list([], _F, Acc) ->
        Acc;
    scan_word_list(Chars, F, Acc) ->
        {Word, Rest} = get_next_word(Chars, []),
        scan_word_list(Rest, F, scan_trigrams([$\s | Word], F, Acc)).
    	
    %% Splits the next word off the front of a character list.
    %%
    %% Words are terminated by \r\n or by the end of input. The second
    %% argument accumulates the word in reverse, so it is reversed on return.
    %% Returns {Word, Rest}, where Word carries one trailing space and Rest is
    %% the input after the \r\n terminator ([] at end of input).
    get_next_word([], Acc) ->
        {lists:reverse([$\s | Acc]), []};
    get_next_word([$\r, $\n | Rest], Acc) ->
        {lists:reverse([$\s | Acc]), Rest};
    get_next_word([Char | Rest], Acc) ->
        get_next_word(Rest, [Char | Acc]).
    
    %% Folds F over every window of three consecutive characters in the list,
    %% left to right: F(Trigram, Acc) -> NewAcc. Lists shorter than three
    %% characters yield the accumulator unchanged.
    scan_trigrams([X, Y, Z | Rest], F, Acc) ->
        scan_trigrams([Y, Z | Rest], F, F([X, Y, Z], Acc));
    scan_trigrams(_Short, _F, Acc) ->
        Acc.
    	
    %% access routines
    %%		open() -> Table
    %%		close(Table)
    %%		is_word(Table, String) -> Bool

    %% Checks Str against the trigram table: the string is padded with one
    %% space on each side and handed to is_word1/2.
    is_word(Tab, Str) ->
        Padded = [$\s | Str ++ [$\s]],
        is_word1(Tab, Padded).
    
    %% Returns true only if every window of three consecutive characters in
    %% the list is present in Tab; stops at the first missing trigram.
    %% Lists shorter than three characters are never words.
    is_word1(Tab, [_, _, _] = Last) ->
        is_this_a_trigram(Tab, Last);
    is_word1(Tab, [A, B, C | Rest]) ->
        is_this_a_trigram(Tab, [A, B, C]) andalso is_word1(Tab, [B, C | Rest]);
    is_word1(_Tab, _TooShort) ->
        false.
    	
    %% Returns true if the binary form of the character list X is a key in
    %% Tab, false otherwise.
    is_this_a_trigram(Tab, X) ->
        Key = list_to_binary(X),
        ets:lookup(Tab, Key) =/= [].
    	
    %% Loads the trigram set table from "trigramsS.tab", which is expected to
    %% sit in the same directory as this module's object file, and returns the
    %% table identifier. Crashes with badmatch if the file cannot be loaded.
    open() ->
        Dir = filename:dirname(code:which(?MODULE)),
        %% filename:join/2 inserts the separator, so the path stays well
        %% formed whatever form Dir has.
        {ok, Tab} = ets:file2tab(filename:join(Dir, "trigramsS.tab")),
        Tab.
    
    %% Deletes a table previously returned by open/0. Returns true.
    close(Table) ->
        ets:delete(Table).

dets

    -module(lib_filenames_dets).
    -export([open/1, close/0, test/0, filename2index/1, index2filename/1]).
    
    %% Opens (or creates) the dets table named after this module, backed by
    %% File. When File did not exist before opening, the table is seeded with
    %% {free, 1}, the first index to hand out. Returns true on success;
    %% prints a message and exits with eDetsOpen if the table cannot be opened.
    open(File) ->
        io:format("dets opened:~p~n", [File]),
        %% Must be checked before dets:open_file/2, which creates the file.
        Existed = filelib:is_file(File),
        case dets:open_file(?MODULE, [{file, File}]) of
            {ok, ?MODULE} when Existed ->
                true;
            {ok, ?MODULE} ->
                ok = dets:insert(?MODULE, {free, 1}),
                true;
            {error, _Reason} ->
                io:format("cannot open dets table~n"),
                exit(eDetsOpen)
        end.
    	
    %% Closes the dets table named after this module.
    %% Returns the result of dets:close/1.
    close() ->
        dets:close(?MODULE).
    
    %% Returns the integer index for FileName (a binary), allocating a new one
    %% on first sight.
    %%
    %% A new name takes the value stored under the key `free`. The table then
    %% gets both directions of the mapping ({Index, FileName} and
    %% {FileName, Index}) and `free` is bumped by one, much like an
    %% auto-increment column.
    filename2index(FileName) when is_binary(FileName) ->
        case dets:lookup(?MODULE, FileName) of
            [{_, Index}] ->
                Index;
            [] ->
                %% FileName has never been added before.
                [{_, Next}] = dets:lookup(?MODULE, free),
                NewRows = [{Next, FileName}, {FileName, Next}, {free, Next + 1}],
                ok = dets:insert(?MODULE, NewRows),
                Next
        end.
    		
    %% Returns the value stored under the integer Index in this module's dets
    %% table, or the atom error if nothing is stored there.
    index2filename(Index) when is_integer(Index) ->
        Found = dets:lookup(?MODULE, Index),
        case Found of
            [{_, Bin}] -> Bin;
            [] -> error
        end.
    
    %% Starts from a fresh "./filenames.dets", prints the current working
    %% directory and indexes every path returned by
    %% lib_files_find:files(".", "*.erl", true). Returns ok.
    %% The table is left open when this returns.
    test() ->
        DbFile = "./filenames.dets",
        file:delete(DbFile),
        open(DbFile),
        io:format("Now in:~p~n", [element(2, file:get_cwd())]),
        ErlFiles = lib_files_find:files(".", "*.erl", true),
        lists:foreach(fun(Path) -> filename2index(list_to_binary(Path)) end, ErlFiles).

참고