programming erlang ch15 - andstudy/forge GitHub Wiki
ets 와 dets
| 비교 | ets (Erlang term storage) | dets (disk ets) |
|---|---|---|
| 데이타 위치 | 메모리 | 디스크 |
| 최대 크기 | 메모리에 따라 다름 | 최대 2GB |
| 속도 | 빠름 | 느림 |
| 지속 시간 | 휘발성 | 영속적 |
| 테이블 열기 | ets:new | dets:open_file |
| 삽입 | insert(Tablename, X) - X 는 튜플 하나, 혹은 튜플의 리스트 | |
| 조회 | lookup(TableName, Key) - 결과 : Key 에 매치하는 튜플 리스트 | |
| 제거 | ets:delete(TableId) | dets:close(TableId) |
| 이름 | local | global |
| 다른 프로세스와 공유 | X ?? | O |
테이블 유형
- set
- ordered set
- bag
- duplicate bag
%% Demonstrates the four ets table types by inserting the same tuples into
%% a table of each type and printing what the table ends up containing.
-module(ets_test).
-export([start/0]).

%% Runs test_ets/1 once for each table type. Returns ok.
%%
%% Equivalent, more verbose way of writing the same call:
%%   lists:foreach(fun(X) -> test_ets(X) end, [set, ordered_set, bag, duplicate_bag]).
start() ->
    lists:foreach(fun test_ets/1, [set, ordered_set, bag, duplicate_bag]).

%% Creates a table of type Mode, inserts {a,1}, {b,2}, {a,1} and {a,3}
%% (a repeated tuple and a repeated key), prints the table contents and
%% deletes the table.
%%
%% Variation to try: ets:new(test, [Mode, {keypos, 2}]) uses the second
%% tuple element as the key instead of the first.
test_ets(Mode) ->
    TableId = ets:new(test, [Mode]),
    ets:insert(TableId, {a, 1}),
    ets:insert(TableId, {b, 2}),
    ets:insert(TableId, {a, 1}),
    ets:insert(TableId, {a, 3}),
    List = ets:tab2list(TableId),
    %% What is the difference between ~w and ~p? (see p.260)
    io:format("~13w => ~p~n", [Mode, List]),
    ets:delete(TableId).
ets 테이블 효율성
해시 테이블, 균형 이진 트리
작은 튜플 vs 큰 바이너리
Trigrams
Trigrams 이 뭘까요? -.-;;;
lib_trigrams.erl
-module(lib_trigrams).
-export([for_each_trigram_in_the_english_language/2,
make_tables/0, timer_tests/0,
open/0, close/1, is_word/2,
how_many_trigrams/0,
make_ets_set/0, make_ets_ordered_set/0, make_mod_set/0,
lookup_all_ets/2, lookup_all_set/2
]).
-import(lists, [reverse/1]).
%% timer:tc(Module, Function, Arguments) -> {Time, Value}
%% ==> measures the elapsed time taken to evaluate
%%     apply(Module, Function, Arguments).
%%
%% Builds the three trigram stores (ets ordered_set, ets set, sets module)
%% and prints, for each one, the saved file size per stored trigram and the
%% build time per trigram scanned.
%%   Total    - value returned by how_many_trigrams/0 (every trigram scanned)
%%   Distinct - value returned by make_ets_ordered_set/0 (table size)
make_tables() ->
    {CountMicros, Total} = timer:tc(?MODULE, how_many_trigrams, []),
    io:format("Counting - No of trigrams=~p time/trigram=~p~n",
              [Total, CountMicros / Total]),
    {OrdMicros, Distinct} = timer:tc(?MODULE, make_ets_ordered_set, []),
    %% The file size is read only after the corresponding table was written.
    Report = fun(Format, File, Micros) ->
                 io:format(Format, [filelib:file_size(File) / Distinct,
                                    Micros / Total])
             end,
    Report("Ets ordered Set size=~p time/trigram=~p~n", "trigramsOS.tab", OrdMicros),
    {SetMicros, _} = timer:tc(?MODULE, make_ets_set, []),
    Report("Ets set size=~p time/trigram=~p~n", "trigramsS.tab", SetMicros),
    {ModMicros, _} = timer:tc(?MODULE, make_mod_set, []),
    Report("Module sets size=~p time/trigram=~p~n", "trigrams.set", ModMicros).
%% Builds the trigram table as an ets ordered_set, saved to "trigramsOS.tab".
make_ets_ordered_set() ->
    make_a_set(ordered_set, "trigramsOS.tab").

%% Builds the trigram table as an ets set, saved to "trigramsS.tab".
make_ets_set() ->
    make_a_set(set, "trigramsS.tab").
%% Fills a fresh ets table of the given Type with every trigram (stored as
%% a one-element tuple holding the trigram as a binary), dumps the table to
%% FileName and returns the number of objects the table held.
%%
%% The result of ets:tab2file/2 is checked: a failed save now crashes with
%% badmatch instead of silently leaving a missing or stale file behind.
%% The table is deleted before that check so it is not left around on
%% failure.
make_a_set(Type, FileName) ->
    Tab = ets:new(table, [Type]),
    Insert = fun(Str, _) -> ets:insert(Tab, {list_to_binary(Str)}) end,
    for_each_trigram_in_the_english_language(Insert, 0),
    Saved = ets:tab2file(Tab, FileName),
    Size = ets:info(Tab, size),
    ets:delete(Tab),
    ok = Saved,
    Size.
%% Collects every trigram (as a binary) into a set from the sets module and
%% writes the serialized set to "trigrams.set".
%% Returns whatever file:write_file/2 returns.
make_mod_set() ->
    AddTrigram = fun(Str, Set) -> sets:add_element(list_to_binary(Str), Set) end,
    Trigrams = for_each_trigram_in_the_english_language(AddTrigram, sets:new()),
    Encoded = term_to_binary(Trigrams),
    file:write_file("trigrams.set", [Encoded]).
%% Times lookups against the two saved ets tables and then against the
%% saved sets-module set, printing one line per store.
timer_tests() ->
    EtsCases = [{"Ets ordered Set", "trigramsOS.tab"},
                {"Ets set", "trigramsS.tab"}],
    lists:foreach(fun({Label, File}) -> time_lookup_ets_set(Label, File) end,
                  EtsCases),
    time_lookup_module_sets().
%% Loads the ets table saved in File, looks up every object it contains and
%% prints the average time per lookup, labelled with Type. The table is
%% deleted afterwards.
time_lookup_ets_set(Type, File) ->
    {ok, Tab} = ets:file2tab(File),
    Rows = ets:tab2list(Tab),
    {Micros, _} = timer:tc(?MODULE, lookup_all_ets, [Tab, Rows]),
    PerLookup = Micros / length(Rows),
    io:format("~s lookup=~p micro seconds~n", [Type, PerLookup]),
    ets:delete(Tab).
%% Performs one ets:lookup/2 in Tab for the key of every {Key} tuple in the
%% list; the lookup results are discarded. Returns ok.
lookup_all_ets(Tab, [{Key} | Rest]) ->
    ets:lookup(Tab, Key),
    lookup_all_ets(Tab, Rest);
lookup_all_ets(_Tab, []) ->
    ok.
%% Reads the serialized set from "trigrams.set", tests membership of every
%% element it contains and prints the average time per membership test.
time_lookup_module_sets() ->
    {ok, Encoded} = file:read_file("trigrams.set"),
    Trigrams = binary_to_term(Encoded),
    Members = sets:to_list(Trigrams),
    {Micros, _} = timer:tc(?MODULE, lookup_all_set, [Trigrams, Members]),
    io:format("Module set lookup=~p micro seconds~n", [Micros / length(Members)]).
%% Calls sets:is_element/2 on Set for every key in the list; the results
%% are discarded. Returns ok.
lookup_all_set(Set, [Key | Rest]) ->
    sets:is_element(Key, Set),
    lookup_all_set(Set, Rest);
lookup_all_set(_Set, []) ->
    ok.
%% Counts how many times the trigram iterator invokes its callback, i.e.
%% the total number of trigrams scanned (repeats included).
how_many_trigrams() ->
    Count = fun(_Trigram, Seen) -> 1 + Seen end,
    for_each_trigram_in_the_english_language(Count, 0).
%% Iterator over all trigrams in the language: folds F over every trigram
%% found in the gzipped word list "354984si.ngl.gz", starting from the
%% accumulator A0. F is called as F(Trigram, Acc) and must return the new
%% accumulator; the final accumulator is returned.
for_each_trigram_in_the_english_language(F, A0) ->
    {ok, Compressed} = file:read_file("354984si.ngl.gz"),
    Chars = binary_to_list(zlib:gunzip(Compressed)),
    scan_word_list(Chars, F, A0).
%% Walks the character list one word at a time, folding F over the trigrams
%% of each word. A leading space is prepended to every word here; the
%% trailing space is already added by get_next_word/2.
scan_word_list([], _F, Acc) ->
    Acc;
scan_word_list(Chars, F, Acc) ->
    {Word, Remaining} = get_next_word(Chars, []),
    scan_word_list(Remaining, F, scan_trigrams([$\s | Word], F, Acc)).
%% Splits the next word off the front of a character list.
%% Characters are consumed until "\r\n" or the end of input is reached.
%% The second argument accumulates the word in reverse, so it is put back
%% in order, with a trailing space appended, when the word is complete.
%% Returns {Word, RestOfInput}.
get_next_word([$\r, $\n | Rest], RevWord) ->
    {lists:reverse(RevWord) ++ " ", Rest};
get_next_word([], RevWord) ->
    {lists:reverse(RevWord) ++ " ", []};
get_next_word([Char | Rest], RevWord) ->
    get_next_word(Rest, [Char | RevWord]).
%% Folds F over every window of three consecutive characters, left to
%% right. F is called as F([X, Y, Z], Acc) and returns the new accumulator.
%% Once fewer than three characters remain the accumulator is returned.
scan_trigrams([X, Y, Z | Rest], F, Acc) ->
    scan_trigrams([Y, Z | Rest], F, F([X, Y, Z], Acc));
scan_trigrams(_, _, Acc) ->
    Acc.
%% Access routines:
%%   open() -> Table
%%   close(Table)
%%   is_word(Table, String) -> Bool

%% True when every trigram of Str, padded with one space on each side, is
%% present in Tab.
is_word(Tab, Str) ->
    is_word1(Tab, [$\s | Str ++ "\s"]).

%% Checks the leading trigram and then slides the window one character to
%% the right; stops at the first trigram that is missing. Anything shorter
%% than three characters is not a word.
is_word1(Tab, [A, B, C | Rest]) ->
    is_this_a_trigram(Tab, [A, B, C])
        andalso (Rest =:= [] orelse is_word1(Tab, [B, C | Rest]));
is_word1(_, _) ->
    false.

%% True when the trigram X, converted to a binary, is a key in Tab.
is_this_a_trigram(Tab, X) ->
    ets:lookup(Tab, list_to_binary(X)) =/= [].
%% Loads the saved trigram set table ("trigramsS.tab") from the directory
%% that holds this module's object code and returns the ets table.
%% Crashes with badmatch if the table file cannot be loaded.
open() ->
    Dir = filename:dirname(code:which(?MODULE)),
    %% filename:join/2 builds the path rather than hand-concatenating "/".
    {ok, Tab} = ets:file2tab(filename:join(Dir, "trigramsS.tab")),
    Tab.
%% Deletes the given ets table (the counterpart of open/0).
close(Tab) ->
    ets:delete(Tab).
dets
-module(lib_filenames_dets).
-export([open/1, close/0, test/0, filename2index/1, index2filename/1]).
%% Opens the dets table named after this module, backed by File.
%% When File did not exist before the call, the table is seeded with
%% {free, 1}, the first index to hand out. Returns true on success.
%% On failure the dets error reason is printed (it used to be discarded)
%% and the process exits with eDetsOpen.
open(File) ->
    io:format("dets opened:~p~n", [File]),
    %% Must be sampled before dets:open_file/2, which creates the file.
    Existed = filelib:is_file(File),
    case dets:open_file(?MODULE, [{file, File}]) of
        {ok, ?MODULE} when Existed ->
            true;
        {ok, ?MODULE} ->
            ok = dets:insert(?MODULE, {free, 1}),
            true;
        {error, Reason} ->
            io:format("cannot open dets table: ~p~n", [Reason]),
            exit(eDetsOpen)
    end.
%% Closes the dets table named after this module.
close() ->
    dets:close(?MODULE).
%% Returns the integer index associated with FileName (a binary),
%% allocating a new one when the name has not been stored yet.
filename2index(FileName) when is_binary(FileName) ->
    case dets:lookup(?MODULE, FileName) of
        [{_, Index}] ->
            Index;
        [] ->
            %% FileName has never been added.
            [{_, Next}] = dets:lookup(?MODULE, free),
            NewRows = [
                %% Store name and index keyed by each other.
                {Next, FileName},
                {FileName, Next},
                %% Bump the free index by one, much like auto increment.
                {free, Next + 1}
            ],
            ok = dets:insert(?MODULE, NewRows),
            Next
    end.
%% Returns the value stored under the integer Index (the file name binary
%% recorded by filename2index/1), or the atom error when nothing is stored.
index2filename(Index) when is_integer(Index) ->
    Found = dets:lookup(?MODULE, Index),
    case Found of
        [{_, FileName}] -> FileName;
        [] -> error
    end.
%% Smoke test: recreates "./filenames.dets", prints the current working
%% directory and assigns an index to every file that
%% lib_files_find:files(".", "*.erl", true) returns.
test() ->
    DetsFile = "./filenames.dets",
    file:delete(DetsFile),
    open(DetsFile),
    CwdResult = file:get_cwd(),
    io:format("Now in:~p~n", [element(2, CwdResult)]),
    Sources = lib_files_find:files(".", "*.erl", true),
    lists:foreach(fun(Path) -> filename2index(list_to_binary(Path)) end, Sources).