# Label Studio workspace root for this (2022) setup.
export LS_HOME=~/d2/s2/w2-/wk-/lstudio22/ls22a
# Default label-studio data directory (same path as LS0_DAT_DIR further down).
export LS_CONF=$HOME/.local/share/label-studio
# Lowercase aliases for interactive convenience.
export ls_home=$LS_HOME
export ls_conf=$LS_CONF
# NOTE(review): short path with trailing slash — confirm this working dir exists.
export ls_work=~/d/s/w/
- conda create -y --name myEnv --clone base
- conda create -y --name ls22py39 python==3.8
- conda install virtualenv
- virtualenv ls22py38a
- source ls22py38a/bin/activate
- pip install label-studio
- postgres
- https://www.moncefbelyamani.com/how-to-install-postgresql-on-a-mac-with-homebrew-and-lunchy/
- https://stackoverflow.com/questions/20170895/mac-virtualenv-pip-postgresql-error-pg-config-executable-not-found
- brew install postgres
- emacs -nw ~/.zshrc
- export PATH=$PATH:/Applications/Postgres.app/Contents/Versions/<current_version>/bin
- export PATH=$PATH:/opt/homebrew/opt/postgres/bin
# Print a reference invocation (credentials here are examples only).
echo "label-studio --data-dir ./my-dir --username [email protected] --password admin"
# Earlier approach, kept for reference:
#ANA_ENV=$(which label-studio)
#LS_BIN=${ANA_ENV}/bin/label-studio
LS_BIN=$(command -v label-studio)  # `command -v` is the portable replacement for `which`
LS_HOST=localhost
LS_PORT=8090

# Credentials / users for the two local instances.
LS0_USER_PASS=abcde
[email protected]
[email protected]

# Data directories: LS0 is the label-studio default; LS1/LS2 are dated copies.
LS0_DAT_DIR="$HOME/.local/share/label-studio"
LS1_DAT_DIR="${LS0_DAT_DIR}-2022-08-14"
LS2_DAT_DIR="${LS0_DAT_DIR}-2022-08-15"

# Disable Signup Page
export LABEL_STUDIO_DISABLE_SIGNUP_WITHOUT_LINK=true

# label-studio start --username [email protected] --password <myPass>
# Shared CLI arguments appended to every start command by act() below.
LS_ARG="--port ${LS_PORT} --password ${LS0_USER_PASS}"
#######################################
# Print the label-studio launch command for the selected profile.
# Globals:   LS_BIN, LS_ARG, LS1/LS2 data dirs and user names (read)
# Arguments: a - instance 1 (signup disabled)
#            b - instance 2
#            h - show label-studio help command
# Outputs:   the command line to stdout (it is echoed, not executed)
# Returns:   0 on a known profile, 1 otherwise
#######################################
act() {
  case "$*" in
    a)
      echo "export LABEL_STUDIO_DISABLE_SIGNUP_WITHOUT_LINK=true; ${LS_BIN} start --data-dir ${LS1_DAT_DIR} --username ${LS1_USER_NAME} ${LS_ARG}"
      ;;
    b)
      echo "${LS_BIN} start --data-dir ${LS2_DAT_DIR} --username ${LS2_USER_NAME} ${LS_ARG}"
      ;;
    h)
      echo "${LS_BIN} help"
      ;;
    *)
      echo "usage: act {a|b|h}" >&2
      return 1
      ;;
  esac
}
act "$@"
import spacy
import pandas as pd
import json
from itertools import groupby
# Load the spaCy pipelines used to pre-annotate the data.
# NOTE(review): spacy.load() does not download models — they must already be
# installed (python -m spacy download en_core_web_sm / en_core_web_lg).
models = {
    'en_core_web_sm': spacy.load("en_core_web_sm"),
    'en_core_web_lg': spacy.load("en_core_web_lg")
}
# This function converts spaCy docs to the list of named entity spans in Label Studio compatible JSON format:
def doc_to_spans(doc):
    """Convert a spaCy doc into Label Studio named-entity prediction spans.

    Parameters
    ----------
    doc : iterable of tokens exposing ``.text``, ``.idx`` and ``.ent_type_``
        (a spaCy ``Doc`` satisfies this).

    Returns
    -------
    tuple
        ``(results, entities)`` where ``results`` is a list of Label Studio
        result dicts and ``entities`` is the set of entity labels seen.
    """
    tokens = [(tok.text, tok.idx, tok.ent_type_) for tok in doc]
    results = []
    entities = set()
    # Consecutive tokens sharing the same entity type form one span.
    for entity, group in groupby(tokens, key=lambda t: t[-1]):
        if not entity:
            continue  # tokens outside any entity
        group = list(group)
        _, start, _ = group[0]
        last_text, last_idx, _ = group[-1]
        end = last_idx + len(last_text)
        # Rebuild the span text from token offsets: ' '.join would collapse
        # runs of whitespace, making `text` disagree with the start/end
        # character offsets. Gaps are padded with spaces — assumes inter-token
        # whitespace is plain spaces (TODO confirm for tabs/newlines).
        text = ''
        for tok_text, tok_idx, _ in group:
            text += ' ' * (tok_idx - start - len(text)) + tok_text
        results.append({
            'from_name': 'label',
            'to_name': 'text',
            'type': 'labels',
            'value': {
                'start': start,
                'end': end,
                'text': text,
                'labels': [entity]
            }
        })
        entities.add(entity)
    return results, entities
# Load the dataset, keeping only the lines that mention "Easter ":
df = pd.read_csv('lines_clean.csv')
easter_mask = df['line_text'].str.contains("Easter ", na=False)
df = df[easter_mask]
print(df.head())
texts = df['line_text']

# Build Label Studio import tasks, attaching every model's predictions:
entities = set()
tasks = []
for text in texts:
    predictions = []
    for model_name, nlp in models.items():
        spans, ents = doc_to_spans(nlp(text))
        entities.update(ents)
        predictions.append({'model_version': model_name, 'result': spans})
    tasks.append({'data': {'text': text}, 'predictions': predictions})

# Write the tasks in Label Studio's import JSON format.
print(f'Save {len(tasks)} tasks to "tasks.json"')
with open('tasks.json', mode='w') as f:
    json.dump(tasks, f, indent=2)

# Persist the label set (one label per line) for the labeling config.
print('Named entities are saved to "named_entities.txt"')
with open('named_entities.txt', mode='w') as f:
    f.write('\n'.join(sorted(entities)))
import json
from collections import defaultdict

# Compare each model's pre-annotations against the human-accepted annotations
# exported from Label Studio, and report per-model accuracy.
with open('annotations.json') as f:  # `with` ensures the handle is closed
    tasks = json.load(f)

model_hits = defaultdict(int)
for task in tasks:
    # Ground truth: the first annotation on the task.
    # NOTE(review): assumes exactly one annotation per task — confirm export.
    annotation_result = task['annotations'][0]['result']
    for r in annotation_result:
        # Result ids are annotation-specific; drop them before comparing.
        r.pop('id', None)
    for prediction in task['predictions']:
        # A hit means the model's result list matches the annotation exactly.
        model_hits[prediction['model_version']] += int(prediction['result'] == annotation_result)

num_tasks = len(tasks)
for model_name, num_hits in model_hits.items():
    # Multiply by 100 so the printed value matches the '%' suffix;
    # guard against an empty export.
    acc = (num_hits / num_tasks * 100) if num_tasks else 0.0
    print(f'Accuracy for {model_name}: {acc:.2f}%')