commit b650dc95df8b9754693b83e9c004395e2e2888ad Author: ale Date: Fri Jul 30 17:40:31 2021 +0200 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..5a2599b --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# MarIA la Biblioteca Nacional y el BSC crean un sistema especializado en el idioma español para mejorar las respuestas de las IA en nuestra lengua + +[https://www.xataka.com/robotica-e-ia/maria-biblioteca-nacional-bsc-crean-sistema-especializado-idioma-espanol-para-mejorar-respuestas-ia-nuestra-lengua](https://www.xataka.com/robotica-e-ia/maria-biblioteca-nacional-bsc-crean-sistema-especializado-idioma-espanol-para-mejorar-respuestas-ia-nuestra-lengua) + +[https://github.com/PlanTL-SANIDAD/lm-spanish](https://github.com/PlanTL-SANIDAD/lm-spanish) + +[https://huggingface.co/BSC-TeMU/roberta-large-bne](https://huggingface.co/BSC-TeMU/roberta-large-bne) + +``` +$ python3 roberta.py "la inteligencia <mask> es el presente" +[' emocional', ' humana', ' no', ' artificial', ' colectiva'] + +$ python3 roberta.py "la curiosidad <mask> al gato" +[' mueve', ' mató', ' mata', ' ayuda', ' atrae'] +``` + +## Build + +``` +docker-compose build + +or + +docker build -t maria ./maria +``` + +## Run + +``` +$ docker run --name maria --rm maria "hola <mask>, ¿qué tal?" +[' guapa', ' chicas', ' amigos', ' chicos', ' amigo'] +``` + +## License + +MIT diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1ff7fe2 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +version: '2' + +services: + maria: + build: ./maria + image: maria + hostname: maria + container_name: maria + restart: always + networks: + net: + +networks: + net: + diff --git a/maria/Dockerfile b/maria/Dockerfile new file mode 100644 index 0000000..fbd6019 --- /dev/null +++ b/maria/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3 +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y +RUN . 
/root/.cargo/env && pip3 install transformers torch flask
+RUN mkdir -p /maria
+WORKDIR /maria
+RUN git clone https://huggingface.co/BSC-TeMU/roberta-base-bne
+COPY ./roberta.py /maria/roberta.py
+RUN python3 roberta.py "hola <mask>"
+CMD ["hola <mask>"]
+ENTRYPOINT ["python3", "roberta.py"]
+
diff --git a/maria/roberta.py b/maria/roberta.py
new file mode 100644
index 0000000..f542dc9
--- /dev/null
+++ b/maria/roberta.py
@@ -0,0 +1,40 @@
+"""Print the top fill-mask predictions of RoBERTa-BNE for a Spanish sentence.
+
+Usage: python3 roberta.py "hola <mask>"
+"""
+import sys
+from pprint import pprint
+
+from transformers import AutoModelForMaskedLM
+from transformers import AutoTokenizer, FillMaskPipeline
+
+MODEL_NAME = 'BSC-TeMU/roberta-base-bne'
+
+
+def main(argv):
+    """Fill the mask token of the sentence in *argv* and print the candidates.
+
+    argv: command-line words (without the program name); they are joined
+    with single spaces into one sentence.
+    """
+    # Join the words so an unquoted sentence is still ONE input: the pipeline
+    # treats a list as a batch of separate sentences.
+    text = ' '.join(argv)
+    if not text.strip():
+        sys.exit('usage: roberta.py "<sentence containing the mask token>"')
+
+    tokenizer_hf = AutoTokenizer.from_pretrained(MODEL_NAME)
+    # Fail early, before loading the model, when there is nothing to fill.
+    if tokenizer_hf.mask_token not in text:
+        sys.exit(f'error: the sentence must contain {tokenizer_hf.mask_token}')
+
+    model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)
+    model.eval()
+    pipeline = FillMaskPipeline(model, tokenizer_hf)
+    res_hf = pipeline(text)
+    pprint([r['token_str'] for r in res_hf])
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
+