initial commit

2021-07-30 17:40:31 +02:00 · 2021-07-30 17:40:31 +02:00 · b650dc95df
commit b650dc95df
4 changed files with 74 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,36 @@
 # MarIA: la Biblioteca Nacional y el BSC crean un sistema especializado en el idioma español para mejorar las respuestas de las IA en nuestra lengua
 [https://www.xataka.com/robotica-e-ia/maria-biblioteca-nacional-bsc-crean-sistema-especializado-idioma-espanol-para-mejorar-respuestas-ia-nuestra-lengua](https://www.xataka.com/robotica-e-ia/maria-biblioteca-nacional-bsc-crean-sistema-especializado-idioma-espanol-para-mejorar-respuestas-ia-nuestra-lengua)
 [https://github.com/PlanTL-SANIDAD/lm-spanish](https://github.com/PlanTL-SANIDAD/lm-spanish)
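 [https://huggingface.co/BSC-TeMU/roberta-large-bne](https://huggingface.co/BSC-TeMU/roberta-large-bne)
 [https://huggingface.co/BSC-TeMU/roberta-base-bne](https://huggingface.co/BSC-TeMU/roberta-base-bne)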
 ```
 $ python3 roberta.py "la inteligencia <mask> es el presente"
 [' emocional', ' humana', ' no', ' artificial', ' colectiva']
 $ python3 roberta.py "la curiosidad <mask> al gato"
 [' mueve', ' mató', ' mata', ' ayuda', ' atrae']
 ```
 ## Build
 ```
 docker-compose build
 # or
 docker build -t maria ./maria
 ```
 ## Run
 ```
 $ docker run --name maria --rm maria "hola <mask>, ¿qué tal?"
 [' guapa', ' chicas', ' amigos', ' chicos', ' amigo']
 ```
 ## License
 MIT
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,15 @@
 # Compose definition for the "maria" fill-mask demo image built from ./maria.
 version: '2'
 services:
    maria:
        build: ./maria
        image: maria
        hostname: maria
        container_name: maria
        # The image's entrypoint is a one-shot script that prints its result and
        # exits; an "always" policy would restart it in an endless loop.
        # Quoted because a bare `no` is parsed by YAML as a boolean.
        restart: "no"
        networks:
            net:
 networks:
  net:
--- a/maria/Dockerfile
+++ b/maria/Dockerfile
@ -0,0 +1,11 @@
 # Image that runs the fill-mask demo script (roberta.py) as its entrypoint.
 FROM python:3
 # Install a Rust toolchain; presumably needed so pip can build Rust-based
 # packages (e.g. tokenizers) when no prebuilt wheel exists -- TODO confirm.
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
 RUN . /root/.cargo/env && pip3 install transformers torch flask
 RUN mkdir -p /maria
 WORKDIR /maria
 # NOTE(review): roberta.py loads the model by its Hub id rather than from this
 # checkout, so this clone may be redundant -- confirm before removing.
 RUN git clone https://huggingface.co/BSC-TeMU/roberta-base-bne
 COPY ./roberta.py /maria/roberta.py
 # Run the script once at build time; presumably this warms the model cache
 # inside the image so containers do not download it on start -- TODO confirm.
 RUN python3 roberta.py "hola <mask>"
 ENTRYPOINT ["python3", "roberta.py"]
 # Default argument appended to ENTRYPOINT. It must be in exec (JSON) form:
 # a shell-form CMD is wrapped as `/bin/sh -c ...`, and those extra words would
 # be handed to roberta.py as additional arguments.
 CMD ["hola <mask>"]
--- a/maria/roberta.py
+++ b/maria/roberta.py
@ -0,0 +1,12 @@
 from transformers import AutoModelForMaskedLM
 from transformers import AutoTokenizer, FillMaskPipeline
 from pprint import pprint
 import sys
 # Fill-mask demo: prints the token strings the model proposes for the <mask>
 # placeholder in the sentence given on the command line.
 # Validate the command line first so a missing sentence fails fast, before
 # the model is loaded.
 if len(sys.argv) < 2:
     sys.exit('usage: python3 roberta.py "sentence with a <mask> placeholder"')
 # Join every argument into one sentence: a single quoted argument is passed
 # through unchanged, and an unquoted sentence split by the shell is still
 # treated as one input instead of a batch (a batch makes the pipeline return
 # nested lists, which the comprehension below cannot index by key).
 text = ' '.join(sys.argv[1:])
 tokenizer_hf = AutoTokenizer.from_pretrained('BSC-TeMU/roberta-base-bne')
 model = AutoModelForMaskedLM.from_pretrained('BSC-TeMU/roberta-base-bne')
 # Switch the model to evaluation mode before running inference.
 model.eval()
 pipeline = FillMaskPipeline(model, tokenizer_hf)
 res_hf = pipeline(text)
 # Each prediction is a mapping; only its 'token_str' entry is printed.
 pprint([r['token_str'] for r in res_hf])