sam749 committed on
Commit
637a1fc
·
verified ·
1 Parent(s): 0b556d8

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. .gitattributes +3 -0
  2. README.md +6 -6
  3. app.py +52 -0
  4. example_1.png +3 -0
  5. example_2.png +3 -0
  6. example_3.png +3 -0
  7. requirements.txt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example_1.png filter=lfs diff=lfs merge=lfs -text
37
+ example_2.png filter=lfs diff=lfs merge=lfs -text
38
+ example_3.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Donut Sroie
3
- emoji: 👁
4
- colorFrom: purple
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.46.1
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Donut SROIE
3
+ emoji: 🖼️
4
+ colorFrom: red
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.46.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+
4
+ import torch
5
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
6
+
7
+ device = "cuda" if torch.cuda.is_available() else "cpu"
8
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
9
+
10
+ processor = DonutProcessor.from_pretrained("sam749/donut-base-finetuned-sroie-v2")
11
+ model = VisionEncoderDecoderModel.from_pretrained("sam749/donut-base-finetuned-sroie-v2", dtype=dtype)
12
+ model.to(device)
13
+
14
+ def process_document(image):
15
+ # prepare encoder inputs
16
+ pixel_values = processor(image, return_tensors="pt").pixel_values
17
+
18
+ # generate answer
19
+ outputs = model.generate(
20
+ pixel_values.to(device),
21
+ use_cache=True,
22
+ num_beams=1,
23
+ bad_words_ids=[[processor.tokenizer.unk_token_id]],
24
+ return_dict_in_generate=True,
25
+ )
26
+
27
+ # postprocess
28
+ sequence = processor.batch_decode(outputs.sequences)[0]
29
+ sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
30
+ sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
31
+
32
+ return processor.token2json(sequence)
33
+
34
+ description = """Gradio Demo for Donut, an instance of `VisionEncoderDecoderModel` fine-tuned on SROI (document parsing & information extraction).
35
+ To use it, simply upload your image and click 'submit', or click one of the examples to load them.
36
+
37
+ <strong><em>Note: </em>Predictions are more accurate on GPU.</strong><br>
38
+ <em>Output: </em>extracts [date, company, total] from the document.
39
+ """
40
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
41
+
42
+ demo = gr.Interface(
43
+ fn=process_document,
44
+ inputs="image",
45
+ outputs="json",
46
+ title="Demo: Donut 🍩 for Document Parsing",
47
+ description=description,
48
+ article=article,
49
+ examples=[["example_1.png"], ["example_2.png"], ["example_3.png"]],
50
+ cache_examples=False)
51
+
52
+ demo.launch(share=True)
example_1.png ADDED

Git LFS Details

  • SHA256: 1e86bf009671205fac34d2e9b24fddaac165bae18146eece1540c3c5e66a3f10
  • Pointer size: 131 Bytes
  • Size of remote file: 549 kB
example_2.png ADDED

Git LFS Details

  • SHA256: 7b40d20e19bf7a489b04edbd58c639b23b675973451a9849958f6488888ea28d
  • Pointer size: 131 Bytes
  • Size of remote file: 833 kB
example_3.png ADDED

Git LFS Details

  • SHA256: 3fcf61978dfa301c0654acaf3cff9e27a6eb9b12d6fd3ba4dbf7511937e17a51
  • Pointer size: 131 Bytes
  • Size of remote file: 571 kB
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ git+https://github.com/huggingface/transformers.git
3
+ sentencepiece