yujiepan committed on
Commit
af0b476
·
verified ·
1 Parent(s): 4c7cf67

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -37,6 +37,10 @@ save_folder = "/tmp/yujiepan/baguettotron-tiny-random"
37
  tokenizer = AutoTokenizer.from_pretrained(
38
  source_model_id, trust_remote_code=True,
39
  )
 
 
 
 
40
  tokenizer.save_pretrained(save_folder)
41
 
42
  config = AutoConfig.from_pretrained(
 
37
  tokenizer = AutoTokenizer.from_pretrained(
38
  source_model_id, trust_remote_code=True,
39
  )
40
+ tokenizer.chat_template = "{% for m in messages %}<|im_start|>{{ m['role'] }}\n{{ m['content'] }}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n<think>\n{% endif %}"
41
+ tokenizer.eos_token = "<|im_end|>"
42
+ tokenizer.bos_token = "<|im_start|>"
43
+ tokenizer.stop_token = "<|im_end|>"
44
  tokenizer.save_pretrained(save_folder)
45
 
46
  config = AutoConfig.from_pretrained(
chat_template.jinja ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {% for m in messages %}<|im_start|>{{ m['role'] }}
2
+ {{ m['content'] }}<|im_end|>
3
+ {% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
4
+ <think>
5
+ {% endif %}
special_tokens_map.json CHANGED
@@ -46,20 +46,8 @@
46
  "⟨H≈1.7⟩",
47
  "⟨H≈1.8⟩"
48
  ],
49
- "bos_token": {
50
- "content": "<|begin_of_text|>",
51
- "lstrip": false,
52
- "normalized": false,
53
- "rstrip": false,
54
- "single_word": false
55
- },
56
- "eos_token": {
57
- "content": "<|end_of_text|>",
58
- "lstrip": false,
59
- "normalized": false,
60
- "rstrip": false,
61
- "single_word": false
62
- },
63
  "pad_token": {
64
  "content": "[PAD]",
65
  "lstrip": false,
 
46
  "⟨H≈1.7⟩",
47
  "⟨H≈1.8⟩"
48
  ],
49
+ "bos_token": "<|im_start|>",
50
+ "eos_token": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
51
  "pad_token": {
52
  "content": "[PAD]",
53
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -440,9 +440,9 @@
440
  "⟨H≈1.7⟩",
441
  "⟨H≈1.8⟩"
442
  ],
443
- "bos_token": "<|begin_of_text|>",
444
  "clean_up_tokenization_spaces": true,
445
- "eos_token": "<|end_of_text|>",
446
  "extra_special_tokens": {},
447
  "model_max_length": 1000000000000000019884624838656,
448
  "pad_token": "[PAD]",
 
440
  "⟨H≈1.7⟩",
441
  "⟨H≈1.8⟩"
442
  ],
443
+ "bos_token": "<|im_start|>",
444
  "clean_up_tokenization_spaces": true,
445
+ "eos_token": "<|im_end|>",
446
  "extra_special_tokens": {},
447
  "model_max_length": 1000000000000000019884624838656,
448
  "pad_token": "[PAD]",