sd_hijack_taiyi.py

from modules import sd_hijack_clip, devices


class FrozenTaiyiEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        # read the special token ids from the wrapped model's tokenizer instead of
        # relying on CLIP's hardcoded values
        self.id_start = wrapped.tokenizer.bos_token_id
        self.id_end = wrapped.tokenizer.eos_token_id
        self.id_pad = wrapped.tokenizer.pad_token_id

        # unlike CLIP's BPE vocab, this tokenizer's comma token has no </w> suffix, so look it up directly
        self.comma_token = self.tokenizer.get_vocab().get(',', None)

    def encode_with_transformers(self, tokens):
        # there's no CLIP Skip here because all hidden layers have size of 1024 and the last one uses a
        # trained layer to transform those 1024 into 768 for unet; so you can't choose which transformer
        # layer to work with - you have to use the last
        outputs = self.wrapped.transformer(input_ids=tokens)
        z = outputs.last_hidden_state

        return z

    def encode_embedding_init_text(self, init_text, nvpt):
        # used when creating a new textual inversion embedding: tokenize the init text
        # and look up the raw token embeddings for it
        embedding_layer = self.wrapped.transformer.embeddings
        ids = self.wrapped.tokenizer(init_text, max_length=nvpt, return_tensors="pt", add_special_tokens=False)["input_ids"]

        # token_embedding is the EmbeddingsWithFixes wrapper installed by the hijack;
        # .wrapped is the original embedding module underneath it
        embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)

        return embedded
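
For context, this class only takes effect once the model's embedding layer has been wrapped and the class substituted for the original cond stage model. Below is a minimal sketch of that wiring, modeled on how the webui hooks up its alt-diffusion (XLM-R) embedder in modules/sd_hijack.py; the `cond_stage_model.transformer.embeddings` layout and the `word_embeddings` attribute are assumptions about the Taiyi text encoder (a BERT-style model), not confirmed names.

# Sketch only: how sd_hijack.py might install this embedder, following the
# pattern used for the alt-diffusion hijack. Attribute names on the Taiyi
# model (transformer.embeddings, word_embeddings) are assumptions.
from modules import sd_hijack, sd_hijack_taiyi

def hijack_taiyi(hijack, m):
    model_embeddings = m.cond_stage_model.transformer.embeddings

    # wrap the raw word embeddings so textual inversion vectors can be injected;
    # this is what makes .token_embedding.wrapped above resolve correctly
    model_embeddings.token_embedding = sd_hijack.EmbeddingsWithFixes(model_embeddings.word_embeddings, hijack)

    # swap in the custom-words embedder defined in this file
    m.cond_stage_model = sd_hijack_taiyi.FrozenTaiyiEmbedderWithCustomWords(m.cond_stage_model, hijack)

Once wired up this way, prompt parsing, attention weighting, and textual inversion all flow through the parent FrozenCLIPEmbedderWithCustomWords logic, with only tokenization and the final hidden-state extraction specialized here.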