sd_hijack_open_clip.py

import open_clip.tokenizer
import torch

from modules import sd_hijack_clip, devices
from modules.shared import opts

tokenizer = open_clip.tokenizer._tokenizer


class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        # token ids the base class needs for chunking prompts: the comma token
        # (used when deciding where to break long prompts), the start/end-of-text
        # markers, and the padding id (open_clip pads with 0, not with the end token)
        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        self.id_start = tokenizer.encoder["<start_of_text>"]
        self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_pad = 0

    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [tokenizer.encode(text) for text in texts]

        return tokenized

    def encode_with_transformers(self, tokens):
        # set self.wrapped.layer_idx here according to opts.CLIP_stop_at_last_layers
        z = self.wrapped.encode_with_transformer(tokens)

        return z

    def encode_embedding_init_text(self, init_text, nvpt):
        # look up the raw token embeddings for init_text; used to initialize a new
        # textual-inversion embedding from a starting phrase
        ids = tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)

        return embedded
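
# Data-flow sketch (added for illustration; the chunking behaviour described here
# lives in FrozenCLIPEmbedderWithCustomWordsBase in modules/sd_hijack_clip.py, so
# treat the details as an assumption rather than a spec): tokenize() returns raw
# BPE ids only, without <start_of_text>/<end_of_text>. The base class splits those
# ids into fixed-size chunks, brackets each chunk with self.id_start / self.id_end,
# pads with self.id_pad, and then calls encode_with_transformers() on the batched
# chunks to produce per-token embeddings.
def _tokenize_roundtrip_example(embedder, prompt="a photo of a cat"):
    """Hypothetical helper, not part of the original file: show what tokenize()
    produces for one prompt and how each id maps back to a BPE token string."""
    ids = embedder.tokenize([prompt])[0]
    id_to_token = {v: k for k, v in tokenizer.encoder.items()}
    return [(i, id_to_token[i]) for i in ids]

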
class FrozenOpenCLIPEmbedder2WithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        self.id_start = tokenizer.encoder["<start_of_text>"]
        self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_pad = 0

    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [tokenizer.encode(text) for text in texts]

        return tokenized

    def encode_with_transformers(self, tokens):
        # this encoder variant returns a dict of hidden states; select the configured
        # layer and attach the pooled output so downstream conditioning can use it
        d = self.wrapped.encode_with_transformer(tokens)
        z = d[self.wrapped.layer]

        pooled = d.get("pooled")
        if pooled is not None:
            z.pooled = pooled

        return z

    def encode_embedding_init_text(self, init_text, nvpt):
        ids = tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        # the embedding table may not live on devices.device, so move the ids to the
        # embedding weight's device before the lookup
        embedded = self.wrapped.model.token_embedding.wrapped(ids.to(self.wrapped.model.token_embedding.wrapped.weight.device)).squeeze(0)

        return embedded
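

# Usage sketch (an assumption added for illustration, not part of the original
# file): the actual wiring lives in modules/sd_hijack.py, which swaps a model's
# frozen OpenCLIP text encoder for one of the wrappers above so prompt emphasis
# and long prompts are handled. The hypothetical helper below shows the general
# shape of that wiring.
def _hijack_open_clip_text_encoder(sd_model, hijack):
    """Hypothetical helper sketching how the wrapper replaces cond_stage_model."""
    sd_model.cond_stage_model = FrozenOpenCLIPEmbedderWithCustomWords(sd_model.cond_stage_model, hijack)
    return sd_model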