Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

models.py 2.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  1. from torch import nn
  2. from transformers import CanineModel, CanineForTokenClassification, CaninePreTrainedModel
  3. from transformers.modeling_outputs import TokenClassifierOutput
  4. from hebrew_utils import NIKUD
  5. class KtivMaleModel(CanineForTokenClassification):
  6. def __init__(self, config):
  7. assert hasattr(config, 'num_labels') and config.num_labels == 3
  8. super().__init__(config)
  9. class UnikudModel(CaninePreTrainedModel):
  10. # based on CaninePreTrainedModel
  11. # slightly modified for multilabel classification
  12. def __init__(self, config, num_labels=(len(NIKUD) + 1)):
  13. # Note: one label for each nikud type, plus one for the deletion flag
  14. super().__init__(config)
  15. config.num_labels = num_labels
  16. self.num_labels = config.num_labels
  17. self.canine = CanineModel(config)
  18. self.dropout = nn.Dropout(config.hidden_dropout_prob)
  19. self.classifier = nn.Linear(config.hidden_size, config.num_labels)
  20. # Initialize weights and apply final processing
  21. self.post_init()
  22. self.criterion = nn.BCEWithLogitsLoss()
  23. def forward(
  24. self,
  25. input_ids=None,
  26. attention_mask=None,
  27. token_type_ids=None,
  28. position_ids=None,
  29. head_mask=None,
  30. inputs_embeds=None,
  31. labels=None,
  32. output_attentions=None,
  33. output_hidden_states=None,
  34. ):
  35. outputs = self.canine(
  36. input_ids,
  37. attention_mask=attention_mask,
  38. token_type_ids=token_type_ids,
  39. position_ids=position_ids,
  40. head_mask=head_mask,
  41. inputs_embeds=inputs_embeds,
  42. output_attentions=output_attentions,
  43. output_hidden_states=output_hidden_states
  44. )
  45. sequence_output = outputs[0]
  46. sequence_output = self.dropout(sequence_output)
  47. logits = self.classifier(sequence_output)
  48. loss = None
  49. if labels is not None:
  50. loss = self.criterion(logits, labels)
  51. return TokenClassifierOutput(
  52. loss=loss,
  53. logits=logits,
  54. hidden_states=outputs.hidden_states,
  55. attentions=outputs.attentions,
  56. )
  57. if __name__ == '__main__':
  58. km_model = KtivMaleModel.from_pretrained("google/canine-c", num_labels=3)
  59. u_model = UnikudModel.from_pretrained("google/canine-c")
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...