1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
- from torch import nn
- from transformers import CanineModel, CanineForTokenClassification, CaninePreTrainedModel
- from transformers.modeling_outputs import TokenClassifierOutput
- from hebrew_utils import NIKUD
class KtivMaleModel(CanineForTokenClassification):
    """CANINE token-classification model that only accepts three labels."""

    def __init__(self, config):
        """Build the model from *config*, which must declare ``num_labels == 3``."""
        # A config without ``num_labels`` yields None here and fails the
        # check just like one carrying any value other than 3.
        label_count = getattr(config, 'num_labels', None)
        assert label_count == 3
        super().__init__(config)
class UnikudModel(CaninePreTrainedModel):
    """CANINE encoder with a multi-label classification head.

    Based on CaninePreTrainedModel and slightly modified for multilabel
    classification: the loss is ``nn.BCEWithLogitsLoss``, so each of the
    ``num_labels`` outputs is scored as an independent binary decision.
    """

    def __init__(self, config, num_labels=(len(NIKUD) + 1)):
        """Build the encoder, dropout and linear classification head.

        Args:
            config: model configuration; its ``num_labels`` is overwritten
                with *num_labels*.
            num_labels: size of the classifier output.
        """
        # Note: one label for each nikud type, plus one for the deletion flag
        super().__init__(config)
        config.num_labels = num_labels
        self.num_labels = config.num_labels

        self.canine = CanineModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

        self.criterion = nn.BCEWithLogitsLoss()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        """Run the encoder and classification head, optionally computing a loss.

        All arguments except *labels* are forwarded unchanged to the
        underlying ``CanineModel``.

        Args:
            labels: optional multi-hot targets with the same shape as the
                logits (presumably ``(batch, seq_len, num_labels)`` -- confirm
                against the data pipeline). Integer or boolean tensors are
                cast to the logits dtype before the loss is computed.

        Returns:
            TokenClassifierOutput holding ``logits``, the encoder's
            ``hidden_states`` and ``attentions``, and ``loss`` (``None``
            when *labels* is not given).
        """
        outputs = self.canine(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        # The first element of the encoder output is the sequence output.
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            # BCEWithLogitsLoss requires floating-point targets; integer or
            # boolean multi-hot labels would otherwise raise a dtype error.
            # Floating-point labels are passed through untouched.
            if not labels.is_floating_point():
                labels = labels.to(logits.dtype)
            loss = self.criterion(logits, labels)

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
if __name__ == '__main__':
    # When run as a script, instantiate both models from the same
    # pretrained checkpoint.
    checkpoint = "google/canine-c"
    km_model = KtivMaleModel.from_pretrained(checkpoint, num_labels=3)
    u_model = UnikudModel.from_pretrained(checkpoint)
|