Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

estimators.py 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  1. # Copyright 2020 Erik Härkönen. All rights reserved.
  2. # This file is licensed to you under the Apache License, Version 2.0 (the "License");
  3. # you may not use this file except in compliance with the License. You may obtain a copy
  4. # of the License at http://www.apache.org/licenses/LICENSE-2.0
  5. # Unless required by applicable law or agreed to in writing, software distributed under
  6. # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
  7. # OF ANY KIND, either express or implied. See the License for the specific language
  8. # governing permissions and limitations under the License.
  9. import itertools
  10. from types import SimpleNamespace
  11. import numpy as np
  12. from sklearn.decomposition import IncrementalPCA
  13. # Incremental PCA
  14. class IPCAEstimator():
  15. def __init__(self, n_components):
  16. self.n_components = n_components
  17. self.whiten = False
  18. self.transformer = IncrementalPCA(n_components, whiten=self.whiten, batch_size=max(100, 2 * n_components))
  19. self.batch_support = True
  20. def get_param_str(self):
  21. return "ipca_c{}{}".format(self.n_components, '_w' if self.whiten else '')
  22. def fit(self, X):
  23. self.transformer.fit(X)
  24. def fit_partial(self, X):
  25. try:
  26. self.transformer.partial_fit(X)
  27. self.transformer.n_samples_seen_ = \
  28. self.transformer.n_samples_seen_.astype(np.int64) # avoid overflow
  29. return True
  30. except ValueError as e:
  31. print(f'\nIPCA error:', e)
  32. return False
  33. def get_components(self):
  34. stdev = np.sqrt(self.transformer.explained_variance_) # already sorted
  35. var_ratio = self.transformer.explained_variance_ratio_
  36. return self.transformer.components_, stdev, var_ratio # PCA outputs are normalized
  37. def get_estimator(name, n_components, alpha):
  38. if name == 'ipca':
  39. return IPCAEstimator(n_components)
  40. else:
  41. raise RuntimeError('Unknown estimator')
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...