Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

clean_loan_info.py 1.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  1. import pandas as pd
  2. import numpy as np
  3. def loan_info_fmt_date(df, col):
  4. month_dict = {
  5. 'jan': '1',
  6. 'feb': '2',
  7. 'mar': '3',
  8. 'apr': '4',
  9. 'may': '5',
  10. 'jun': '6',
  11. 'jul': '7',
  12. 'aug': '8',
  13. 'sep': '9',
  14. 'oct': '10',
  15. 'nov': '11',
  16. 'dec': '12'
  17. }
  18. df[col] = df[col].str.strip().str.lower()
  19. df[col] = pd.to_datetime(
  20. df[col].str[:3].replace(month_dict) +
  21. df[col].str[3:],
  22. format='%m-%Y')
  23. def apply_end_d(status, group, max_date):
  24. '''
  25. based on last known payment from loan_info, figure out end_d based on
  26. status
  27. '''
  28. if status == 'charged_off':
  29. #split the group into two groups, one which has paid something,
  30. #and other which has paid nothing
  31. never_paid = group[group['last_pymnt_d'].isnull()]
  32. has_paid = group[group['last_pymnt_d'].notnull()]
  33. # 4 months of late (1-120) and then 1 month of chargeoff, so 5 months
  34. never_paid['end_d'] = never_paid['issue_d'] + pd.DateOffset(months=+5)
  35. has_paid['end_d'] = has_paid['last_pymnt_d'] + pd.DateOffset(months=+5)
  36. group.loc[never_paid.index.values, 'end_d'] = never_paid['end_d']
  37. group.loc[has_paid.index.values, 'end_d'] = has_paid['end_d']
  38. return group['end_d']
  39. elif status == 'paid':
  40. return group['last_pymnt_d']
  41. return pd.Series([max_date] * len(group), index=group.index.values)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...