Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

merge-wos.jl 1.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  ### A Pluto.jl notebook ###
  # v0.19.27

  using FileIO
  using CSV
  using TidierData
  using DataFrames
  using RCall
  import Pandas.read_csv as pdread_csv

  """
      clustername(file_path)

  Return the file name of `file_path` without its directory and extension,
  e.g. `"/data/out/ClusterName.csv"` gives `"ClusterName"`.
  """
  clustername(file_path) = first(splitext(basename(file_path)))

  """
      bindclusters(paths)

  Read every CSV file in `paths`, add a `Cluster` column holding the name derived
  from the file name, and return all rows stacked into a single `DataFrame`.

  The loop lives in a function so that re-binding the accumulator does not hit the
  top-level soft-scope rule when this file is run as a plain script.
  """
  function bindclusters(paths)
      combined = DataFrame()
      for file_path in paths
          # Read the CSV file.
          df = CSV.read(file_path, DataFrame)
          # Derive the cluster name from the file name (expected "ClusterName.csv").
          # `file_path` is a full path, so the directory part has to be dropped first.
          cluster_name = clustername(file_path)
          # Add the Cluster column.
          df[!, "Cluster"] .= cluster_name
          # Append these rows to the ones gathered so far.
          combined = @bind_rows(combined, df)
          println("$(cluster_name) done")
      end
      return combined
  end

  """
      normtitle(title)

  Return `title` upper-cased so titles can be compared case-insensitively.
  A `missing` title stays `missing`.
  """
  normtitle(title) = uppercase(title)
  normtitle(::Missing) = missing

  ## Merge the per-cluster files

  # Directory that holds the per-cluster CSV files, relative to the working directory.
  thewd = joinpath(pwd(), "crawled-data", "wos", "out")
  csv_files = filter(endswith(".csv"), readdir(thewd; join = true))
  isempty(csv_files) && throw(ArgumentError("no .csv files found in $(thewd)"))

  combined_df = bindclusters(csv_files)
  # Move the Cluster column to the front, keeping every other column after it.
  select!(combined_df, :Cluster, :)

  # The merged table is written one level above the input directory.
  exportpth = joinpath(dirname(thewd), "binded.csv")
  CSV.write(exportpth, combined_df)

  ## Compare files

  # Re-read the merged table from disk and load the reference table.
  df = CSV.read(exportpth, DataFrame)
  srda = CSV.read(joinpath(pwd(), "backend-DB/SRDA-BibData.csv"), DataFrame)

  # Compare `TI` against `原始題目` ignoring case
  # (both presumably hold record titles; confirm against the data).
  l0 = normtitle.(df[!, :TI])
  l1 = normtitle.(srda[!, :原始題目])

  # Set lookup keeps the comparison linear in the number of rows.
  known_titles = Set(skipmissing(l1))
  # A row is kept when its title is absent from the reference table; rows without
  # a title cannot be matched and are kept as well.
  bol = map(title -> ismissing(title) || title ∉ known_titles, l0)

  xx = df[bol, :]
  CSV.write("ts.csv", xx)
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...