hsuwei
/
bibi
connected to https://github.com/hswei0/bibi.git


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
            ### A Pluto.jl notebook ###
# v0.19.27

using FileIO
using CSV
using TidierData
using DataFrames
using RCall
import Pandas.read_csv as pdread_csv

# Directory that holds the per-cluster CSV exports (resolved relative to the
# current working directory).
thewd = joinpath(pwd(), "crawled-data", "wos", "out")

# Full paths of every `.csv` file directly inside `thewd`.
csv_files = filter(endswith(".csv"), readdir(thewd; join = true))

# Fail early with a clear message instead of an obscure "column not found"
# error from `select!` further down.
isempty(csv_files) && error("no CSV files found in $(thewd)")

"""
    read_cluster_csv(file_path)

Read the CSV file at `file_path` into a `DataFrame` and add (or overwrite) a
`Cluster` column whose value, on every row, is the file's base name without
its extension (e.g. `"/some/dir/ClusterName.csv"` gives `"ClusterName"`).

Prints `"<cluster name> done"` once the file has been read, and returns the
tagged `DataFrame`.
"""
function read_cluster_csv(file_path)
    df = CSV.read(file_path, DataFrame)
    # Use only the file name: `file_path` is a full path, so splitting the whole
    # path on "." would keep the directory part and break on any dot in it.
    cluster_name = first(splitext(basename(file_path)))
    df[!, "Cluster"] .= cluster_name
    println("$(cluster_name) done")
    return df
end

# Stack all files in one pass. `cols = :union` keeps every column that appears
# in any file and fills the gaps with `missing`. Doing the work inside a
# function also avoids reassigning a global from a top-level `for` loop, which
# fails with `UndefVarError` when the script is run non-interactively.
combined_df = reduce(
    vcat,
    DataFrame[read_cluster_csv(file_path) for file_path in csv_files];
    cols = :union,
)

# Move `Cluster` to the front, then write the combined table next to `out/`.
select!(combined_df, :Cluster, :)
exportpth = joinpath(dirname(thewd), "binded.csv")
CSV.write(exportpth, combined_df)

## Compare files
# Find the rows of the combined export whose title does not appear in the SRDA
# bibliography, and write them to `ts.csv` in the current working directory.

# Re-read the combined table from disk so the comparison uses exactly what was
# exported.
df = CSV.read(exportpth, DataFrame)

# Reference table, resolved relative to the current working directory.
srda = CSV.read(joinpath(pwd(), "backend-DB/SRDA-BibData.csv"), DataFrame)

# Upper-case titles so the comparison is case-insensitive. `missing` is passed
# through untouched because `uppercase(missing)` would throw a `MethodError`.
normalize_title(title) = ismissing(title) ? missing : uppercase(title)

l0 = normalize_title.(df[!, :TI])          # presumably the title column of the export — confirm
l1 = normalize_title.(srda[!, :原始題目])  # column name translates to "original title"

# A `Set` gives constant-time membership tests instead of scanning `l1` once
# per row of `df`. Missing reference titles can never match, so drop them.
known_titles = Set(skipmissing(l1))

# `true` for rows to keep: the title is absent from the reference set. Rows
# with a missing title cannot be matched and are therefore kept as well.
bol = Bool[ismissing(title) || title ∉ known_titles for title in l0]

xx = df[bol, :]

CSV.write("ts.csv", xx)