Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

az18-index.sql 2.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
  1. --- #dep az18-ratings
  2. --- #dep cluster
  3. --- #table az18.user_ids
  4. --- #step Extract review data
  5. DROP MATERIALIZED VIEW IF EXISTS az18.review CASCADE;
  6. CREATE MATERIALIZED VIEW az18.review AS
  7. SELECT "reviewerID" AS user_key, asin, overall AS rating, summary, "reviewText",
  8. "unixReviewTime" AS review_time
  9. FROM az18.raw_review,
  10. jsonb_to_record(review_data) AS
  11. x("reviewerID" VARCHAR, asin VARCHAR, overall REAL,
  12. summary TEXT, "reviewText" TEXT,
  13. "unixReviewTime" INTEGER);
  14. CREATE INDEX az18_review_user_idx ON az18.review (user_key);
  15. CREATE INDEX az18_review_asin_idx ON az18.review (asin);
  16. ANALYZE az18.review;
  17. --- #step Extract user IDs
  18. DROP TABLE IF EXISTS az18.user_ids CASCADE;
  19. CREATE TABLE az18.user_ids (
  20. user_id SERIAL PRIMARY KEY,
  21. user_key VARCHAR NOT NULL,
  22. UNIQUE (user_key)
  23. );
  24. INSERT INTO az18.user_ids (user_key) SELECT DISTINCT user_key FROM az18.review;
  25. ANALYZE az18.user_ids;
  26. --- #step Extract ISBNs
  27. INSERT INTO isbn_id (isbn)
  28. SELECT DISTINCT asin
  29. FROM az18.review WHERE asin NOT IN (SELECT isbn FROM isbn_id);
  30. ANALYZE isbn_id;
  31. --- #step Set up rating view
  32. DROP MATERIALIZED VIEW IF EXISTS az18.rating;
  33. CREATE MATERIALIZED VIEW az18.rating
  34. AS SELECT user_id, COALESCE(cluster, bc_of_isbn(isbn_id)) AS book_id,
  35. MEDIAN(rating) AS rating,
  36. (array_agg(rating ORDER BY review_time DESC))[1] AS last_rating,
  37. MEDIAN(review_time) AS timestamp,
  38. COUNT(rating) AS nratings
  39. FROM az18.review
  40. JOIN az18.user_ids USING (user_key)
  41. JOIN isbn_id ON (isbn = asin)
  42. LEFT JOIN isbn_cluster USING (isbn_id)
  43. GROUP BY user_id, COALESCE(cluster, bc_of_isbn(isbn_id));
  44. CREATE INDEX az18_rating_user_idx ON az18.rating (user_id);
  45. CREATE INDEX az18_rating_asin_idx ON az18.rating (book_id);
  46. ANALYZE az18.rating;
  47. DROP MATERIALIZED VIEW IF EXISTS az18.book;
  48. CREATE MATERIALIZED VIEW az18.book
  49. AS SELECT book_id, (book_data->>'asin')::varchar AS asin,
  50. NULLIF(book_data->>'rank', '') AS rank,
  51. NULLIF(book_data->>'brand', '') AS brand,
  52. NULLIF(book_data->>'price', '') AS price,
  53. NULLIF(book_data->>'title', '') AS title,
  54. NULLIF(book_data->>'also_buy', '') AS also_buy,
  55. NULLIF(book_data->>'main_cat', '') AS main_cat,
  56. NULLIF(book_data->>'category', '') AS genre,
  57. NULLIF(book_data->>'also_view', '') AS also_view,
  58. NULLIF(book_data->>'description', '') AS description
  59. FROM az18.raw_book;
  60. CREATE INDEX az18_book_idx ON az18.book (book_id);
  61. ANALYZE az18.book;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...