Revisión | eb67ef491c60a931acbff68a1687dc108f80d6ee (tree) |
---|---|
Tiempo | 2025-01-16 04:19:12 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Commiter | Lorenzo Isella |
A script to process the etsy sales data.
@@ -0,0 +1,66 @@ | ||
1 | +rm(list=ls()) | |
2 | + | |
3 | +library(tidyverse) | |
4 | +library(janitor) | |
5 | +library(openxlsx) | |
6 | + | |
7 | + | |
8 | +source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R") | |
9 | + | |
10 | + | |
11 | +# Define the function | |
12 | +retain_pattern <- function(vec, pattern) { | |
13 | + sapply(vec, function(entry) { | |
14 | + match <- str_extract(entry, pattern) # Extract the pattern | |
15 | + if (!is.na(match)) { | |
16 | + return(match) # Return the matched pattern if found | |
17 | + } else { | |
18 | + return(entry) # Return the original entry if no match | |
19 | + } | |
20 | + }) | |
21 | +} | |
22 | + | |
23 | +df_ini <- read_csv("../input/etsy_statement_2024_10.csv") |> | |
24 | + clean_names() | |
25 | + | |
26 | +save_excel(df_ini, "../output/clean_october.xlsx") | |
27 | + | |
28 | + | |
29 | +df <- df_ini |> | |
30 | + mutate(across(c(amount, fees_taxes, net), ~convert_euro_to_numeric(.x))) |> | |
31 | + select(-c(tax_details, status, availability_date)) |> | |
32 | + na_num_to_pattern(0) | |
33 | + | |
34 | + | |
35 | +df_long <- df |> | |
36 | + select(-c(type, currency)) |> | |
37 | + pivot_longer(cols=c(amount, fees_taxes, net), names_to="transaction_type", | |
38 | + values_to="value_eur") |> | |
39 | + filter(value_eur!=0) |> | |
40 | + arrange(title) |> | |
41 | + mutate(title2=title) |> | |
42 | + mutate(title2=retain_pattern(title2, "Payment for Order")) |> | |
43 | + mutate(title2=retain_pattern(title2, "Partial refund for Order")) |> | |
44 | + mutate(title2=retain_pattern(title2, "Listing fee")) |> | |
45 | + mutate(title2=retain_pattern(title2, "Transaction fee")) |> | |
46 | + rename("transaction_type_new"="title2") | |
47 | + | |
48 | +df_tran <- df_long |> | |
49 | + summarise(total=sum(value_eur), .by=transaction_type) | |
50 | + | |
51 | + | |
52 | +df_tran_new <- df_long |> | |
53 | + summarise(total=sum(value_eur), .by=c(transaction_type, transaction_type_new)) | |
54 | + | |
55 | +df_tran_new_wide <- df_tran_new |> | |
56 | + pivot_wider(names_from=transaction_type, values_from=total) |> | |
57 | + add_total() | |
58 | + | |
59 | + | |
60 | +save_excel(df_long, "../output/data_masha.xlsx", "raw_data") | |
61 | +save_excel(df_tran, "../output/data_masha.xlsx", "summary_data") | |
62 | +save_excel(df_tran_new_wide, "../output/data_masha.xlsx", "breakdown") | |
63 | + | |
64 | + | |
65 | + | |
66 | +print("So far so good") |