library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)
library(cowplot)
library(extrafont)

# NOTE(review): this re-imports the Times fonts on every run of the script.
# extrafont presumably only needs the import once per machine -- confirm and
# consider running it manually instead.
font_import(pattern = "Times", prompt = FALSE)

# This file contains the code to produce the figures and tables in the main
# body of the paper.
# Author of figure code: Christina Maimone christina.maimone@northwestern.edu

# Load data ----
patents <- read.csv("patents_analysis.csv")

# Styling ----
# Shared theme: no grid lines, transparent backgrounds, thin gray axes,
# 9pt Times New Roman text and no legend (series are labelled on the plots).
basetheme <- theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  panel.background = element_rect(
    fill = "transparent", color = "gray", size = .5
  ),
  plot.background = element_rect(fill = "transparent", color = NA),
  axis.line.x = element_line(colour = "#666666", size = .25),
  axis.line.y = element_line(colour = "#666666", size = .25),
  axis.ticks = element_line(colour = "#666666", size = .25),
  axis.text = element_text(color = "#333333", size = 9),
  legend.key = element_rect(fill = NA),
  legend.title = element_blank(),
  strip.background = element_blank(),
  strip.placement = "outside",
  legend.position = "none",
  text = element_text(family = "Times New Roman"),
  panel.spacing = unit(1.5, "lines"),
  axis.title = element_text(size = 9),
  plot.caption = element_text(size = 9)
)

# Wrap `plot` in a cowplot canvas and write one text label per element of
# `labels` at canvas coordinates (x[i], y[i]). Returns the annotated canvas.
draw_series_labels <- function(plot, labels, x, y) {
  stopifnot(length(labels) == length(x), length(labels) == length(y))
  canvas <- ggdraw(plot)
  for (i in seq_along(labels)) {
    canvas <- canvas +
      draw_label(
        labels[i],
        x = x[i],
        y = y[i],
        vjust = 1,
        hjust = 1,
        size = 8,
        fontfamily = "Times New Roman"
      )
  }
  canvas
}

# Figure 1 ----
# Mean forward citations per year, by validity. Columns ending in "examiner"
# are excluded; the year is parsed out of the column name.
# NOTE(review): mean() has no na.rm here (Figure 2 uses na.rm = TRUE), so an
# NA citation count makes that group's mean NA -- confirm the columns used
# here are complete.
figdata <- patents %>%
  select(
    validity,
    starts_with("forward_cite_year"),
    -ends_with("examiner")
  ) %>%
  gather(
    key = "year",
    value = "citations",
    starts_with("forward_cite_year")
  ) %>%
  mutate(
    year = parse_number(year),
    Validity = tools::toTitleCase(as.character(validity))
  ) %>%
  mutate(Validity = factor(Validity, levels = c("Valid", "Invalid"))) %>%
  group_by(Validity, year) %>%
  summarize(mean = mean(citations))

# Tick marks at every year, but only every fifth one is labelled.
xticklabels <- rep("", 21)
xticklabels[seq(0, 20, 5) + 1] <- seq(0, 20, 5)

# Ticks at 0:12 with only the even values labelled: interleave the even
# numbers with "" (14 entries) and drop the last "" to get 13 labels.
f1_y_labels <- c(rbind(seq(0, 12, 2), ""))[-14]

f1 <- figdata %>%
  ggplot(aes(x = year, y = mean, color = Validity, shape = Validity)) +
  geom_point(size = 2) +
  geom_line() +
  scale_shape_manual(values = c(16, 15)) +
  labs(
    title = "",
    x = "Patent Term",
    y = "Average Forward Citations\nReceived in Year"
  ) +
  scale_y_continuous(
    limits = c(0, 12),
    breaks = 0:12,
    labels = f1_y_labels
  ) +
  scale_x_continuous(
    limits = c(0, 20),
    breaks = 0:20,
    labels = xticklabels
  ) +
  scale_color_grey(start = .2, end = .6) +
  annotate(
    "text",
    x = c(15.5, 18.5),
    y = c(1.3, 4.8),
    label = c("Invalid", "Valid"),
    color = "black",
    size = 3,
    family = "Times New Roman",
    hjust = 0
  ) +
  basetheme
f1

ggsave("figure1.pdf", plot = f1, width = 4.67, height = 2.5)
ggsave("figure1.png", plot = f1, width = 4.67, height = 2.5)

# Variant with horizontal grid lines. The plot is stored and passed to
# ggsave() explicitly rather than relying on last_plot().
f1_grid <- f1 +
  scale_y_continuous(
    limits = c(0, 12),
    breaks = 0:12,
    labels = f1_y_labels,
    expand = c(0, 0, 0, .1)
  ) +
  theme(panel.grid.major.y = element_line(colour = "#cccccc", size = .25))
f1_grid

ggsave("figure1_grid.pdf", plot = f1_grid, width = 4.67, height = 2.5)
ggsave("figure1_grid.png", plot = f1_grid, width = 4.67, height = 2.5)

# Figure 2 ----
# Note: 18 data points are excluded from the plot because of the limits on
# the x axis
#
# Columns whose name contains "us" are treated as total citations, the rest
# as examiner citations; applicant citations are total minus examiner.
# gather()/spread() are kept on purpose: spread(fill = 0) writes 0 into
# missing cells, and switching to pivot_wider() could change the means.
figdata2 <- patents %>%
  select(patent_num, validity, starts_with("forward_cite_year")) %>%
  gather(
    key = "var",
    value = "citations",
    starts_with("forward_cite_year")
  ) %>%
  mutate(
    year = parse_number(var),
    var = ifelse(str_detect(var, "us"), "total", "examiner")
  ) %>%
  spread(var, citations, fill = 0) %>%
  # for plot capitalization
  mutate(Validity = factor(
    tools::toTitleCase(as.character(validity)),
    levels = c("Valid", "Invalid")
  )) %>%
  group_by(Validity, year) %>%
  summarize(
    `Examiner Citations` = mean(examiner, na.rm = TRUE),
    `Applicant Citations` = mean(total - examiner, na.rm = TRUE)
  ) %>%
  gather(key = "type", value = "mean", contains("Citations"))

f2 <- figdata2 %>%
  ggplot(aes(x = year, y = mean, color = Validity, shape = Validity)) +
  geom_point(size = 2) +
  facet_wrap(type ~ ., ncol = 1, scales = "free") +
  geom_line() +
  scale_shape_manual(values = c(16, 15)) +
  labs(
    title = "",
    x = "Patent Term",
    y = "Average Forward Citations Received in Year"
  ) +
  scale_y_continuous(limits = c(0, 7), breaks = seq(0, 7, 1)) +
  scale_x_continuous(
    limits = c(0, 20),
    breaks = 0:20,
    labels = xticklabels
  ) +
  scale_color_grey(start = .2, end = .6) +
  basetheme +
  theme(
    strip.text = element_text(size = 9, margin = margin(b = 2)),
    axis.title.y = element_text(margin = margin(r = 10))
  )

# Series labels are placed by hand in canvas coordinates: the first two
# belong to the upper panel, the last two to the lower panel.
f2_labeled <- draw_series_labels(
  f2,
  labels = c("Invalid", "Valid", "Invalid", "Valid"),
  x = c(.89, .96, .77, .91),
  y = c(.62, .70, .155, .24)
)
f2_labeled

ggsave("figure2.png", plot = f2_labeled, width = 4.67, height = 5)
ggsave("figure2.pdf", plot = f2_labeled, width = 4.67, height = 5)

# Variant with horizontal grid lines and more room below the strip text.
f3 <- f2 +
  scale_y_continuous(
    limits = c(0, 7),
    breaks = seq(0, 7, 1),
    expand = c(0, 0, 0, 0.1)
  ) +
  theme(
    panel.grid.major.y = element_line(colour = "#cccccc", size = .25),
    strip.text = element_text(margin = margin(b = .5, unit = "cm"))
  )

f3_labeled <- draw_series_labels(
  f3,
  labels = c("Invalid", "Valid", "Invalid", "Valid"),
  x = c(.89, .96, .77, .9),
  y = c(.60, .685, .14, .225)
)
f3_labeled

ggsave("figure2_grid.png", plot = f3_labeled, width = 4.67, height = 5)
ggsave("figure2_grid.pdf", plot = f3_labeled, width = 4.67, height = 5)

# Figure 3 ----
# Ticks every 50 citations, labelled every 200.
xticklabels <- rep("", 17)
xticklabels[c(1, 5, 9, 13, 17)] <- seq(0, 800, 200)

# The 0-800 range is applied through coord_flip(ylim = ...) rather than as
# scale limits: scale limits discard out-of-range observations before the
# box plot statistics are computed, which would make the boxes disagree with
# the full-data medians and t-tests reported below.
fig3 <- patents %>%
  mutate(Validity = tools::toTitleCase(as.character(validity))) %>%
  mutate(Validity = factor(Validity, levels = c("Invalid", "Valid"))) %>%
  mutate(tech_class_6 = str_wrap(tech_class_6, 12)) %>%
  ggplot(aes(y = forward_cite_us, x = Validity, fill = Validity)) +
  geom_boxplot(
    outlier.size = 1,
    outlier.alpha = .5,
    lwd = .25,
    outlier.stroke = 0
  ) +
  coord_flip(ylim = c(0, 800)) +
  facet_grid(tech_class_6 ~ ., switch = "both", scales = "free") +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = seq(0, 800, 50),
    labels = xticklabels
  ) +
  scale_fill_grey(start = .9, end = .65) +
  labs(
    title = "",
    y = "Forward Citations*",
    caption = "*Range shown limited to 800",
    x = ""
  ) +
  basetheme +
  theme(
    strip.text = element_text(
      vjust = 0,
      margin = margin(0, .25, 0, 0, "cm"),
      size = 9
    ),
    axis.text.y = element_text(size = 9),
    axis.ticks.y = element_blank(),
    plot.margin = unit(c(-.3, .3, 0, -.2), "cm"),
    panel.spacing = unit(1, "lines"),
    plot.caption = element_text(size = 8)
  )
fig3

ggsave("figure3.png", plot = fig3, width = 4.67, height = 5)
ggsave("figure3.pdf", plot = fig3, width = 4.67, height = 5)

# Figure 3 summary stats and t-tests
diff_col <- "Difference in Logged Means (Valid-Invalid)"
pval_col <- "p-value on t-test of Difference"

# One row per technology class and validity. After summarize() the table is
# still grouped by class, so [2] - [1] is taken within each class; it is
# valid minus invalid only because "invalid" sorts before "valid".
tbl <- patents %>%
  group_by(tech_class_6, validity) %>%
  summarize(
    Count = n(),
    `Median Forward Citations` = median(forward_cite_us),
    `Mean Forward Citations` = mean(forward_cite_us),
    `Mean log(Forward Citations+1)` = mean(log(1 + forward_cite_us))
  ) %>%
  mutate(
    `Difference in Logged Means (Valid-Invalid)` =
      `Mean log(Forward Citations+1)`[2] -
      `Mean log(Forward Citations+1)`[1]
  ) %>%
  ungroup()

# t-test of logged citations by validity within each technology class,
# computed per class instead of via hard-coded class names and row numbers.
class_p_values <- vapply(
  split(patents, patents$tech_class_6),
  function(class_rows) {
    t.test(log(forward_cite_us + 1) ~ validity, data = class_rows)$p.value
  },
  numeric(1)
)

# The difference and its p-value are reported once per class, on the first
# row of that class; the other row is left NA.
is_first_row <- !duplicated(tbl$tech_class_6)
first_row_classes <- as.character(tbl$tech_class_6[is_first_row])
tbl[[diff_col]][!is_first_row] <- NA
tbl[[pval_col]] <- NA_real_
tbl[[pval_col]][is_first_row] <- unname(class_p_values[first_row_classes])
tbl

# Figure 4 ----
# Note: the original script reported 24 data points excluded by the axis
# limits. Points above 800 are still not drawn, but because the range is now
# set via coord_flip(ylim = ...) they do count towards the box statistics.

# One row per (patent, invalidation ground) where the flag equals 1; a
# patent can therefore appear under several grounds.
tmp <- patents %>%
  select(
    forward_cite_us,
    invalidation_s102,
    invalidation_s103,
    invalidation_s112
  ) %>%
  gather(
    "validity",
    "value",
    invalidation_s102,
    invalidation_s103,
    invalidation_s112
  ) %>%
  filter(value == 1) %>%
  select(-value) %>%
  mutate(validity = paste0("35 U.S.C. § ", parse_number(validity)))

# All patents labelled valid/invalid, stacked on top of the per-ground rows.
fig4data <- patents %>%
  select(forward_cite_us, validity) %>%
  mutate(
    validity = ifelse(validity == "valid", "All Valid", "All Invalid")
  ) %>%
  rbind(tmp)

fig4data <- fig4data %>%
  mutate(validity = factor(
    validity,
    levels = c(
      "All Valid",
      "All Invalid",
      "35 U.S.C. § 102",
      "35 U.S.C. § 103",
      "35 U.S.C. § 112"
    )
  )) %>%
  mutate(forward_cite_us_log = log(forward_cite_us + 1))

# Levels are reversed so that "All Valid" ends up at the top after the flip.
# Fills are given per box in level order, so only the last box ("All Valid")
# gets the darker gray. No fill aesthetic is mapped, hence no fill scale.
fig4 <- fig4data %>%
  mutate(validity = factor(validity, levels = rev(levels(validity)))) %>%
  ggplot(aes(y = forward_cite_us, x = validity)) +
  geom_boxplot(
    outlier.size = 1,
    outlier.alpha = .5,
    lwd = .25,
    outlier.stroke = 0,
    fill = gray(c(.9, .9, .9, .9, .65)),
    width = .6
  ) +
  coord_flip(ylim = c(0, 800)) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = seq(0, 800, 50),
    labels = xticklabels
  ) +
  labs(
    title = "",
    x = "",
    y = "Forward Citations*",
    caption = "*Range shown limited to 800"
  ) +
  basetheme +
  theme(
    # axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.margin = unit(c(-.3, .3, 0, -.2), "cm"),
    plot.caption = element_text(size = 8)
  )
fig4

ggsave("figure4.png", plot = fig4, width = 4.67, height = 2)
ggsave("figure4.pdf", plot = fig4, width = 4.67, height = 2)

# Figure 4 t-tests
# Each non-reference group is compared against "All Valid" on logged
# citations; the result is named by group.
valid_log <- fig4data$forward_cite_us_log[fig4data$validity == "All Valid"]
p <- vapply(
  levels(fig4data$validity)[-1],
  function(group) {
    group_log <- fig4data$forward_cite_us_log[fig4data$validity == group]
    t.test(valid_log, group_log)$p.value
  },
  numeric(1)
)

# The threshold is tested on the unrounded p-value. Rounding to 2 decimals
# first turned every p < 0.005 into 0 and mislabelled it "< 0.001". The
# remaining values are shown with 3 decimals to match the threshold.
p <- ifelse(p < .001, "< 0.001", as.character(round(p, 3)))

# The "All Valid" reference row has no p-value.
test_tbl <- fig4data %>%
  rename(Validity = validity) %>%
  group_by(Validity) %>%
  summarize(
    Count = n(),
    `Mean (log forward citations)` = mean(log(forward_cite_us + 1))
  ) %>%
  mutate(p = c(NA, p))
test_tbl