knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = 'C:\\Users\\camie\\Dropbox\\Dropbox\\Beg ar Loued\\Article AS_CP\\BAL R data and scripts\\clean_R_scripts\\data_for_figures')

##This R Markdown document provides the code for all figures in Prevost and Suryanarayan et al. to ensure transparency and reproducibility of data.

INSTRUCTIONS

#1) Please ensure you have downloaded the latest version of R Studio.

#2) Download data files from the data repository.

#3) Change file pathway to the name of the folder where you have saved the data files.

#WARNING

#Order for publication of supplementary info is different than for the code of the plots

##publication order:

#Appendix 5.A

#Appendix 5.B

#Appendix 5.C

#Appendix 5.D

#Appendix 6.A

#Appendix 6.B

#Appendix 6.C

#Appendix 6.D

#Appendix 7.A

#Appendix 7.B

#Appendix 7.C

#Appendix 7.D

#Appendix 8.A

#Appendix 8.B

#Appendix 9.A

#Appendix 9.B

#Appendix 10

#Appendix 11

#Appendix 12

##code order :

#Appendix 10

#Appendix 12

#Appendix 11

#Appendix 5.A

#Appendix 7.D

#Appendix 6.D

#Appendix 5.D

#Appendix 7.B

#Appendix 9.A

#Appendix 9.B

#Appendix 8.A

#Appendix 8.B

#Appendix 5.C

#Appendix 6.C

#Appendix 7.C

#Appendix 5.B

#Appendix 6.B

#Appendix 6.A

#Appendix 7.A

Prepare Global environment

#Load packages

library(readr)
library(readxl)
library(ade4)
library(ggplot2)
library(dplyr)
## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attachement du package : 'plyr'
## Les objets suivants sont masqués depuis 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attachement du package : 'ggpubr'
## L'objet suivant est masqué depuis 'package:plyr':
## 
##     mutate
library(reshape2)
library(RColorBrewer)
library(httpuv)
library(caTools)
library(tidyr)
## 
## Attachement du package : 'tidyr'
## L'objet suivant est masqué depuis 'package:reshape2':
## 
##     smiths
library(gridExtra)
## 
## Attachement du package : 'gridExtra'
## L'objet suivant est masqué depuis 'package:dplyr':
## 
##     combine
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plyr::arrange()      masks dplyr::arrange()
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ purrr::compact()     masks plyr::compact()
## ✖ plyr::count()        masks dplyr::count()
## ✖ plyr::desc()         masks dplyr::desc()
## ✖ plyr::failwith()     masks dplyr::failwith()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ plyr::id()           masks dplyr::id()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ ggpubr::mutate()     masks plyr::mutate(), dplyr::mutate()
## ✖ plyr::rename()       masks dplyr::rename()
## ✖ plyr::summarise()    masks dplyr::summarise()
## ✖ plyr::summarize()    masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Supplementary for appendices 10-12

#Load data

Pathway <- "C:/Users/camie/Dropbox/Dropbox/Beg ar Loued/Article AS_CP/BAL R data and scripts/clean_R_scripts/data_for_figures"
setwd(Pathway)
dfA <- read_excel("data_app14.xls", sheet = "archeo2")
dfB <- read_excel("data_app14.xls", sheet = "archeo")
## New names:
## • `` -> `...1`
dfC <- read_excel("data_app14.xls", sheet = "ref")

#Sorting data and verification

dfA_sorted <- arrange(dfA, ech) 
str(dfA_sorted)
## tibble [204 × 5] (S3: tbl_df/tbl/data.frame)
##  $ mol  : chr [1:204] "E34" "E36" "E38" "E40" ...
##  $ qt   : num [1:204] 0 0 0 0 100 ...
##  $ id   : chr [1:204] "Beeswax (potential)" "Beeswax (potential)" "Beeswax (potential)" "Beeswax (potential)" ...
##  $ TLCpc: num [1:204] 6.31 6.31 6.31 6.31 6.31 ...
##  $ ech  : chr [1:204] "MR6506Re" "MR6506Re" "MR6506Re" "MR6506Re" ...
plot(dfA_sorted)

dfB_sorted <- arrange(dfB, ech) 
dfB_sorted$part = factor(dfB_sorted$part, levels = c("C26:0", "C24:0", "C22:0", "C20:0", "C18:0", "C16:0", "C14:0", "C12:0"))
str(dfB_sorted)
## tibble [832 × 6] (S3: tbl_df/tbl/data.frame)
##  $ ...1  : num [1:832] 1 2 3 4 5 6 7 8 1 2 ...
##  $ ech   : chr [1:832] "MR6506T" "MR6506T" "MR6506T" "MR6506T" ...
##  $ mol   : chr [1:832] "E34" "E34" "E34" "E34" ...
##  $ part  : Factor w/ 8 levels "C26:0","C24:0",..: 8 7 6 5 4 3 2 1 8 7 ...
##  $ QT    : num [1:832] 0 0 0 0 0 0 0 0 0 0 ...
##  $ QT(MR): num [1:832] 0 0 0 0 0 0 0 0 0 0 ...
plot(dfB_sorted)

dfC_sorted <- arrange(dfC, ech) 
str(dfC_sorted)
## tibble [60 × 3] (S3: tbl_df/tbl/data.frame)
##  $ ech: chr [1:60] "Bercy; 4600-3000 BCE" "Bercy; 4600-3000 BCE" "Bercy; 4600-3000 BCE" "Bercy; 4600-3000 BCE" ...
##  $ mol: chr [1:60] "E34" "E36" "E38" "E40" ...
##  $ qt : num [1:60] 0 0 0 65.4 51.9 ...
plot(dfC_sorted)

Appendix 10

n1A<-ggplot(dfA_sorted, aes (x=mol, y=qt, fill="Esters"))+
  geom_bar(stat= "identity", show.legend = FALSE, width = 0.90)+
  scale_fill_manual(values=c("#534b62"))+
  facet_wrap(~ech, ncol=6, scales = "free") +
  labs(title = "Ester profiles of archaeological samples", y = "Intensity")+
  xlab("")+
  theme_minimal()
leg<-element_text(color = "black", size = 6, angle=90)

APP10<-n1A+theme(axis.text.x =leg, panel.grid.minor = element_line(colour="white"))
APP10

Appendix 12

n1B<-ggplot(dfB_sorted, aes (x=mol, y=QT, fill=part))+
  geom_bar(stat= "identity", position="fill", show.legend = TRUE,  width = 0.90)+
  facet_wrap(~ech, ncol=6) +
  labs(title = "Percent composition of wax ester profiles", y = "Percentage")+
  guides(fill = guide_legend(title = "Molecules"))+
  xlab("") +
  scale_fill_brewer(palette = "Set2")+
  theme_minimal()
leg<-element_text(color = "black", size = 6, angle=90)

APP12<-n1B+theme(axis.text.x =leg, panel.grid.minor = element_line(colour="white"))
APP12
## Warning: Removed 346 rows containing missing values (`geom_bar()`).

Appendix 11

n1C<-ggplot(dfC_sorted, aes (x=mol, y=qt, fill="C16:0"))+
  geom_bar(stat= "identity", show.legend = TRUE, width = 0.90)+
  facet_wrap(~ech, ncol=6,labeller =  label_wrap_gen(15)) +
  labs(title = "Published ester profiles of archaeological and reference beeswaxes", y = "Intensity")+
  xlab("")+
  scale_fill_manual(values=c("#efca00"))+
  guides(fill = guide_legend(title = "Molecules"))+
  theme_minimal()
leg<-element_text(color = "black", size = 6, angle=90)

APP11<-n1C+theme(axis.text.x =leg, panel.grid.minor = element_line(colour="white"))
APP11

Supplementary for appendices 5-9

#Load data and verification

Pathway <- "C:/Users/camie/Dropbox/Dropbox/Beg ar Loued/Article AS_CP/BAL R data and scripts/clean_R_scripts/data_for_figures"
setwd(Pathway)
dt <- droplevels(read_excel("data_app14_2.xlsx"))

#Convert variables to factors

dt$new_chrono<- factor(dt$new_chrono,
  levels = c("FN","EBA1", "EBA(1&2)","EBA2", "EBA3","Undet."))
dt$Rim_Base_body <- factor(dt$Rim_Base_body) 
dt$Sample_type <- factor(dt$Sample_type, 
  levels = c("Charred residue", "Slip", "Absorbed residue"))
dt$Residue_type<- factor(dt$Residue_type, 
  levels = c("Internal residue","Internal slip","Absorbed residue", "External slip", "External residue"))

#Convert variables to numeric values

dt$Thickness_max_mm <- as.numeric(dt$Thickness_max_mm)
dt$Thickness_min_mm <- as.numeric(dt$Thickness_min_mm)
dt$Av_thickness<- as.numeric(dt$Av_thickness)

Subsetting data

#Create separate groups for all samples with TLE superior to 5 ug/g

dt_lip <- droplevels(subset(dt, TLE >= 5))
summary(dt_lip$TLE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   16.16   63.80  226.59  217.10 4368.00

#Subset absorbed residues

absorbedr <- droplevels(subset (dt,Sample_type == "Absorbed residue"))
summary(absorbedr$TLE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    5.93   26.92  106.79   92.10 1331.30

#Create separate groups for absorbed residue samples with TLE superior to 5 ug/g

abs_lip <- droplevels(subset(absorbedr, TLE>= 5))
summary(abs_lip$TLE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   15.30   53.25  140.02  138.09 1331.30

#Subset SLIPS and subset external and internal slips

slip <- droplevels(subset(dt, Sample_type == "Slip"))
slip_ext <- droplevels(subset(slip, Residue_type == "External slip"))
slip_int <- droplevels(subset(slip, Residue_type == "Internal slip")) 

#Create separate groups for charred residue samples with TLE superior to 5 ug/g

slip_lip <- droplevels(subset(slip, TLE >= 5))

#Subset charred residues and subset external and internal charred residues

residue <- droplevels(subset (dt, Sample_type  == "Charred residue"))
residue_ext <- droplevels(subset(residue, Residue_type == "External residue"))
residue_int <- droplevels(subset(residue, Residue_type == "Internal residue"))  

#Create separate groups for charred residue samples with TLE superior to 5 ug/g

residue_lip <- droplevels (subset(residue, TLE >= 5))

Creating boxplots for comparisons

Appendix 5.A

#Boxplot of absorbed vs. charred vs. slips

#Removing outliers in order to plot without a log scale

#Remove samples with TLE above 1000 ug/g

dt_minushighlip <- subset(dt, TLE < 1000)
TLEn<-as.numeric(dt_minushighlip$TLE)

#Plotting

APP5.A <- 
  ggplot(data = dt_minushighlip, 
         aes(x = Sample_type, y = TLEn, colour = Sample_type)) + 
  geom_boxplot(aes(x=Sample_type, y=TLEn), 
               position = position_dodge(0.5), width = .2, outlier.shape = NA) +
  labs(x = "Sample type", 
       y = "TLE (continous scale ; values > 5µg.g-1)",  
       colour = "Sample type", 
       caption = "Values > 1000µg.g-1 not inclued") +
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black")+
  scale_y_log10(breaks = c(5, 25, 100, 200, 500, 1000)) +
  scale_colour_manual(values = c("#1b9e77", "#d95f02", "#984ea3")) + 
  geom_jitter(aes(y = TLEn), position = position_jitter(0.1)) +
  scale_x_discrete(labels = paste(levels(dt_minushighlip$Sample_type),
                  "\n(N=",table(dt_minushighlip$Sample_type),")",sep="")) +
  theme_classic(base_size = 10) +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP5.A
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 13 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 13 rows containing non-finite values (`stat_summary()`).

Appendix 7.D

#Create boxplot comparing TLEs of samples that have multiple residue types

#Create subset paired data

paireddata <- droplevels(subset(dt, Paired >= 1))

#Subset paired data with values above 5 ug/g only

paireddata_lip <- subset(paireddata, TLE> 5)
TLEn<-as.numeric(paireddata$TLE)

#Create boxplots comparing TLEs of rim, body and base of vessels (absorbed residues)

presentstudy <- droplevels(subset(dt, Rim_Base_body_3 >= 1))
TLEn<-as.numeric(presentstudy$TLE)

#Order variables

presentstudy$Rim_Base_body_2<-factor(presentstudy$Rim_Base_body_2, levels = c("Rim", "Body", "Base"))

#Heat marker plots

#Organise data

presentstudy <- droplevels(subset(dt, Rim_Base_body_3 >= 1))
presentstudy$Rim_Base_body_2<-factor(presentstudy$Rim_Base_body_2, levels = c("Rim", "Body", "Base"))
ourstudysherdthickness <- droplevels(subset(presentstudy, Av_thickness  >= 1 ))
ourstudysherdthickness$Presence_heat_marker<- factor(ourstudysherdthickness$Heat)
Av_thicknessn<-as.numeric(ourstudysherdthickness$Av_thickness)

#Plotting

AAP7.D <- 
  ggplot(data = ourstudysherdthickness, 
        aes(x = Rim_Base_body_2, y = Av_thicknessn)) + 
  labs(x = "Fragment type", 
       y = "Average sherd thickness (log scale ; all samples of the present study)",  
       colour = "Presence of heat marker") +
  scale_y_log10(breaks = c(1, 5, 7, 10)) +
  scale_colour_manual(values = c("grey", "red")) +
  geom_point(position=position_jitterdodge
             (jitter.width= 0.2, dodge.width = 0.3),
             aes(y = Av_thicknessn, color=factor(Heat))) +
  scale_x_discrete(labels =paste(levels(ourstudysherdthickness$Rim_Base_body_2),                      "\n(N=",table(ourstudysherdthickness$Rim_Base_body_2),")",sep="")) +
  theme_classic(base_size = 10) +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
AAP7.D


Appendix 6.D

#Organise data

presentstudy$Residue_type<-factor(presentstudy$Residue_type)
ourstudysherdthickness <- droplevels(subset(presentstudy, Av_thickness  >= 1 ))
ourstudysherdthickness$Presence_heat_marker<- factor(ourstudysherdthickness$Heat)
Av_thicknessn<-as.numeric(ourstudysherdthickness$Av_thickness)

#Plotting

APP6.D <- 
  ggplot(data = ourstudysherdthickness, 
         aes(x = Residue_type, y = Av_thicknessn)) + 
  labs(x = "Residue type", 
       y = "Average sherd thickness (log scale ; all samples of the present study)",  
       colour = "Presence of heat marker") +
  scale_y_log10(breaks = c(1, 5, 7, 10)) +
  geom_point(position=position_jitterdodge(jitter.width= 0.2, dodge.width = 0.3),aes(y = Av_thicknessn, color=factor(Heat))) +
  scale_colour_manual(values = c("grey", "red")) + 
  scale_x_discrete(labels =paste(levels(ourstudysherdthickness$Residue_type),
        "\n(N=",table(ourstudysherdthickness$Residue_type),")",sep="")) +
  theme_classic(base_size = 10) +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP6.D


Appendix 5.D

APP5.D <- ggplot(data = ourstudysherdthickness, 
                   aes(x = Sample_type, y = Av_thicknessn)) + 
  labs(x = "Heat markers", y = "Average sherd thickness (log scale ; all samples of the present study)",  colour = "Presence of heat marker") +
  scale_y_log10(breaks = c(1, 5, 7, 10)) +
  scale_colour_manual(values = c("grey", "red")) +
  geom_point(position=position_jitterdodge(jitter.width= 0.2, dodge.width = 0.3),aes(y = Av_thicknessn, color=factor(Heat))) +
  scale_x_discrete(labels = paste(levels(ourstudysherdthickness$Sample_type),"\n(N=",table(ourstudysherdthickness$Sample_type),")",sep="")) +
  theme_classic(base_size = 10) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP5.D

Appendix 7.B

#Morpho vs TAG

#Order variables

presentstudy <- droplevels(subset(dt, Rim_Base_body_3 >= 1))
presentstudy$Rim_Base_body_2<- factor(presentstudy$Rim_Base_body_2, levels = c("Rim","Body", "Base"))
TAGn<-as.numeric(presentstudy$`TAG%`)

#Plotting

APP7.B <- 
  ggplot(data = presentstudy, 
        aes(x = Rim_Base_body_2, y = TAGn, colour=Rim_Base_body_2)) + 
  geom_boxplot(aes(x=Rim_Base_body_2, y=TAGn), 
               position = position_dodge(0.5), width = .2, outlier.shape = NA) +
  labs(x = "Fragment type", 
       y = "TAG% (log scale ; all samples)",  
       colour = "Residues type")+
  scale_colour_manual(values = c("#2b5be2", "#186d4d", "#db2297")) + 
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="red")+
  scale_y_log10(breaks = c(1, 5, 25, 50, 100, 200, 500, 1000)) +
  geom_jitter(aes(y = TAGn), position = position_jitter(0.1)) +
  scale_x_discrete(labels = paste(levels(presentstudy$Rim_Base_body_2),
        "\n(N=",table(presentstudy$Rim_Base_body_2),")",sep="")) +
  theme_classic(base_size = 10) +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP7.B
## Warning: Removed 87 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 87 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 87 rows containing missing values (`geom_point()`).

Appendix 9.A

#Create boxplots comparing TLEs of rim, body and base of vessels (charred residues)

#Exploratory plots comparing time period and TLE

#Order variables

thisstudyresidues <- droplevels(subset(residue, Study == "present"))
summary(thisstudyresidues$Rim_Base_body)
## Base-External residue Base-Internal residue Body-External residue 
##                     1                     3                     5 
## Body-Internal residue  Rim-External residue  Rim-Internal residue 
##                    16                     1                     1
thisstudyresidues$Rim_Base_body<- factor(thisstudyresidues$Rim_Base_body, 
                                         levels = c("Rim-Internal residue", 
                                                    "Body-Internal residue",
                                                    "Base-Internal residue",
                                                    "Rim-External residue",
                                                    "Body-External residue",
                                                    "Base-External residue"))
TLEn<-as.numeric(thisstudyresidues$TLE)
TLEn<-as.numeric(dt$TLE)

#Plotting

##colors indications
#FN=#b2a153
#FNBA=#9d7338
#EBA=#99520d
#MBA=#705230
#U=#999691

APP9.A <- ggplot(data = dt, 
                 aes(x = new_chrono, y = TLEn, colour=new_chrono)) + 
  geom_boxplot(aes(x=new_chrono, y=TLEn), position = position_dodge(0.5), width = .2, outlier.shape = NA) +
  xlab("Chronological period") +   ylab("TLE (log scale ; all samples)") +
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black")+
  scale_y_log10(breaks = c(1, 5, 25, 50, 100, 200, 500, 2000)) +
  scale_colour_manual(values = c("#c5ba85", "#b2a153","#9d7338","#705230", "#463014", "#999691")) +
  labs(color = "Chronology")+
  geom_jitter(aes(y = TLEn), position = position_jitter(0.1)) +
  scale_x_discrete(labels = paste(levels(dt$new_chrono),"\n(N=",table(dt$new_chrono),")",sep="")) +
  theme_classic(base_size = 10) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP9.A
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 15 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 15 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).


Appendix 9.B

#Exploratory plots comparing time period and TAG%

#Order variables

TAGn<-as.numeric(dt$`TAG%`)

#Plotting

APP9.B <- ggplot(data = dt, 
                   aes(x = new_chrono, y = TAGn, colour=new_chrono)) + 
  geom_boxplot(aes(x=new_chrono, y=TAGn), position = position_dodge(0.5), width = .2, outlier.shape = NA) +
  xlab("Chronological period") +   ylab("TAG% (log scale ; all samples)") +
  labs(color = "Chronology")+
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black")+
  scale_y_log10(breaks = c(1, 5, 25, 50, 100, 200, 500, 2000)) +
  scale_colour_manual(values = c("#c5ba85", "#b2a153","#9d7338","#705230", "#463014", "#999691")) +
  geom_jitter(aes(y = TAGn), position = position_jitter(0.1)) +
  scale_x_discrete(labels = paste(levels(dt$new_chrono),"\n(N=",table(dt$new_chrono),")",sep="")) +
  theme_classic(base_size = 10) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
APP9.B
## Warning: Removed 104 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 104 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 104 rows containing missing values (`geom_point()`).