Search tool script - milnegeneseo/digital-scholarly-editing GitHub Wiki
Here's the script we currently use to transform a CSV downloaded from our Google sheet (which tracks image-text correspondences) into a CSV suitable for uploading to the TablePress plugin running on the Digital Thoreau website.
#!/usr/bin/env python
# coding: utf-8
# User is prompted for the name of a csv exported from our Google sheet. Script produces a new csv
# suitable for importing into a TablePress table. URLs are converted to linked text, each with the
# corresponding manuscript thumbnail behind it as background image. Some columns
# are dropped, remaining columns are re-ordered, a new column is added to provide MS image numbers.
# CSS classes reference selectors added to the table in TablePress where they can easily be modified
# to change styling globally.
import time, pandas as pd
# Columns kept from the Google-sheet export, in the order they follow the new 'Image #' column.
SOURCE_COLUMNS = ['Full size', 'Huntington', 'InVersion', 'InParSegs', 'Notes']

# Placeholder prefix for the output file; a timestamp and '.csv' are appended to it.
OUTPUT_PREFIX = '/path/to/desired/directory/myfile_'

# Regex patterns for the two kinds of URL found in the sheet. Dots are escaped so they
# only match literal dots. Group 2 is always the numeric image identifier.
_HOST = r'https://cdm16003\.contentdm\.oclc\.org'
HUNTINGTON_URL = '(' + _HOST + r'/digital/collection/p16003coll16/id/)(\d+)'
FULL_SIZE_URL = '(' + _HOST + r'/digital/iiif/p16003coll16/)(\d+)(/full/full/0/default\.jpg)'

# HTML written into a table cell: a div with the thumbnail as background image, wrapping
# a one-letter link. The CSS classes are the ones referenced by the TablePress table.
_CELL_HTML = (
    '''<div class="hmimage" style="background-image: url('{thumb}');">'''
    '''<a href="{href}" class="hmlink" target="_blank">{label}</a></div>'''
)

# Replacement templates (\1, \2, \3 refer to the groups of the patterns above).
_FULL_SIZE_CELL = _CELL_HTML.format(
    thumb=r'\1\2/full/50,/0/default.jpg',  # same IIIF URL, 50px-wide rendition
    href=r'\1\2\3',                        # the original full-size URL
    label='F',
)
_HUNTINGTON_CELL = _CELL_HTML.format(
    thumb=r'https://cdm16003.contentdm.oclc.org/digital/iiif/p16003coll16/\2/full/50,/0/default.jpg',
    href=r'\1\2',                          # the original collection URL
    label='H',
)


def build_tablepress_frame(df):
    """Return a new dataframe laid out for import into the TablePress table.

    The input dataframe is not modified. It must contain the columns listed in
    SOURCE_COLUMNS; any other columns are dropped.

    The result has the columns 'Image #', 'Full size', 'Huntington',
    'In Versions', 'In Par Segs' and 'Notes', where:

    * 'Image #' is the numeric id taken from the 'Huntington' URL,
    * URLs in 'Full size' and 'Huntington' are replaced by linked text ('F' / 'H')
      with the corresponding thumbnail as background image,
    * empty cells are filled with '-',
    * in 'In Par Segs' newlines become spaces, '*' becomes 'NiW' (Not in Walden)
      and surrounding whitespace is trimmed.
    """
    # Work on a copy so the column assignments below never touch the caller's frame.
    table = df[SOURCE_COLUMNS].copy()

    # regex=True must be explicit: since pandas 2.0 str.replace defaults to literal
    # matching, which would silently leave every URL untouched.
    image_ids = table['Huntington'].str.replace(HUNTINGTON_URL, r'\2', regex=True)
    table.insert(0, 'Image #', image_ids)  # must be derived before 'Huntington' becomes html

    table['Full size'] = table['Full size'].str.replace(FULL_SIZE_URL, _FULL_SIZE_CELL, regex=True)
    table['Huntington'] = table['Huntington'].str.replace(HUNTINGTON_URL, _HUNTINGTON_CELL, regex=True)

    table = table.fillna('-')  # we'd rather not have any empty cells

    par_segs = table['InParSegs'].str.replace('\n', ' ', regex=False)  # replace newline with space
    par_segs = par_segs.str.replace('*', 'NiW', regex=False)  # replace * with NiW = Not in Walden
    table['InParSegs'] = par_segs.str.strip()  # trim leading and trailing space

    # friendlier names for the published table
    return table.rename(columns={'InVersion': 'In Versions', 'InParSegs': 'In Par Segs'})


def main():
    """Prompt for the exported csv, transform it and write a timestamped csv."""
    timestr = time.strftime("%Y-%m-%d_%H-%M")  # timestamp added to the name of the file we create
    print("Feed me a csv file, please. If the file isn't in the same directory as this script, provide the full path to the file:")
    filename = input().strip()
    df = pd.read_csv(filename)  # create dataframe from csv
    build_tablepress_frame(df).to_csv(OUTPUT_PREFIX + timestr + '.csv', index=False)  # write dataframe to csv


if __name__ == "__main__":
    main()