Content Manifest - uclibs/ucrate GitHub Wiki

The manifest rake tasks are:

rake manifest:all                                    # Run all manifest reports
rake manifest:collections                            # Run collections manifest reports
rake manifest:files                                  # Run files manifest reports
rake manifest:groups                                 # Run groups manifest reports
rake manifest:linked_resources                       # Run linked_resources manifest reports
rake manifest:people                                 # Run people manifest reports
rake manifest:profiles                               # Run profiles manifest reports
rake manifest:users                                  # Run users manifest reports
rake manifest:works                                  # Run work manifest reports

Each task writes a CSV report to the app's vendor folder.

If you need to add information to any of the reports, you will need to redefine that report's class in the console. All of the report classes are defined here: https://github.com/uclibs/ucrate/tree/develop/app/services

For legacy report classes see: https://github.com/uclibs/curate/tree/develop/app/services

After redefining the class on the console, run the line from the matching rake task to generate the report. E.g., for works, you'd run WorksReport.create_report.

After the file is generated, you need to get it off the server. Our authentication setup complicates sftp, so the mail utility is the quickest way to send it. E.g.:

echo "Foo" | mail -s "Users Report" -a vendor/users_report.csv you@example.com

After the report is delivered, you may need to clean up extra carriage returns in some fields. This can be done quickly with a regex in a text editor like Vim or Atom.


Migration Utilities

Export the files of every work whose PID is listed in a text file (one PID per line) into a single ZIP archive in a shared folder.

# --- Configuration ---
# Edit both paths before running the script.
# pids_file: text file that is read one PID per line (blank lines are skipped).
# zip_path:  ZIP archive that receives every exported file.
# NOTE(review): ActiveFedora is used below without being required, so this is
# presumably meant to run inside the application's console/runner - confirm.
pids_file  = "/srv/apps/curate_uc/scholar-ids.txt"      # <- change me
zip_path   = "/srv/apps/curate_uc/all_works.zip" # <- change me

# fileutils supplies mkdir_p (used just below); zip supplies Zip::File (used by
# the export loop).
require 'fileutils'
require 'zip'

# Make sure the directory that will hold the archive exists before opening it.
FileUtils.mkdir_p(File.dirname(zip_path))

# Turn an arbitrary value into a string usable as one path component in the ZIP.
#
# The value is coerced with to_s, transcoded to UTF-8 with invalid/undefined
# sequences replaced by the empty string, each of \ / : * ? " < > | is
# swapped for an underscore, and leading/trailing whitespace is removed.
#
# @param str [#to_s] raw name (nil becomes "")
# @return [String] the cleaned-up name
def safe_name(str)
  text = str.to_s
  text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: "")
  text = text.gsub(/[\\\/:\*\?"<>\|]/, "_")
  text.strip
end

# Copy the bytes of one repository file into a new entry of the archive.
#
# When the file object offers #stream, the stream is iterated and every chunk
# is written as it arrives; otherwise the whole #content is written in a
# single call.
#
# @param pcdm_file [#stream, #content] source file object
# @param zipfile [#get_output_stream] archive that yields a writable stream
# @param entry_name [String] path of the entry to create inside the archive
# @return whatever zipfile.get_output_stream returns
def write_pcdm_file_to_zip(pcdm_file, zipfile, entry_name)
  zipfile.get_output_stream(entry_name) do |out|
    next out.write(pcdm_file.content) unless pcdm_file.respond_to?(:stream)

    pcdm_file.stream.each do |piece|
      out.write(piece)
    end
  end
end

# Ensure unique entry names inside the ZIP (avoid collisions).
#
# If +desired+ is not yet present it is returned unchanged. Otherwise a
# counter is inserted before the extension - "dir/name(2).ext",
# "dir/name(3).ext", ... - and the first candidate the archive does not
# already contain is returned.
#
# @param zipfile [#find_entry] archive being written; only find_entry is called
# @param desired [String] preferred entry path inside the archive
# @return [String] +desired+ itself, or its first free numbered variant
def unique_entry_name(zipfile, desired)
  return desired unless zipfile.find_entry(desired)

  dir  = File.dirname(desired)
  base = File.basename(desired, ".*")
  ext  = File.extname(desired)

  # File.dirname returns "." for a bare file name. Joining with it would put a
  # "./" prefix on the candidate that the caller never asked for, so the
  # directory is only re-attached when the desired name really had one.
  bare = dir == "." && !desired.start_with?("./")

  i = 2
  loop do
    numbered  = "#{base}(#{i})#{ext}"
    candidate = bare ? numbered : File.join(dir, numbered)
    return candidate unless zipfile.find_entry(candidate)
    i += 1
  end
end

# Export driver. Reads one PID per line from pids_file (blank lines ignored),
# looks each work up, and writes every file attached to its FileSets into the
# archive at zip_path under <pid>/<file set id>/<name>_<pid><ext>.
# A missing work, a work without FileSets, a FileSet without files, and a
# failed write are each reported on stdout and skipped; any other error
# propagates and ends the run.
pids = File.readlines(pids_file).map(&:strip).reject(&:empty?)

Zip::File.open(zip_path, Zip::File::CREATE) do |zipfile|
  pids.each do |pid|
    begin
      work = ActiveFedora::Base.find(pid)
    rescue ActiveFedora::ObjectNotFoundError
      puts "❌ Work not found: #{pid}"
      next
    end

    # Prefer the work's own file_sets accessor; otherwise filter its members
    # down to FileSet instances.
    file_sets =
      if work.respond_to?(:file_sets)
        work.file_sets.to_a
      elsif work.respond_to?(:members)
        work.members.select { |m| m.is_a?(::FileSet) }
      else
        []
      end

    if file_sets.empty?
      puts "⚠️  No FileSets for work #{pid}"
      next
    end

    # Sanitized once per work: used both as the top-level folder and as the
    # suffix appended to every exported file name.
    work_dir = safe_name(pid)

    file_sets.each do |fs|
      # Export everything in fs.files when available, else just the original file.
      pcdm_files =
        if fs.respond_to?(:files)
          fs.files.to_a
        else
          [fs.original_file].compact
        end

      if pcdm_files.empty?
        puts "  ⏭️  FileSet #{fs.id} has no attached files"
        next
      end

      fs_dir = File.join(work_dir, safe_name(fs.id))

      pcdm_files.each do |pcdm_file|
        # Name fallback chain: original_name, then the last path segment of
        # filename, then the file's id.
        base =
          (pcdm_file.respond_to?(:original_name) && pcdm_file.original_name.presence) ||
          (pcdm_file.respond_to?(:filename)     && pcdm_file.filename.to_s.split('/').last.presence) ||
          pcdm_file.id

        ext  = File.extname(base)
        name = File.basename(base, ext)
        # The extension is taken from the same stored name as the stem, so it is
        # sanitized as well (e.g. "notes.v1:final" has the extension ".v1:final").
        new_filename = "#{safe_name(name)}_#{work_dir}#{safe_name(ext)}"
        desired_entry = File.join(fs_dir, new_filename)
        entry = unique_entry_name(zipfile, desired_entry)

        begin
          write_pcdm_file_to_zip(pcdm_file, zipfile, entry)
          puts "  ✅ Added #{entry}"
        rescue => e
          # Report the entry that was actually attempted, which differs from
          # desired_entry whenever a name collision was resolved.
          # NOTE(review): a failed write may leave an empty or partial entry in
          # the archive - confirm against rubyzip's behaviour.
          puts "  ❌ Failed adding #{entry}: #{e.class} — #{e.message}"
        end
      end
    end
  end
end

puts "📦 Wrote big ZIP → #{zip_path}"
puts "✨ Done."