/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.persistence.ocfl.impl;

import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
import static org.apache.jena.riot.RDFFormat.NTRIPLES;

import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.function.Consumer;

import javax.sql.DataSource;

import org.apache.commons.lang3.SystemUtils;
import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
import org.apache.jena.riot.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;

import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
import edu.wisc.library.ocfl.api.MutableOcflRepository;
import edu.wisc.library.ocfl.api.OcflConfig;
import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
import edu.wisc.library.ocfl.aws.OcflS3Client;
import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
import edu.wisc.library.ocfl.core.storage.cloud.CloudOcflStorage;
import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage;
import software.amazon.awssdk.services.s3.S3Client;

/**
 * A set of utility functions for supporting OCFL persistence activities.
 *
 * @author dbernstein
 * @since 6.0.0
 */
public class OcflPersistentStorageUtils {

    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);

    /**
     * The default RDF on disk format
     * TODO Make this value configurable
     */
    private static final RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;

    private OcflPersistentStorageUtils() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * @return the RDF Format. By default NTRIPLES are returned.
     */
    public static RDFFormat getRdfFormat() {
        return DEFAULT_RDF_FORMAT;
    }

    /**
     * @return the RDF file extension: a dot followed by the first file extension
     *         registered for the language of the default RDF format.
     */
    public static String getRDFFileExtension() {
        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
    }

    /**
     * Create a new ocfl repository backed by the filesystem
     *
     * @param ocflStorageRootDir The ocfl storage root directory
     * @param ocflWorkDir The ocfl work directory
     * @param algorithm the algorithm for the OCFL repository
     * @return the repository
     * @throws IOException if the storage root or the work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm cannot be mapped to an OCFL digest algorithm
     */
    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
                                                                   final Path ocflWorkDir,
                                                                   final org.fcrepo.config.DigestAlgorithm algorithm)
            throws IOException {
        createDirectories(ocflStorageRootDir);

        final var storage = FileSystemOcflStorage.builder().repositoryRoot(ocflStorageRootDir).build();

        return createRepository(ocflWorkDir, builder -> {
            builder.storage(storage);
        }, algorithm);
    }

    /**
     * Create a new ocfl repository backed by s3
     *
     * @param dataSource the datasource to keep inventories in and use as a lock
     * @param s3Client aws s3 client
     * @param bucket the bucket to store objects in
     * @param prefix the prefix within the bucket to store objects under
     * @param ocflWorkDir the local directory to stage objects in
     * @param algorithm the algorithm for the OCFL repository
     * @param withDb true if the ocfl client should use a db
     * @return the repository
     * @throws IOException if the work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm cannot be mapped to an OCFL digest algorithm
     */
    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
                                                           final S3Client s3Client,
                                                           final String bucket,
                                                           final String prefix,
                                                           final Path ocflWorkDir,
                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
                                                           final boolean withDb)
            throws IOException {
        // createRepository creates the work directory as well; this earlier call is
        // harmless because directory creation is idempotent.
        createDirectories(ocflWorkDir);

        final var storage = CloudOcflStorage.builder()
                .cloudClient(OcflS3Client.builder()
                        .s3Client(s3Client)
                        .bucket(bucket)
                        .repoPrefix(prefix)
                        .build())
                .build();

        return createRepository(ocflWorkDir, builder -> {
            builder.contentPathConstraints(ContentPathConstraints.cloud())
                    .storage(storage);

            // The object details database is only wired in when explicitly requested.
            if (withDb) {
                builder.objectDetailsDb(db -> db.dataSource(dataSource));
            }

        }, algorithm);
    }

    /**
     * Builds a mutable OCFL repository with the settings shared by every storage backend,
     * then lets the caller apply backend specific settings before the repository is built.
     *
     * @param ocflWorkDir the work directory; created if it does not exist
     * @param configurer callback that applies backend specific settings to the builder
     * @param algorithm the fedora digest algorithm to use as the OCFL default digest algorithm
     * @return the repository
     * @throws IOException if the work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm cannot be mapped to an OCFL digest algorithm
     */
    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
                                                          final Consumer<OcflRepositoryBuilder> configurer,
                                                          final org.fcrepo.config.DigestAlgorithm algorithm)
            throws IOException {
        createDirectories(ocflWorkDir);

        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
        if (ocflDigestAlg == null) {
            throw new UnsupportedDigestAlgorithmException(
                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
        }

        // The logical path mapper is chosen according to the host operating system.
        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();

        final var builder = new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
                .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg))
                .logicalPathMapper(logicalPathMapper)
                .workDir(ocflWorkDir);

        configurer.accept(builder);

        return builder.buildMutable();
    }

    /**
     * Creates an object mapper that does not write dates as timestamps, has the
     * {@link JavaTimeModule} registered, and only includes non-null values when serializing.
     *
     * @return new object mapper with default config
     */
    public static ObjectMapper objectMapper() {
        return new ObjectMapper()
                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
                .registerModule(new JavaTimeModule())
                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
    }

    /**
     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm.
     * Each alias of the fedora algorithm is looked up in the OCFL digest algorithm registry and
     * the first alias that resolves is used.
     *
     * @param fcrepoAlg fedora digest algorithm
     * @return OCFL client DigestAlgorithm, or null if no match could be made
     */
    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
        return fcrepoAlg.getAliases().stream()
                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
                .filter(alg -> alg != null)
                .findFirst()
                .orElse(null);
    }

    /**
     * Creates the directory, including any missing parents, tolerating a path that already
     * exists as a symlink to a directory.
     *
     * @param path the directory to create
     * @return the directory
     * @throws IOException if the directory cannot be created, including when the path already
     *         exists but does not resolve to a directory
     */
    private static Path createDirectories(final Path path) throws IOException {
        try {
            return Files.createDirectories(path);
        } catch (final FileAlreadyExistsException e) {
            // Expected when the path is a symlink to an existing directory; that case is fine.
            // Anything else (for example a regular file at the path) is a real problem and
            // must not be silently swallowed.
            if (Files.isDirectory(path)) {
                return path;
            }
            throw e;
        }
    }
}