001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.persistence.ocfl.impl;
019
020import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
021import static org.apache.jena.riot.RDFFormat.NTRIPLES;
022
023import java.io.IOException;
024import java.nio.file.FileAlreadyExistsException;
025import java.nio.file.Files;
026import java.nio.file.Path;
027import java.util.function.Consumer;
028
029import javax.sql.DataSource;
030
031import org.apache.commons.lang3.SystemUtils;
032import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
033import org.apache.jena.riot.RDFFormat;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import com.fasterxml.jackson.annotation.JsonInclude;
038import com.fasterxml.jackson.databind.ObjectMapper;
039import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
040
041import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
042import edu.wisc.library.ocfl.api.MutableOcflRepository;
043import edu.wisc.library.ocfl.api.OcflConfig;
044import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
045import edu.wisc.library.ocfl.aws.OcflS3Client;
046import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
047import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
048import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
049import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
050import edu.wisc.library.ocfl.core.storage.cloud.CloudOcflStorage;
051import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage;
052import software.amazon.awssdk.services.s3.S3Client;
053
054/**
055 * A set of utility functions for supporting OCFL persistence activities.
056 *
057 * @author dbernstein
058 * @since 6.0.0
059 */
060public class OcflPersistentStorageUtils {
061
062    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);
063
064    private OcflPersistentStorageUtils() {
065    }
066
067    /**
068     * The default RDF on disk format
069     * TODO Make this value configurable
070     */
071
072    private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;
073
074    /**
075     * @return the RDF Format. By default NTRIPLES are returned.
076     */
077    public static RDFFormat getRdfFormat() {
078        return DEFAULT_RDF_FORMAT;
079    }
080
081    /**
082     * @return the RDF file extension.
083     */
084    public static String getRDFFileExtension() {
085        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
086    }
087
088    /**
089     * Create a new ocfl repository backed by the filesystem
090     * @param ocflStorageRootDir The ocfl storage root directory
091     * @param ocflWorkDir The ocfl work directory
092     * @param algorithm the algorithm for the OCFL repository
093     * @return the repository
094     */
095    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
096                                                                   final Path ocflWorkDir,
097                                                                   final org.fcrepo.config.DigestAlgorithm algorithm)
098            throws IOException {
099        createDirectories(ocflStorageRootDir);
100
101        final var storage = FileSystemOcflStorage.builder().repositoryRoot(ocflStorageRootDir).build();
102
103        return createRepository(ocflWorkDir, builder -> {
104            builder.storage(storage);
105        }, algorithm);
106    }
107
108    /**
109     * Create a new ocfl repository backed by s3
110     *
111     * @param dataSource the datasource to keep inventories in and use as a lock
112     * @param s3Client aws s3 client
113     * @param bucket the bucket to store objects in
114     * @param prefix the prefix within the bucket to store objects under
115     * @param ocflWorkDir the local directory to stage objects in
116     * @param algorithm the algorithm for the OCFL repository
117     * @param withDb true if the ocfl client should use a db
118     * @return the repository
119     */
120    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
121                                                           final S3Client s3Client,
122                                                           final String bucket,
123                                                           final String prefix,
124                                                           final Path ocflWorkDir,
125                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
126                                                           final boolean withDb)
127            throws IOException {
128        createDirectories(ocflWorkDir);
129
130        final var storage = CloudOcflStorage.builder()
131                .cloudClient(OcflS3Client.builder()
132                        .s3Client(s3Client)
133                        .bucket(bucket)
134                        .repoPrefix(prefix)
135                        .build())
136                .build();
137
138        return createRepository(ocflWorkDir, builder -> {
139            builder.contentPathConstraints(ContentPathConstraints.cloud())
140                    .storage(storage);
141
142            if (withDb) {
143                builder.objectDetailsDb(db -> db.dataSource(dataSource));
144            }
145
146        }, algorithm);
147    }
148
149    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
150                                                          final Consumer<OcflRepositoryBuilder> configurer,
151                                                          final org.fcrepo.config.DigestAlgorithm algorithm)
152            throws IOException {
153        createDirectories(ocflWorkDir);
154
155        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
156        if (ocflDigestAlg == null) {
157            throw new UnsupportedDigestAlgorithmException(
158                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
159        }
160
161        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
162                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();
163
164        final var builder = new OcflRepositoryBuilder()
165                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
166                .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg))
167                .logicalPathMapper(logicalPathMapper)
168                .workDir(ocflWorkDir);
169
170        configurer.accept(builder);
171
172        return builder.buildMutable();
173    }
174
175    /**
176     * @return new object mapper with default config
177     */
178    public static ObjectMapper objectMapper() {
179        return new ObjectMapper()
180                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
181                .registerModule(new JavaTimeModule())
182                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
183    }
184
185    /**
186     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
187     *
188     * @param fcrepoAlg fedora digest algorithm
189     * @return OCFL client DigestAlgorithm, or null if no match could be made
190     */
191    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
192        return fcrepoAlg.getAliases().stream()
193                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
194                .filter(alg -> alg != null)
195                .findFirst()
196                .orElse(null);
197    }
198
199    private static Path createDirectories(final Path path) throws IOException {
200        try {
201            return Files.createDirectories(path);
202        } catch (final FileAlreadyExistsException e) {
203            // Ignore. This only happens with the path is a symlink
204            return path;
205        }
206    }
207}