/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018
019package org.fcrepo.config;
020
021import java.io.IOException;
022import java.nio.file.Path;
023import java.util.List;
024import java.util.Objects;
025import java.util.stream.Collectors;
026
027import javax.annotation.PostConstruct;
028
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031import org.springframework.beans.factory.annotation.Value;
032import org.springframework.context.annotation.Configuration;
033
034/**
035 * Fedora's OCFL related configuration properties
036 *
037 * @author pwinckles
038 * @since 6.0.0
039 */
040@Configuration
041public class OcflPropsConfig extends BasePropsConfig {
042
043    private static final Logger LOGGER = LoggerFactory.getLogger(OcflPropsConfig.class);
044
045    public static final String FCREPO_OCFL_STAGING = "fcrepo.ocfl.staging";
046    public static final String FCREPO_OCFL_ROOT = "fcrepo.ocfl.root";
047    public static final String FCREPO_OCFL_TEMP = "fcrepo.ocfl.temp";
048    private static final String FCREPO_OCFL_S3_BUCKET = "fcrepo.ocfl.s3.bucket";
049
050    private static final String OCFL_STAGING = "staging";
051    private static final String OCFL_ROOT = "ocfl-root";
052    private static final String OCFL_TEMP = "ocfl-temp";
053
054    private static final String FCREPO_PERSISTENCE_ALGORITHM = "fcrepo.persistence.defaultDigestAlgorithm";
055
056    @Value("${" + FCREPO_OCFL_STAGING + ":#{fedoraPropsConfig.fedoraData.resolve('" + OCFL_STAGING + "')}}")
057    private Path fedoraOcflStaging;
058
059    @Value("${" + FCREPO_OCFL_ROOT + ":#{fedoraPropsConfig.fedoraData.resolve('" + OCFL_ROOT + "')}}")
060    private Path ocflRepoRoot;
061
062    @Value("${" + FCREPO_OCFL_TEMP + ":#{fedoraPropsConfig.fedoraData.resolve('" + OCFL_TEMP + "')}}")
063    private Path ocflTemp;
064
065    /**
066     * Controls whether changes are committed to new OCFL versions or to a mutable HEAD
067     */
068    @Value("${fcrepo.autoversioning.enabled:true}")
069    private boolean autoVersioningEnabled;
070
071    @Value("${fcrepo.storage:ocfl-fs}")
072    private String storageStr;
073    private Storage storage;
074
075    @Value("${fcrepo.aws.access-key:}")
076    private String awsAccessKey;
077
078    @Value("${fcrepo.aws.secret-key:}")
079    private String awsSecretKey;
080
081    @Value("${fcrepo.aws.region:}")
082    private String awsRegion;
083
084    @Value("${fcrepo.s3.endpoint:}")
085    private String s3Endpoint;
086
087    @Value("${fcrepo.s3.path.style.access:false}")
088    private boolean pathStyleAccessEnabled;
089
090    @Value("${" + FCREPO_OCFL_S3_BUCKET + ":}")
091    private String ocflS3Bucket;
092
093    @Value("${fcrepo.ocfl.s3.prefix:}")
094    private String ocflS3Prefix;
095
096    @Value("${fcrepo.resource-header-cache.enable:true}")
097    private boolean resourceHeadersCacheEnabled;
098
099    @Value("${fcrepo.resource-header-cache.max-size:512}")
100    private long resourceHeadersCacheMaxSize;
101
102    @Value("${fcrepo.resource-header-cache.expire-after-seconds:600}")
103    private long resourceHeadersCacheExpireAfterSeconds;
104
105    @Value("${fcrepo.ocfl.reindex.threads:-1}")
106    private long reindexThreads;
107
108    @Value("${fcrepo.ocfl.reindex.batchSize:100}")
109    private long reindexBatchSize;
110
111    @Value("${fcrepo.ocfl.reindex.failOnError:true}")
112    private boolean reindexFailOnError;
113
114    @Value("${" + FCREPO_PERSISTENCE_ALGORITHM + ":sha512}")
115    private String FCREPO_DIGEST_ALGORITHM_VALUE;
116
117    @Value("${fcrepo.ocfl.s3.db.enabled:true}")
118    private boolean ocflS3DbEnabled;
119
120    @Value("${fcrepo.ocfl.unsafe.write.enabled:false}")
121    private boolean unsafeWriteEnabled;
122
123    @Value("${fcrepo.cache.db.ocfl.id_map.size.entries:1024}")
124    private long fedoraToOcflCacheSize;
125
126    @Value("${fcrepo.cache.db.ocfl.id_map.timeout.minutes:30}")
127    private long fedoraToOcflCacheTimeout;
128
129    private DigestAlgorithm FCREPO_DIGEST_ALGORITHM;
130
131    /**
132     * List of valid choices for fcrepo.persistence.defaultDigestAlgorithm
133     */
134    private static final List<DigestAlgorithm> FCREPO_VALID_DIGEST_ALGORITHMS = List.of(
135            DigestAlgorithm.SHA256,
136            DigestAlgorithm.SHA512
137    );
138
139    private static final long availableThreads = Runtime.getRuntime().availableProcessors();
140
141    @PostConstruct
142    private void postConstruct() throws IOException {
143        if (reindexThreads < 0L) {
144            reindexThreads = computeDefaultReindexThreads();
145        } else {
146            reindexThreads = checkReindexThreadLimit(reindexThreads);
147        }
148        storage = Storage.fromString(storageStr);
149        LOGGER.info("Fedora storage type: {}", storage);
150        LOGGER.info("Fedora staging: {}", fedoraOcflStaging);
151        LOGGER.info("Fedora OCFL temp: {}", ocflTemp);
152        LOGGER.info("Fedora OCFL reindexing threads: {}", reindexThreads);
153        LOGGER.info("Fedora OCFL reindexing batch size: {}", reindexBatchSize);
154        LOGGER.info("Fedora OCFL reindexing fail on error: {}", reindexFailOnError);
155        createDirectories(fedoraOcflStaging);
156        createDirectories(ocflTemp);
157
158        if (storage == Storage.OCFL_FILESYSTEM) {
159            LOGGER.info("Fedora OCFL root: {}", ocflRepoRoot);
160            createDirectories(ocflRepoRoot);
161        } else if (storage == Storage.OCFL_S3) {
162            Objects.requireNonNull(ocflS3Bucket,
163                    String.format("The property %s must be set when OCFL S3 storage is used", FCREPO_OCFL_S3_BUCKET));
164
165            LOGGER.info("Fedora AWS access key: {}", awsAccessKey);
166            LOGGER.info("Fedora AWS secret key set: {}", Objects.isNull(awsSecretKey));
167            LOGGER.info("Fedora AWS region: {}", awsRegion);
168            LOGGER.info("Fedora OCFL S3 bucket: {}", ocflS3Bucket);
169            LOGGER.info("Fedora OCFL S3 prefix: {}", ocflS3Prefix);
170        }
171        FCREPO_DIGEST_ALGORITHM = DigestAlgorithm.fromAlgorithm(FCREPO_DIGEST_ALGORITHM_VALUE);
172        // Throw error if the configured default digest is not known to fedora or is not a valid option
173        if (DigestAlgorithm.MISSING.equals(FCREPO_DIGEST_ALGORITHM) ||
174                !FCREPO_VALID_DIGEST_ALGORITHMS.contains(FCREPO_DIGEST_ALGORITHM)) {
175            throw new IllegalArgumentException(String.format("Invalid %s property configured: %s, must be one of %s",
176                    FCREPO_PERSISTENCE_ALGORITHM, FCREPO_DIGEST_ALGORITHM_VALUE,
177                    FCREPO_VALID_DIGEST_ALGORITHMS.stream().map(DigestAlgorithm::getAlgorithm)
178                            .collect(Collectors.joining(", "))));
179        }
180        LOGGER.info("Fedora OCFL digest algorithm: {}", FCREPO_DIGEST_ALGORITHM.getAlgorithm());
181    }
182
183    /**
184     * @return Path to directory Fedora stages resources before moving them into OCFL
185     */
186    public Path getFedoraOcflStaging() {
187        return fedoraOcflStaging;
188    }
189
190    /**
191     * Sets the path to the Fedora staging directory -- should only be used for testing purposes.
192     *
193     * @param fedoraOcflStaging Path to Fedora staging directory
194     */
195    public void setFedoraOcflStaging(final Path fedoraOcflStaging) {
196        this.fedoraOcflStaging = fedoraOcflStaging;
197    }
198
199    /**
200     * @return Path to OCFL root directory
201     */
202    public Path getOcflRepoRoot() {
203        return ocflRepoRoot;
204    }
205
206    /**
207     * Sets the path to the Fedora OCFL root directory -- should only be used for testing purposes.
208     *
209     * @param ocflRepoRoot Path to Fedora OCFL root directory
210     */
211    public void setOcflRepoRoot(final Path ocflRepoRoot) {
212        this.ocflRepoRoot = ocflRepoRoot;
213    }
214
215    /**
216     * @return Path to the temp directory used by the OCFL client
217     */
218    public Path getOcflTemp() {
219        return ocflTemp;
220    }
221
222    /**
223     * Sets the path to the OCFL temp directory -- should only be used for testing purposes.
224     *
225     * @param ocflTemp Path to OCFL temp directory
226     */
227    public void setOcflTemp(final Path ocflTemp) {
228        this.ocflTemp = ocflTemp;
229    }
230
231    /**
232     * @return true if every update should create a new OCFL version; false if the mutable HEAD should be used
233     */
234    public boolean isAutoVersioningEnabled() {
235        return autoVersioningEnabled;
236    }
237
238    /**
239     * Determines whether or not new OCFL versions are created on every update.
240     *
241     * @param autoVersioningEnabled true to create new versions on every update
242     */
243    public void setAutoVersioningEnabled(final boolean autoVersioningEnabled) {
244        this.autoVersioningEnabled = autoVersioningEnabled;
245    }
246
247    /**
248     * @return Indicates the storage type. ocfl-fs is the default
249     */
250    public Storage getStorage() {
251        return storage;
252    }
253
254    /**
255     * @param storage storage to use
256     */
257    public void setStorage(final Storage storage) {
258        this.storage = storage;
259    }
260
261    /**
262     * @return the aws access key to use, may be null
263     */
264    public String getAwsAccessKey() {
265        return awsAccessKey;
266    }
267
268    /**
269     * @param awsAccessKey the aws access key to use
270     */
271    public void setAwsAccessKey(final String awsAccessKey) {
272        this.awsAccessKey = awsAccessKey;
273    }
274
275    /**
276     * @return the aws secret key to use, may be null
277     */
278    public String getAwsSecretKey() {
279        return awsSecretKey;
280    }
281
282    /**
283     * @param awsSecretKey the aws secret key to use
284     */
285    public void setAwsSecretKey(final String awsSecretKey) {
286        this.awsSecretKey = awsSecretKey;
287    }
288
289    /**
290     * @return the aws region to use, may be null
291     */
292    public String getAwsRegion() {
293        return awsRegion;
294    }
295
296    /**
297     * @param awsRegion the aws region to use
298     */
299    public void setAwsRegion(final String awsRegion) {
300        this.awsRegion = awsRegion;
301    }
302
303    /**
304     * @return the s3 bucket to store objects in
305     */
306    public String getOcflS3Bucket() {
307        return ocflS3Bucket;
308    }
309
310    /**
311     * @param ocflS3Bucket sets the s3 bucket to store objects in
312     */
313    public void setOcflS3Bucket(final String ocflS3Bucket) {
314        this.ocflS3Bucket = ocflS3Bucket;
315    }
316
317    /**
318     * @return the s3 prefix to store objects under, may be null
319     */
320    public String getOcflS3Prefix() {
321        return ocflS3Prefix;
322    }
323
324    /**
325     * @param ocflS3Prefix the prefix to store objects under
326     */
327    public void setOcflS3Prefix(final String ocflS3Prefix) {
328        this.ocflS3Prefix = ocflS3Prefix;
329    }
330
331    /**
332     * @return whether or not to enable the resource headers cache
333     */
334    public boolean isResourceHeadersCacheEnabled() {
335        return resourceHeadersCacheEnabled;
336    }
337
338    /**
339     * @param resourceHeadersCacheEnabled whether or not to enable the resource headers cache
340     */
341    public void setResourceHeadersCacheEnabled(final boolean resourceHeadersCacheEnabled) {
342        this.resourceHeadersCacheEnabled = resourceHeadersCacheEnabled;
343    }
344
345    /**
346     * @return maximum number or resource headers in cache
347     */
348    public long getResourceHeadersCacheMaxSize() {
349        return resourceHeadersCacheMaxSize;
350    }
351
352    /**
353     * @param resourceHeadersCacheMaxSize maximum number of resource headers in cache
354     */
355    public void setResourceHeadersCacheMaxSize(final long resourceHeadersCacheMaxSize) {
356        this.resourceHeadersCacheMaxSize = resourceHeadersCacheMaxSize;
357    }
358
359    /**
360     * @return number of seconds to wait before expiring a resource header from the cache
361     */
362    public long getResourceHeadersCacheExpireAfterSeconds() {
363        return resourceHeadersCacheExpireAfterSeconds;
364    }
365
366    /**
367     * @param resourceHeadersCacheExpireAfterSeconds
368     *      number of seconds to wait before expiring a resource header from the cache
369     */
370    public void setResourceHeadersCacheExpireAfterSeconds(final long resourceHeadersCacheExpireAfterSeconds) {
371        this.resourceHeadersCacheExpireAfterSeconds = resourceHeadersCacheExpireAfterSeconds;
372    }
373
374    /**
375     * @param threads
376     *   number of threads to use when rebuilding from Fedora OCFL on disk.
377     */
378    public void setReindexingThreads(final long threads) {
379        this.reindexThreads = checkReindexThreadLimit(threads);
380    }
381
382    /**
383     * @return number of threads to use when rebuilding from Fedora OCFL on disk.
384     */
385    public long getReindexingThreads() {
386        return this.reindexThreads;
387    }
388
389    /**
390     * @return number of OCFL ids for a the reindexing manager to hand out at once.
391     */
392    public long getReindexBatchSize() {
393        return reindexBatchSize;
394    }
395
396    /**
397     * @param reindexBatchSize
398     *   number of OCFL ids for a the reindexing manager to hand out at once.
399     */
400    public void setReindexBatchSize(final long reindexBatchSize) {
401        this.reindexBatchSize = reindexBatchSize;
402    }
403
404    /**
405     * @return whether to stop the entire reindexing process if a single object fails.
406     */
407    public boolean isReindexFailOnError() {
408        return reindexFailOnError;
409    }
410
411    /**
412     * @param reindexFailOnError
413     *   whether to stop the entire reindexing process if a single object fails.
414     */
415    public void setReindexFailOnError(final boolean reindexFailOnError) {
416        this.reindexFailOnError = reindexFailOnError;
417    }
418
419    /**
420     * Check we don't create too few reindexing threads.
421     * @param threads the number of threads requested.
422     * @return higher of the requested amount or 1
423     */
424    private long checkReindexThreadLimit(final long threads) {
425       if (threads <= 0) {
426            LOGGER.warn("Can't have fewer than 1 reindexing thread, setting to 1.");
427            return 1;
428        } else {
429            return threads;
430        }
431    }
432
433    /**
434     * @return number of available processors minus 1.
435     */
436    private static long computeDefaultReindexThreads() {
437        return Math.max(availableThreads - 1, 1);
438    }
439
440    /**
441     * @return the configured OCFL digest algorithm
442     */
443    public DigestAlgorithm getDefaultDigestAlgorithm() {
444        return FCREPO_DIGEST_ALGORITHM;
445    }
446
447    /**
448     * @return an optional custom s3 endpoint or null
449     */
450    public String getS3Endpoint() {
451        return s3Endpoint;
452    }
453
454    /**
455     * @return true if path style S3 access should be used
456     */
457    public boolean isPathStyleAccessEnabled() {
458        return pathStyleAccessEnabled;
459    }
460
461    /**
462     * @return true if the ocfl client should be configured to use a database when storing objects in S3
463     */
464    public boolean isOcflS3DbEnabled() {
465        return ocflS3DbEnabled;
466    }
467
468    /**
469     * When unsafe writes are enabled, the OCFL client does not calculate a digest for files that are added, and
470     * trusts the digest value that it's given. If this value is incorrect, the object will be corrupted.
471     *
472     * @return true if objects should be written to OCFL using an "unsafe" write
473     */
474    public boolean isUnsafeWriteEnabled() {
475        return unsafeWriteEnabled;
476    }
477
478    /**
479     * @return Size of the fedoraToOcflIndex cache.
480     */
481    public long getFedoraToOcflCacheSize() {
482        return fedoraToOcflCacheSize;
483    }
484
485    /**
486     * @return Time to cache expiration in minutes.
487     */
488    public long getFedoraToOcflCacheTimeout() {
489        return fedoraToOcflCacheTimeout;
490    }
491}