001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.kernel.modeshape;
019
020import com.codahale.metrics.Counter;
021import com.codahale.metrics.Histogram;
022import com.codahale.metrics.Timer;
023import org.apache.jena.rdf.model.Resource;
024import org.fcrepo.kernel.api.exception.InvalidChecksumException;
025import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
026import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
027import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
028import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
029import org.fcrepo.kernel.api.models.FedoraBinary;
030import org.fcrepo.kernel.api.models.FedoraResource;
031import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
032import org.fcrepo.kernel.api.RdfStream;
033import org.fcrepo.kernel.api.utils.CacheEntry;
034import org.fcrepo.kernel.api.utils.ContentDigest;
035import org.fcrepo.kernel.api.utils.FixityResult;
036import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
037import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
038import org.fcrepo.metrics.RegistryService;
039import org.modeshape.jcr.api.Binary;
040import org.modeshape.jcr.api.ValueFactory;
041import org.slf4j.Logger;
042
043import javax.jcr.Node;
044import javax.jcr.PathNotFoundException;
045import javax.jcr.Property;
046import javax.jcr.RepositoryException;
047import javax.jcr.Value;
048import javax.jcr.version.Version;
049import javax.jcr.version.VersionHistory;
050import java.io.InputStream;
051import java.net.URI;
052import java.util.Collection;
053import java.util.HashMap;
054import java.util.HashSet;
055import java.util.Map;
056import java.util.Optional;
057import java.util.stream.Collectors;
058
059import static com.codahale.metrics.MetricRegistry.name;
060import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
061import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
062import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
063import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
064import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
065import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
066import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
067import static org.slf4j.LoggerFactory.getLogger;
068
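/**
 * Implementation of {@link FedoraBinary} that wraps a JCR {@link Node} and reads and writes
 * the binary content, digest, size, MIME type and filename as properties of that node.
 *
 * @author cabeer
 * @since 9/19/14
 */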
073public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {
074
    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);


    // Registry from which the metrics below are obtained
    static final RegistryService registryService = RegistryService.getInstance();

    // Incremented once for every fixity check requested through getFixity
    static final Counter fixityCheckCounter
            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));

    // Times each fixity check performed in getFixity.
    // NOTE(review): named under NonRdfSourceDescription while the other metrics use FedoraBinary;
    // possibly kept for continuity of the metric name -- confirm before changing.
    static final Timer timer = registryService.getMetrics().timer(
            name(NonRdfSourceDescription.class, "fixity-check-time"));

    // Updated with the length of the data property whenever a content node is decorated
    static final Histogram contentSizeHistogram =
            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));
087
088    /**
089     * Wrap an existing Node as a Fedora Binary
090     * @param node the node
091     */
092    public FedoraBinaryImpl(final Node node) {
093        super(node);
094
095        if (node.isNew()) {
096            initializeNewBinaryProperties();
097        }
098    }
099
100    private void initializeNewBinaryProperties() {
101        try {
102            decorateContentNode(node, new HashSet<>());
103        } catch (final RepositoryException e) {
104            LOGGER.warn("Count not decorate {} with FedoraBinary properties: {}", node, e);
105        }
106    }
107
108    @Override
109    public FedoraResource getDescription() {
110        try {
111            return new NonRdfSourceDescriptionImpl(getNode().getParent());
112        } catch (final RepositoryException e) {
113            throw new RepositoryRuntimeException(e);
114        }
115    }
116
117    /*
118         * (non-Javadoc)
119         * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
120         */
121    @Override
122    public InputStream getContent() {
123        try {
124            return getBinaryContent().getStream();
125        } catch (final RepositoryException e) {
126            throw new RepositoryRuntimeException(e);
127        }
128    }
129
130    /**
131     * Retrieve the JCR Binary object
132     * @return a JCR-wrapped Binary object
133     */
134    private javax.jcr.Binary getBinaryContent() {
135        try {
136            return getProperty(JCR_DATA).getBinary();
137        } catch (final PathNotFoundException e) {
138            throw new PathNotFoundRuntimeException(e);
139        } catch (final RepositoryException e) {
140            throw new RepositoryRuntimeException(e);
141        }
142    }
143
144    /*
145     * (non-Javadoc)
146     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
147     * java.lang.String, java.net.URI, java.lang.String,
148     * org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
149     */
150    @Override
151    public void setContent(final InputStream content, final String contentType,
152                           final Collection<URI> checksums, final String originalFileName,
153                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
154            throws InvalidChecksumException {
155
156        try {
157            final Node contentNode = getNode();
158
159            if (contentNode.canAddMixin(FEDORA_BINARY)) {
160                contentNode.addMixin(FEDORA_BINARY);
161            }
162
163            if (contentType != null) {
164                contentNode.setProperty(HAS_MIME_TYPE, contentType);
165            }
166
167            if (originalFileName != null) {
168                contentNode.setProperty(FILENAME, originalFileName);
169            }
170
171            LOGGER.debug("Created content node at path: {}", contentNode.getPath());
172
173            String hint = null;
174
175            if (storagePolicyDecisionPoint != null) {
176                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
177            }
178            final ValueFactory modevf =
179                    (ValueFactory) node.getSession().getValueFactory();
180            final Binary binary = modevf.createBinary(content, hint);
181
182        /*
183         * This next line of code deserves explanation. If we chose for the
184         * simpler line: Property dataProperty =
185         * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
186         * would not block on the stream's completion, and we would return to
187         * the requester before the mutation to the repo had actually completed.
188         * So instead we use createBinary(requestBodyStream), because its
189         * contract specifies: "The passed InputStream is closed before this
190         * method returns either normally or because of an exception." which
191         * lets us block and not return until the job is done! The simpler code
192         * may still be useful to us for an asynchronous method that we develop
193         * later.
194         */
195            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);
196
197            // Ensure provided checksums are valid
198            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
199            verifyChecksums(nonNullChecksums, dataProperty);
200
201            decorateContentNode(contentNode, nonNullChecksums);
202            touch();
203            ((FedoraResourceImpl) getDescription()).touch();
204
205            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());
206
207        } catch (final RepositoryException e) {
208            throw new RepositoryRuntimeException(e);
209        }
210    }
211
212    /**
213     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
214     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
215     *
216     * @param checksums that the user provided
217     * @param dataProperty containing the binary against which the checksums will be verified
218     * @throws InvalidChecksumException
219     * @throws RepositoryException
220     */
221    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
222            throws InvalidChecksumException, RepositoryException {
223
224        final Map<URI, URI> checksumErrors = new HashMap<>();
225
226        // Loop through provided checksums validating against computed values
227        checksums.forEach(checksum -> {
228            final String algorithm = ContentDigest.getAlgorithm(checksum);
229            try {
230                // The case internally supported by ModeShape
231                if (algorithm.equals(SHA1.algorithm)) {
232                    final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
233                    final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);
234
235                    if (!dsSHA1Uri.equals(checksum)) {
236                        LOGGER.debug("Failed checksum test");
237                        checksumErrors.put(checksum, dsSHA1Uri);
238                    }
239
240                // The case that requires re-computing the checksum
241                } else {
242                    final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
243                    cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent(
244                            fixityResult -> {
245                                if (!fixityResult.matches(checksum)) {
246                                    LOGGER.debug("Failed checksum test");
247                                    checksumErrors.put(checksum, fixityResult.getComputedChecksum());
248                                }
249                            }
250                    );
251                }
252            } catch (RepositoryException e) {
253                throw new RepositoryRuntimeException(e);
254            }
255        });
256
257        // Throw an exception if any checksum errors occurred
258        if (!checksumErrors.isEmpty()) {
259            final String template = "Checksum Mismatch of %1$s and %2$s\n";
260            final StringBuilder error = new StringBuilder();
261            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
262            throw new InvalidChecksumException(error.toString());
263        }
264
265    }
266
267    /*
268     * (non-Javadoc)
269     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
270     */
271    @Override
272    public long getContentSize() {
273        try {
274            if (hasProperty(CONTENT_SIZE)) {
275                return getProperty(CONTENT_SIZE).getLong();
276            }
277        } catch (final RepositoryException e) {
278            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
279        }
280
281        return -1L;
282    }
283
284    /*
285     * (non-Javadoc)
286     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
287     */
288    @Override
289    public URI getContentDigest() {
290        try {
291            // Determine which digest algorithm to use
292            final String algorithm = hasProperty(DEFAULT_DIGEST_ALGORITHM) ?
293                    property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst().get().getString() :
294                    ContentDigest.DEFAULT_ALGORITHM;
295            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");
296
297            if (hasProperty(CONTENT_DIGEST)) {
298                // Select the stored digest that matches the digest algorithm
299                Optional<Value> digestValue = property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
300                    try {
301                        final URI digestUri = URI.create(digest.getString());
302                        return algorithmWithoutStringType.equalsIgnoreCase(ContentDigest.getAlgorithm(digestUri));
303
304                    } catch (RepositoryException e) {
305                        LOGGER.warn("Exception thrown when getting digest property {}, {}", digest, e.getMessage());
306                        return false;
307                    }
308                }).findFirst();
309
310                // Success, return the digest value
311                if (digestValue.isPresent()) {
312                    return URI.create(digestValue.get().getString());
313                }
314            }
315            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
316        } catch (final RepositoryException e) {
317            LOGGER.warn("Could not get content digest: {}", e.getMessage());
318        }
319
320        return ContentDigest.missingChecksum();
321    }
322
323    /*
324     * (non-Javadoc)
325     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
326     */
327    @Override
328    public String getMimeType() {
329        try {
330            if (hasProperty(HAS_MIME_TYPE)) {
331                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
332            }
333            return "application/octet-stream";
334        } catch (final RepositoryException e) {
335            throw new RepositoryRuntimeException(e);
336        }
337    }
338
339    /*
340     * (non-Javadoc)
341     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
342     */
343    @Override
344    public String getFilename() {
345        try {
346            if (hasProperty(FILENAME)) {
347                return getProperty(FILENAME).getString();
348            }
349            return node.getParent().getName();
350        } catch (final RepositoryException e) {
351            throw new RepositoryRuntimeException(e);
352        }
353    }
354
355    @Override
356    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
357        return getFixity(idTranslator, getContentDigest(), getContentSize());
358    }
359
360    @Override
361    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
362                               final URI digestUri,
363                               final long size) {
364
365        fixityCheckCounter.inc();
366
367        try (final Timer.Context context = timer.time()) {
368
369            LOGGER.debug("Checking resource: " + getPath());
370
371            final String algorithm = ContentDigest.getAlgorithm(digestUri);
372
373            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;
374
375            final Collection<FixityResult> fixityResults
376                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);
377
378            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
379        } catch (final RepositoryException e) {
380            throw new RepositoryRuntimeException(e);
381        }
382    }
383
384    /**
385     * When deleting the binary, we also need to clean up the description document.
386     */
387    @Override
388    public void delete() {
389        final FedoraResource description = getDescription();
390
391        super.delete();
392
393        description.delete();
394    }
395
396    @Override
397    public Version getBaseVersion() {
398        return getDescription().getBaseVersion();
399    }
400
401    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
402            throws RepositoryException {
403        if (contentNode == null) {
404            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
405            return;
406        }
407        if (contentNode.canAddMixin(FEDORA_BINARY)) {
408            contentNode.addMixin(FEDORA_BINARY);
409        }
410
411        if (contentNode.hasProperty(JCR_DATA)) {
412            final Property dataProperty = contentNode.getProperty(JCR_DATA);
413            final Binary binary = (Binary) dataProperty.getBinary();
414            final String dsChecksum = binary.getHexHash();
415
416            contentSizeHistogram.update(dataProperty.getLength());
417
418            checksums.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum));
419
420            final String[] checksumArray = new String[checksums.size()];
421            checksums.stream().map(Object::toString).collect(Collectors.toSet()).toArray(checksumArray);
422
423            contentNode.setProperty(CONTENT_DIGEST, checksumArray);
424            contentNode.setProperty(CONTENT_SIZE, dataProperty.getLength());
425
426            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
427        }
428    }
429
430    /*
431     * (non-Javadoc)
432     * @see org.fcrepo.kernel.api.models.FedoraResource#getVersionHistory()
433     */
434    @Override
435    public VersionHistory getVersionHistory() {
436        try {
437            return getVersionManager().getVersionHistory(getDescription().getPath());
438        } catch (final RepositoryException e) {
439            throw new RepositoryRuntimeException(e);
440        }
441    }
442
443
444    @Override
445    public boolean isVersioned() {
446        return getDescription().isVersioned();
447    }
448
449    @Override
450    public void enableVersioning() {
451        super.enableVersioning();
452        getDescription().enableVersioning();
453    }
454
455    @Override
456    public void disableVersioning() {
457        super.disableVersioning();
458        getDescription().disableVersioning();
459    }
460
461    /**
462     * Check if the given node is a Fedora binary
463     * @param node the given node
464     * @return whether the given node is a Fedora binary
465     */
466    public static boolean hasMixin(final Node node) {
467        return isFedoraBinary.test(node);
468    }
469}