001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.kernel.modeshape;
019
020import com.codahale.metrics.Counter;
021import com.codahale.metrics.Histogram;
022import com.codahale.metrics.Timer;
023import org.apache.jena.rdf.model.Resource;
024import org.fcrepo.kernel.api.exception.InvalidChecksumException;
025import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
026import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
027import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
028import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
029import org.fcrepo.kernel.api.models.FedoraBinary;
030import org.fcrepo.kernel.api.models.FedoraResource;
031import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
032import org.fcrepo.kernel.api.RdfStream;
033import org.fcrepo.kernel.api.utils.CacheEntry;
034import org.fcrepo.kernel.api.utils.ContentDigest;
035import org.fcrepo.kernel.api.utils.FixityResult;
036import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
037import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
038import org.fcrepo.metrics.RegistryService;
039import org.modeshape.jcr.api.Binary;
040import org.modeshape.jcr.api.ValueFactory;
041import org.slf4j.Logger;
042
043import javax.jcr.Node;
044import javax.jcr.PathNotFoundException;
045import javax.jcr.Property;
046import javax.jcr.RepositoryException;
047import javax.jcr.Value;
048import java.io.InputStream;
049import java.net.URI;
050import java.util.Collection;
051import java.util.HashMap;
052import java.util.HashSet;
053import java.util.Map;
054import java.util.Optional;
055import java.util.stream.Collectors;
056
057import static com.codahale.metrics.MetricRegistry.name;
058import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
059import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
060import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
061import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
062import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
063import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
064import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
065import static org.slf4j.LoggerFactory.getLogger;
066
067/**
068 * @author cabeer
069 * @since 9/19/14
070 */
071public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {
072
073    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);
074
075
076    static final RegistryService registryService = RegistryService.getInstance();
077    static final Counter fixityCheckCounter
078            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));
079
080    static final Timer timer = registryService.getMetrics().timer(
081            name(NonRdfSourceDescription.class, "fixity-check-time"));
082
083    static final Histogram contentSizeHistogram =
084            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));
085
086    /**
087     * Wrap an existing Node as a Fedora Binary
088     * @param node the node
089     */
090    public FedoraBinaryImpl(final Node node) {
091        super(node);
092
093        if (node.isNew()) {
094            initializeNewBinaryProperties();
095        }
096    }
097
098    private void initializeNewBinaryProperties() {
099        try {
100            decorateContentNode(node, new HashSet<>());
101        } catch (final RepositoryException e) {
102            LOGGER.warn("Count not decorate {} with FedoraBinary properties: {}", node, e);
103        }
104    }
105
106    @Override
107    public FedoraResource getDescription() {
108        try {
109            return new NonRdfSourceDescriptionImpl(getNode().getParent());
110        } catch (final RepositoryException e) {
111            throw new RepositoryRuntimeException(e);
112        }
113    }
114
115    /*
116         * (non-Javadoc)
117         * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
118         */
119    @Override
120    public InputStream getContent() {
121        try {
122            return getBinaryContent().getStream();
123        } catch (final RepositoryException e) {
124            throw new RepositoryRuntimeException(e);
125        }
126    }
127
128    /**
129     * Retrieve the JCR Binary object
130     * @return a JCR-wrapped Binary object
131     */
132    private javax.jcr.Binary getBinaryContent() {
133        try {
134            return getProperty(JCR_DATA).getBinary();
135        } catch (final PathNotFoundException e) {
136            throw new PathNotFoundRuntimeException(e);
137        } catch (final RepositoryException e) {
138            throw new RepositoryRuntimeException(e);
139        }
140    }
141
142    /*
143     * (non-Javadoc)
144     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
145     * java.lang.String, java.net.URI, java.lang.String,
146     * org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
147     */
148    @Override
149    public void setContent(final InputStream content, final String contentType,
150                           final Collection<URI> checksums, final String originalFileName,
151                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
152            throws InvalidChecksumException {
153
154        try {
155            final Node contentNode = getNode();
156
157            if (contentNode.canAddMixin(FEDORA_BINARY)) {
158                contentNode.addMixin(FEDORA_BINARY);
159            }
160
161            if (contentType != null) {
162                contentNode.setProperty(HAS_MIME_TYPE, contentType);
163            }
164
165            if (originalFileName != null) {
166                contentNode.setProperty(FILENAME, originalFileName);
167            }
168
169            LOGGER.debug("Created content node at path: {}", contentNode.getPath());
170
171            String hint = null;
172
173            if (storagePolicyDecisionPoint != null) {
174                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
175            }
176            final ValueFactory modevf =
177                    (ValueFactory) node.getSession().getValueFactory();
178            final Binary binary = modevf.createBinary(content, hint);
179
180        /*
181         * This next line of code deserves explanation. If we chose for the
182         * simpler line: Property dataProperty =
183         * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
184         * would not block on the stream's completion, and we would return to
185         * the requester before the mutation to the repo had actually completed.
186         * So instead we use createBinary(requestBodyStream), because its
187         * contract specifies: "The passed InputStream is closed before this
188         * method returns either normally or because of an exception." which
189         * lets us block and not return until the job is done! The simpler code
190         * may still be useful to us for an asynchronous method that we develop
191         * later.
192         */
193            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);
194
195            // Ensure provided checksums are valid
196            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
197            verifyChecksums(nonNullChecksums, dataProperty);
198
199            decorateContentNode(contentNode, nonNullChecksums);
200            touch();
201            ((FedoraResourceImpl) getDescription()).touch();
202
203            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());
204
205        } catch (final RepositoryException e) {
206            throw new RepositoryRuntimeException(e);
207        }
208    }
209
210    /**
211     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
212     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
213     *
214     * @param checksums that the user provided
215     * @param dataProperty containing the binary against which the checksums will be verified
216     * @throws InvalidChecksumException
217     * @throws RepositoryException
218     */
219    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
220            throws InvalidChecksumException, RepositoryException {
221
222        final Map<URI, URI> checksumErrors = new HashMap<>();
223
224        // Loop through provided checksums validating against computed values
225        checksums.forEach(checksum -> {
226            final String algorithm = ContentDigest.getAlgorithm(checksum);
227            try {
228                // The case internally supported by ModeShape
229                if (algorithm.equals(SHA1.algorithm)) {
230                    final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
231                    final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);
232
233                    if (!dsSHA1Uri.equals(checksum)) {
234                        LOGGER.debug("Failed checksum test");
235                        checksumErrors.put(checksum, dsSHA1Uri);
236                    }
237
238                // The case that requires re-computing the checksum
239                } else {
240                    final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
241                    cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent(
242                            fixityResult -> {
243                                if (!fixityResult.matches(checksum)) {
244                                    LOGGER.debug("Failed checksum test");
245                                    checksumErrors.put(checksum, fixityResult.getComputedChecksum());
246                                }
247                            }
248                    );
249                }
250            } catch (RepositoryException e) {
251                throw new RepositoryRuntimeException(e);
252            }
253        });
254
255        // Throw an exception if any checksum errors occurred
256        if (!checksumErrors.isEmpty()) {
257            final String template = "Checksum Mismatch of %1$s and %2$s\n";
258            final StringBuilder error = new StringBuilder();
259            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
260            throw new InvalidChecksumException(error.toString());
261        }
262
263    }
264
265    /*
266     * (non-Javadoc)
267     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
268     */
269    @Override
270    public long getContentSize() {
271        try {
272            if (hasProperty(CONTENT_SIZE)) {
273                return getProperty(CONTENT_SIZE).getLong();
274            }
275        } catch (final RepositoryException e) {
276            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
277        }
278
279        return -1L;
280    }
281
282    /*
283     * (non-Javadoc)
284     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
285     */
286    @Override
287    public URI getContentDigest() {
288        try {
289            // Determine which digest algorithm to use
290            final String algorithm = hasProperty(DEFAULT_DIGEST_ALGORITHM) ?
291                    property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst().get().getString() :
292                    ContentDigest.DEFAULT_ALGORITHM;
293            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");
294
295            if (hasProperty(CONTENT_DIGEST)) {
296                // Select the stored digest that matches the digest algorithm
297                Optional<Value> digestValue = property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
298                    try {
299                        final URI digestUri = URI.create(digest.getString());
300                        return algorithmWithoutStringType.equalsIgnoreCase(ContentDigest.getAlgorithm(digestUri));
301
302                    } catch (RepositoryException e) {
303                        LOGGER.warn("Exception thrown when getting digest property {}, {}", digest, e.getMessage());
304                        return false;
305                    }
306                }).findFirst();
307
308                // Success, return the digest value
309                if (digestValue.isPresent()) {
310                    return URI.create(digestValue.get().getString());
311                }
312            }
313            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
314        } catch (final RepositoryException e) {
315            LOGGER.warn("Could not get content digest: {}", e.getMessage());
316        }
317
318        return ContentDigest.missingChecksum();
319    }
320
321    /*
322     * (non-Javadoc)
323     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
324     */
325    @Override
326    public String getMimeType() {
327        try {
328            if (hasProperty(HAS_MIME_TYPE)) {
329                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
330            }
331            return "application/octet-stream";
332        } catch (final RepositoryException e) {
333            throw new RepositoryRuntimeException(e);
334        }
335    }
336
337    /*
338     * (non-Javadoc)
339     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
340     */
341    @Override
342    public String getFilename() {
343        try {
344            if (hasProperty(FILENAME)) {
345                return getProperty(FILENAME).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
346            }
347            return node.getParent().getName();
348        } catch (final RepositoryException e) {
349            throw new RepositoryRuntimeException(e);
350        }
351    }
352
353    @Override
354    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
355        return getFixity(idTranslator, getContentDigest(), getContentSize());
356    }
357
358    @Override
359    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
360                               final URI digestUri,
361                               final long size) {
362
363        fixityCheckCounter.inc();
364
365        try (final Timer.Context context = timer.time()) {
366
367            LOGGER.debug("Checking resource: " + getPath());
368
369            final String algorithm = ContentDigest.getAlgorithm(digestUri);
370
371            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;
372
373            final Collection<FixityResult> fixityResults
374                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);
375
376            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
377        } catch (final RepositoryException e) {
378            throw new RepositoryRuntimeException(e);
379        }
380    }
381
382    /**
383     * When deleting the binary, we also need to clean up the description document.
384     */
385    @Override
386    public void delete() {
387        final FedoraResource description = getDescription();
388
389        super.delete();
390
391        description.delete();
392    }
393
394    @Override
395    public FedoraResource getBaseVersion() {
396        return getDescription().getBaseVersion();
397    }
398
399    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
400            throws RepositoryException {
401        if (contentNode == null) {
402            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
403            return;
404        }
405        if (contentNode.canAddMixin(FEDORA_BINARY)) {
406            contentNode.addMixin(FEDORA_BINARY);
407        }
408
409        if (contentNode.hasProperty(JCR_DATA)) {
410            final Property dataProperty = contentNode.getProperty(JCR_DATA);
411            final Binary binary = (Binary) dataProperty.getBinary();
412            final String dsChecksum = binary.getHexHash();
413
414            contentSizeHistogram.update(dataProperty.getLength());
415
416            checksums.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum));
417
418            final String[] checksumArray = new String[checksums.size()];
419            checksums.stream().map(Object::toString).collect(Collectors.toSet()).toArray(checksumArray);
420
421            contentNode.setProperty(CONTENT_DIGEST, checksumArray);
422            contentNode.setProperty(CONTENT_SIZE, dataProperty.getLength());
423
424            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
425        }
426    }
427
428    @Override
429    public boolean isVersioned() {
430        return getDescription().isVersioned();
431    }
432
433    @Override
434    public void enableVersioning() {
435        super.enableVersioning();
436        getDescription().enableVersioning();
437    }
438
439    @Override
440    public void disableVersioning() {
441        super.disableVersioning();
442        getDescription().disableVersioning();
443    }
444
445    /**
446     * Check if the given node is a Fedora binary
447     * @param node the given node
448     * @return whether the given node is a Fedora binary
449     */
450    public static boolean hasMixin(final Node node) {
451        return isFedoraBinary.test(node);
452    }
453}