001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.kernel.modeshape;
019
020import com.codahale.metrics.Counter;
021import com.codahale.metrics.Histogram;
022import com.codahale.metrics.Timer;
023import org.apache.jena.rdf.model.Resource;
024import org.fcrepo.kernel.api.exception.InvalidChecksumException;
025import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
026import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
027import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
028import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
029import org.fcrepo.kernel.api.models.FedoraBinary;
030import org.fcrepo.kernel.api.models.FedoraResource;
031import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
032import org.fcrepo.kernel.api.RdfStream;
033import org.fcrepo.kernel.api.utils.CacheEntry;
034import org.fcrepo.kernel.api.utils.ContentDigest;
035import org.fcrepo.kernel.api.utils.FixityResult;
036import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
037import org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils;
038import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
039import org.fcrepo.metrics.RegistryService;
040import org.modeshape.jcr.api.Binary;
041import org.modeshape.jcr.api.ValueFactory;
042import org.slf4j.Logger;
043
044import javax.jcr.Node;
045import javax.jcr.PathNotFoundException;
046import javax.jcr.Property;
047import javax.jcr.RepositoryException;
048import javax.jcr.Value;
049import java.io.InputStream;
050import java.net.URI;
051import java.util.Collection;
052import java.util.HashMap;
053import java.util.HashSet;
054import java.util.Map;
055import java.util.Optional;
056import java.util.stream.Collectors;
057
058import static com.codahale.metrics.MetricRegistry.name;
059import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
060import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
061import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
062import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
063import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
064import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
065import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
066import static org.slf4j.LoggerFactory.getLogger;
067
068/**
069 * @author cabeer
070 * @since 9/19/14
071 */
072public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {
073
074    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);
075
076
077    static final RegistryService registryService = RegistryService.getInstance();
078    static final Counter fixityCheckCounter
079            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));
080
081    static final Timer timer = registryService.getMetrics().timer(
082            name(NonRdfSourceDescription.class, "fixity-check-time"));
083
084    static final Histogram contentSizeHistogram =
085            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));
086
087    /**
088     * Wrap an existing Node as a Fedora Binary
089     * @param node the node
090     */
091    public FedoraBinaryImpl(final Node node) {
092        super(node);
093
094        if (node.isNew()) {
095            initializeNewBinaryProperties();
096        }
097    }
098
099    private void initializeNewBinaryProperties() {
100        try {
101            decorateContentNode(node, new HashSet<>());
102        } catch (final RepositoryException e) {
103            LOGGER.warn("Count not decorate {} with FedoraBinary properties: {}", node, e);
104        }
105    }
106
107    @Override
108    public FedoraResource getDescription() {
109        try {
110            return new NonRdfSourceDescriptionImpl(getNode().getParent());
111        } catch (final RepositoryException e) {
112            throw new RepositoryRuntimeException(e);
113        }
114    }
115
116    /*
117         * (non-Javadoc)
118         * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
119         */
120    @Override
121    public InputStream getContent() {
122        try {
123            return getBinaryContent().getStream();
124        } catch (final RepositoryException e) {
125            throw new RepositoryRuntimeException(e);
126        }
127    }
128
129    /**
130     * Retrieve the JCR Binary object
131     * @return a JCR-wrapped Binary object
132     */
133    private javax.jcr.Binary getBinaryContent() {
134        try {
135            return getProperty(JCR_DATA).getBinary();
136        } catch (final PathNotFoundException e) {
137            throw new PathNotFoundRuntimeException(e);
138        } catch (final RepositoryException e) {
139            throw new RepositoryRuntimeException(e);
140        }
141    }
142
143    /*
144     * (non-Javadoc)
145     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
146     * java.lang.String, java.net.URI, java.lang.String,
147     * org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
148     */
149    @Override
150    public void setContent(final InputStream content, final String contentType,
151                           final Collection<URI> checksums, final String originalFileName,
152                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
153            throws InvalidChecksumException {
154
155        try {
156            final Node contentNode = getNode();
157
158            if (contentNode.canAddMixin(FEDORA_BINARY)) {
159                contentNode.addMixin(FEDORA_BINARY);
160            }
161
162            if (contentType != null) {
163                contentNode.setProperty(HAS_MIME_TYPE, contentType);
164            }
165
166            if (originalFileName != null) {
167                contentNode.setProperty(FILENAME, originalFileName);
168            }
169
170            LOGGER.debug("Created content node at path: {}", contentNode.getPath());
171
172            String hint = null;
173
174            if (storagePolicyDecisionPoint != null) {
175                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
176            }
177            final ValueFactory modevf =
178                    (ValueFactory) node.getSession().getValueFactory();
179            final Binary binary = modevf.createBinary(content, hint);
180
181        /*
182         * This next line of code deserves explanation. If we chose for the
183         * simpler line: Property dataProperty =
184         * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
185         * would not block on the stream's completion, and we would return to
186         * the requester before the mutation to the repo had actually completed.
187         * So instead we use createBinary(requestBodyStream), because its
188         * contract specifies: "The passed InputStream is closed before this
189         * method returns either normally or because of an exception." which
190         * lets us block and not return until the job is done! The simpler code
191         * may still be useful to us for an asynchronous method that we develop
192         * later.
193         */
194            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);
195
196            // Ensure provided checksums are valid
197            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
198            verifyChecksums(nonNullChecksums, dataProperty);
199
200            decorateContentNode(contentNode, nonNullChecksums);
201            FedoraTypesUtils.touch(getNode());
202            FedoraTypesUtils.touch(((FedoraResourceImpl) getDescription()).getNode());
203
204            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());
205
206        } catch (final RepositoryException e) {
207            throw new RepositoryRuntimeException(e);
208        }
209    }
210
211    /**
212     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
213     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
214     *
215     * @param checksums that the user provided
216     * @param dataProperty containing the binary against which the checksums will be verified
217     * @throws InvalidChecksumException
218     * @throws RepositoryException
219     */
220    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
221            throws InvalidChecksumException, RepositoryException {
222
223        final Map<URI, URI> checksumErrors = new HashMap<>();
224
225        // Loop through provided checksums validating against computed values
226        checksums.forEach(checksum -> {
227            final String algorithm = ContentDigest.getAlgorithm(checksum);
228            try {
229                // The case internally supported by ModeShape
230                if (algorithm.equals(SHA1.algorithm)) {
231                    final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
232                    final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);
233
234                    if (!dsSHA1Uri.equals(checksum)) {
235                        LOGGER.debug("Failed checksum test");
236                        checksumErrors.put(checksum, dsSHA1Uri);
237                    }
238
239                // The case that requires re-computing the checksum
240                } else {
241                    final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
242                    cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent(
243                            fixityResult -> {
244                                if (!fixityResult.matches(checksum)) {
245                                    LOGGER.debug("Failed checksum test");
246                                    checksumErrors.put(checksum, fixityResult.getComputedChecksum());
247                                }
248                            }
249                    );
250                }
251            } catch (RepositoryException e) {
252                throw new RepositoryRuntimeException(e);
253            }
254        });
255
256        // Throw an exception if any checksum errors occurred
257        if (!checksumErrors.isEmpty()) {
258            final String template = "Checksum Mismatch of %1$s and %2$s\n";
259            final StringBuilder error = new StringBuilder();
260            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
261            throw new InvalidChecksumException(error.toString());
262        }
263
264    }
265
266    /*
267     * (non-Javadoc)
268     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
269     */
270    @Override
271    public long getContentSize() {
272        try {
273            if (hasProperty(CONTENT_SIZE)) {
274                return getProperty(CONTENT_SIZE).getLong();
275            }
276        } catch (final RepositoryException e) {
277            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
278        }
279
280        return -1L;
281    }
282
283    /*
284     * (non-Javadoc)
285     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
286     */
287    @Override
288    public URI getContentDigest() {
289        try {
290            // Determine which digest algorithm to use
291            final String algorithm = hasProperty(DEFAULT_DIGEST_ALGORITHM) ?
292                    property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst().get().getString() :
293                    ContentDigest.DEFAULT_ALGORITHM;
294            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");
295
296            if (hasProperty(CONTENT_DIGEST)) {
297                // Select the stored digest that matches the digest algorithm
298                Optional<Value> digestValue = property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
299                    try {
300                        final URI digestUri = URI.create(digest.getString());
301                        return algorithmWithoutStringType.equalsIgnoreCase(ContentDigest.getAlgorithm(digestUri));
302
303                    } catch (RepositoryException e) {
304                        LOGGER.warn("Exception thrown when getting digest property {}, {}", digest, e.getMessage());
305                        return false;
306                    }
307                }).findFirst();
308
309                // Success, return the digest value
310                if (digestValue.isPresent()) {
311                    return URI.create(digestValue.get().getString());
312                }
313            }
314            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
315        } catch (final RepositoryException e) {
316            LOGGER.warn("Could not get content digest: {}", e.getMessage());
317        }
318
319        return ContentDigest.missingChecksum();
320    }
321
322    /*
323     * (non-Javadoc)
324     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
325     */
326    @Override
327    public String getMimeType() {
328        try {
329            if (hasProperty(HAS_MIME_TYPE)) {
330                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
331            }
332            return "application/octet-stream";
333        } catch (final RepositoryException e) {
334            throw new RepositoryRuntimeException(e);
335        }
336    }
337
338    /*
339     * (non-Javadoc)
340     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
341     */
342    @Override
343    public String getFilename() {
344        try {
345            if (hasProperty(FILENAME)) {
346                return getProperty(FILENAME).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
347            }
348            return node.getParent().getName();
349        } catch (final RepositoryException e) {
350            throw new RepositoryRuntimeException(e);
351        }
352    }
353
354    @Override
355    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
356        return getFixity(idTranslator, getContentDigest(), getContentSize());
357    }
358
359    @Override
360    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
361                               final URI digestUri,
362                               final long size) {
363
364        fixityCheckCounter.inc();
365
366        try (final Timer.Context context = timer.time()) {
367
368            LOGGER.debug("Checking resource: " + getPath());
369
370            final String algorithm = ContentDigest.getAlgorithm(digestUri);
371
372            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;
373
374            final Collection<FixityResult> fixityResults
375                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);
376
377            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
378        } catch (final RepositoryException e) {
379            throw new RepositoryRuntimeException(e);
380        }
381    }
382
383    /**
384     * When deleting the binary, we also need to clean up the description document.
385     */
386    @Override
387    public void delete() {
388        final FedoraResource description = getDescription();
389
390        super.delete();
391
392        description.delete();
393    }
394
395    @Override
396    public FedoraResource getBaseVersion() {
397        return getDescription().getBaseVersion();
398    }
399
400    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
401            throws RepositoryException {
402        if (contentNode == null) {
403            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
404            return;
405        }
406        if (contentNode.canAddMixin(FEDORA_BINARY)) {
407            contentNode.addMixin(FEDORA_BINARY);
408        }
409
410        if (contentNode.hasProperty(JCR_DATA)) {
411            final Property dataProperty = contentNode.getProperty(JCR_DATA);
412            final Binary binary = (Binary) dataProperty.getBinary();
413            final String dsChecksum = binary.getHexHash();
414
415            contentSizeHistogram.update(dataProperty.getLength());
416
417            checksums.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum));
418
419            final String[] checksumArray = new String[checksums.size()];
420            checksums.stream().map(Object::toString).collect(Collectors.toSet()).toArray(checksumArray);
421
422            contentNode.setProperty(CONTENT_DIGEST, checksumArray);
423            contentNode.setProperty(CONTENT_SIZE, dataProperty.getLength());
424
425            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
426        }
427    }
428
429    @Override
430    public boolean isVersioned() {
431        return getDescription().isVersioned();
432    }
433
434    @Override
435    public void enableVersioning() {
436        super.enableVersioning();
437        getDescription().enableVersioning();
438    }
439
440    @Override
441    public void disableVersioning() {
442        super.disableVersioning();
443        getDescription().disableVersioning();
444    }
445
446    /**
447     * Check if the given node is a Fedora binary
448     * @param node the given node
449     * @return whether the given node is a Fedora binary
450     */
451    public static boolean hasMixin(final Node node) {
452        return isFedoraBinary.test(node);
453    }
454}