/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.kernel.modeshape;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Timer;
import org.apache.jena.rdf.model.Resource;
import org.fcrepo.kernel.api.exception.InvalidChecksumException;
import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
import org.fcrepo.kernel.api.models.FedoraBinary;
import org.fcrepo.kernel.api.models.FedoraResource;
import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
import org.fcrepo.kernel.api.RdfStream;
import org.fcrepo.kernel.api.utils.CacheEntry;
import org.fcrepo.kernel.api.utils.ContentDigest;
import org.fcrepo.kernel.api.utils.FixityResult;
import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
import org.fcrepo.metrics.RegistryService;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.ValueFactory;
import org.slf4j.Logger;

import javax.jcr.Node;
import javax.jcr.PathNotFoundException;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import java.io.InputStream;
import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import static com.codahale.metrics.MetricRegistry.name;
import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
import static org.slf4j.LoggerFactory.getLogger;

/**
 * A {@link FedoraBinary} backed by a JCR node whose {@code jcr:data} property holds the binary content.
 *
 * @author cabeer
 * @since 9/19/14
 */
public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {

    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);


    static final RegistryService registryService = RegistryService.getInstance();

    /** Incremented once per fixity check. */
    static final Counter fixityCheckCounter
            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));

    /** Times each fixity check. */
    static final Timer timer = registryService.getMetrics().timer(
            name(NonRdfSourceDescription.class, "fixity-check-time"));

    /** Records the length of the data property each time a content node is decorated. */
    static final Histogram contentSizeHistogram =
            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));

    /**
     * Wrap an existing Node as a Fedora Binary. A node that reports itself as new is decorated with the
     * binary mixin and, when content is already present, its digest and size properties.
     *
     * @param node the node
     */
    public FedoraBinaryImpl(final Node node) {
        super(node);

        if (node.isNew()) {
            initializeNewBinaryProperties();
        }
    }

    /**
     * Decorate the wrapped node with the binary properties. This is best-effort: a failure is logged
     * and not propagated.
     */
    private void initializeNewBinaryProperties() {
        try {
            decorateContentNode(node, new HashSet<>());
        } catch (final RepositoryException e) {
            LOGGER.warn("Count not decorate {} with FedoraBinary properties: {}", node, e);
        }
    }

    /**
     * Get the description of this binary, which wraps the parent of this binary's node.
     *
     * @return the description resource
     * @throws RepositoryRuntimeException if the parent node cannot be retrieved
     */
    @Override
    public FedoraResource getDescription() {
        try {
            return new NonRdfSourceDescriptionImpl(getNode().getParent());
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Open a stream over the stored binary content.
     *
     * @return a stream of the binary content
     * @throws RepositoryRuntimeException if the stream cannot be opened
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
     */
    @Override
    public InputStream getContent() {
        try {
            return getBinaryContent().getStream();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Retrieve the JCR Binary object
     *
     * @return a JCR-wrapped Binary object
     * @throws PathNotFoundRuntimeException if the data property does not exist
     * @throws RepositoryRuntimeException on any other repository failure
     */
    private javax.jcr.Binary getBinaryContent() {
        try {
            return getProperty(JCR_DATA).getBinary();
        } catch (final PathNotFoundException e) {
            throw new PathNotFoundRuntimeException(e);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Store new content in this binary, verify it against the supplied checksums, and refresh the
     * digest and size properties.
     *
     * @param content the new content; consumed while the binary value is created
     * @param contentType the mime type to record, or null to leave the mime type untouched
     * @param checksums checksums the content must match; may be null. The collection is not modified.
     * @param originalFileName the filename to record, or null to leave the filename untouched
     * @param storagePolicyDecisionPoint optional source of a storage hint; may be null
     * @throws InvalidChecksumException if any supplied checksum does not match the stored content
     * @throws RepositoryRuntimeException on repository failure
     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
     *      java.lang.String, java.util.Collection, java.lang.String,
     *      org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
     */
    @Override
    public void setContent(final InputStream content, final String contentType,
                           final Collection<URI> checksums, final String originalFileName,
                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
            throws InvalidChecksumException {

        try {
            final Node contentNode = getNode();

            if (contentNode.canAddMixin(FEDORA_BINARY)) {
                contentNode.addMixin(FEDORA_BINARY);
            }

            if (contentType != null) {
                contentNode.setProperty(HAS_MIME_TYPE, contentType);
            }

            if (originalFileName != null) {
                contentNode.setProperty(FILENAME, originalFileName);
            }

            LOGGER.debug("Created content node at path: {}", contentNode.getPath());

            String hint = null;

            if (storagePolicyDecisionPoint != null) {
                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
            }
            final ValueFactory modevf =
                    (ValueFactory) node.getSession().getValueFactory();
            final Binary binary = modevf.createBinary(content, hint);

            /*
             * This next line of code deserves explanation. If we chose for the
             * simpler line: Property dataProperty =
             * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
             * would not block on the stream's completion, and we would return to
             * the requester before the mutation to the repo had actually completed.
             * So instead we use createBinary(requestBodyStream), because its
             * contract specifies: "The passed InputStream is closed before this
             * method returns either normally or because of an exception." which
             * lets us block and not return until the job is done! The simpler code
             * may still be useful to us for an asynchronous method that we develop
             * later.
             */
            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);

            // Ensure provided checksums are valid
            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
            verifyChecksums(nonNullChecksums, dataProperty);

            decorateContentNode(contentNode, nonNullChecksums);
            touch();
            ((FedoraResourceImpl) getDescription()).touch();

            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());

        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
     *
     * @param checksums that the user provided
     * @param dataProperty containing the binary against which the checksums will be verified
     * @throws InvalidChecksumException if at least one checksum does not match; the message lists every mismatch
     * @throws RepositoryException if the binary cannot be read or a checksum cannot be computed
     */
    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
            throws InvalidChecksumException, RepositoryException {

        // Maps each failing user-supplied checksum to the value computed from the stored binary
        final Map<URI, URI> checksumErrors = new HashMap<>();

        // Loop through provided checksums validating against computed values
        for (final URI checksum : checksums) {
            final String algorithm = ContentDigest.getAlgorithm(checksum);

            if (algorithm.equals(SHA1.algorithm)) {
                // The case internally supported by ModeShape
                final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
                final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);

                if (!dsSHA1Uri.equals(checksum)) {
                    LOGGER.debug("Failed checksum test");
                    checksumErrors.put(checksum, dsSHA1Uri);
                }
            } else {
                // The case that requires re-computing the checksum
                final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
                cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent(fixityResult -> {
                    if (!fixityResult.matches(checksum)) {
                        LOGGER.debug("Failed checksum test");
                        checksumErrors.put(checksum, fixityResult.getComputedChecksum());
                    }
                });
            }
        }

        // Throw an exception if any checksum errors occurred
        if (!checksumErrors.isEmpty()) {
            final String template = "Checksum Mismatch of %1$s and %2$s\n";
            final StringBuilder error = new StringBuilder();
            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
            throw new InvalidChecksumException(error.toString());
        }

    }

    /**
     * Get the recorded size of the content.
     *
     * @return the value of the content-size property, or -1 if the property is absent or cannot be read
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
     */
    @Override
    public long getContentSize() {
        try {
            if (hasProperty(CONTENT_SIZE)) {
                return getProperty(CONTENT_SIZE).getLong();
            }
        } catch (final RepositoryException e) {
            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
        }

        return -1L;
    }

    /**
     * Get the stored digest whose algorithm matches the default digest algorithm of this binary.
     * The algorithm is read from the default-digest-algorithm property when it is present and has a
     * value; otherwise {@link ContentDigest#DEFAULT_ALGORITHM} is used.
     *
     * @return the matching digest, or {@link ContentDigest#missingChecksum()} when no stored digest
     *         matches or the properties cannot be read
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
     */
    @Override
    public URI getContentDigest() {
        try {
            // Determine which digest algorithm to use; a property without values falls back to the default
            final Optional<Value> configuredAlgorithm = hasProperty(DEFAULT_DIGEST_ALGORITHM) ?
                    property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst() :
                    Optional.<Value>empty();
            final String algorithm = configuredAlgorithm.isPresent() ?
                    configuredAlgorithm.get().getString() :
                    ContentDigest.DEFAULT_ALGORITHM;
            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");

            if (hasProperty(CONTENT_DIGEST)) {
                // Select the stored digest that matches the digest algorithm
                final Optional<Value> digestValue =
                        property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
                            try {
                                final URI digestUri = URI.create(digest.getString());
                                return algorithmWithoutStringType.equalsIgnoreCase(
                                        ContentDigest.getAlgorithm(digestUri));

                            } catch (final RepositoryException e) {
                                LOGGER.warn("Exception thrown when getting digest property {}, {}",
                                        digest, e.getMessage());
                                return false;
                            }
                        }).findFirst();

                // Success, return the digest value
                if (digestValue.isPresent()) {
                    return URI.create(digestValue.get().getString());
                }
            }
            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
        } catch (final RepositoryException e) {
            LOGGER.warn("Could not get content digest: {}", e.getMessage());
        }

        return ContentDigest.missingChecksum();
    }

    /**
     * Get the mime type of this binary, with any xsd:string datatype suffix removed.
     *
     * @return the stored mime type, or "application/octet-stream" when none is stored
     * @throws RepositoryRuntimeException if the property cannot be read
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
     */
    @Override
    public String getMimeType() {
        try {
            if (hasProperty(HAS_MIME_TYPE)) {
                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
            }
            return "application/octet-stream";
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Get the filename of this binary, with any xsd:string datatype suffix removed.
     *
     * @return the stored filename, or the name of the parent node when none is stored
     * @throws RepositoryRuntimeException if the property or parent node cannot be read
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
     */
    @Override
    public String getFilename() {
        try {
            if (hasProperty(FILENAME)) {
                return getProperty(FILENAME).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
            }
            return node.getParent().getName();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Run a fixity check against the stored content digest and content size.
     *
     * @param idTranslator converter between RDF resources and Fedora resources
     * @return the fixity results as an RDF stream
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
        return getFixity(idTranslator, getContentDigest(), getContentSize());
    }

    /**
     * Run a fixity check using the algorithm of the given digest.
     *
     * @param idTranslator converter between RDF resources and Fedora resources
     * @param digestUri the expected digest; its algorithm selects the checksum to compute
     * @param size the expected size; a negative value is replaced by the size of the stored binary
     * @return the fixity results as an RDF stream
     * @throws RepositoryRuntimeException on repository failure
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
                               final URI digestUri,
                               final long size) {

        fixityCheckCounter.inc();

        try (final Timer.Context context = timer.time()) {

            LOGGER.debug("Checking resource: {}", getPath());

            final String algorithm = ContentDigest.getAlgorithm(digestUri);

            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;

            final Collection<FixityResult> fixityResults
                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);

            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * When deleting the binary, we also need to clean up the description document.
     */
    @Override
    public void delete() {
        // Resolve the description first: getDescription() navigates from this binary's node
        final FedoraResource description = getDescription();

        super.delete();

        description.delete();
    }

    /**
     * Get the base version of this binary, as reported by its description.
     *
     * @return the base version of the description
     */
    @Override
    public FedoraResource getBaseVersion() {
        return getDescription().getBaseVersion();
    }

    /**
     * Add the binary mixin to the given node and, when it has a data property, record the content
     * digests and content size on it. The SHA-1 digest reported by the binary is always recorded in
     * addition to the supplied checksums.
     *
     * @param contentNode the node to decorate; a null node is logged and ignored
     * @param checksums additional checksums to record; read only, never modified
     * @throws RepositoryException on repository failure
     */
    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
            throws RepositoryException {
        if (contentNode == null) {
            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
            return;
        }
        if (contentNode.canAddMixin(FEDORA_BINARY)) {
            contentNode.addMixin(FEDORA_BINARY);
        }

        if (contentNode.hasProperty(JCR_DATA)) {
            final Property dataProperty = contentNode.getProperty(JCR_DATA);
            final Binary binary = (Binary) dataProperty.getBinary();
            final String dsChecksum = binary.getHexHash();

            contentSizeHistogram.update(dataProperty.getLength());

            // Collect into a private set so the caller's collection is left untouched (it may be
            // immutable) and duplicates are dropped before the array is sized.
            final Collection<String> digests = checksums.stream()
                    .map(Object::toString)
                    .collect(Collectors.toCollection(HashSet::new));
            digests.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum).toString());

            contentNode.setProperty(CONTENT_DIGEST, digests.toArray(new String[0]));
            contentNode.setProperty(CONTENT_SIZE, dataProperty.getLength());

            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
        }
    }

    /**
     * Report whether this binary is versioned, as reported by its description.
     *
     * @return whether the description is versioned
     */
    @Override
    public boolean isVersioned() {
        return getDescription().isVersioned();
    }

    /**
     * Enable versioning on both this binary and its description.
     */
    @Override
    public void enableVersioning() {
        super.enableVersioning();
        getDescription().enableVersioning();
    }

    /**
     * Disable versioning on both this binary and its description.
     */
    @Override
    public void disableVersioning() {
        super.disableVersioning();
        getDescription().disableVersioning();
    }

    /**
     * Check if the given node is a Fedora binary
     *
     * @param node the given node
     * @return whether the given node is a Fedora binary
     */
    public static boolean hasMixin(final Node node) {
        return isFedoraBinary.test(node);
    }
}