001/* 002 * Copyright 2015 DuraSpace, Inc. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.fcrepo.connector.file; 017 018import static java.lang.System.currentTimeMillis; 019import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_DIGEST; 020import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_SIZE; 021import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_BINARY; 022import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_CONTAINER; 023import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION; 024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_RESOURCE; 025import static org.fcrepo.kernel.api.utils.ContentDigest.asURI; 026import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_CREATED; 027import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_LASTMODIFIED; 028import static org.modeshape.jcr.api.JcrConstants.JCR_DATA; 029import static org.modeshape.jcr.api.JcrConstants.NT_FILE; 030import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER; 031import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE; 032 033import java.io.File; 034import java.net.URI; 035import java.util.Date; 036import java.util.HashMap; 037import java.io.IOException; 038import java.util.Map; 039 040import com.google.common.annotations.VisibleForTesting; 041 042import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 043import org.infinispan.schematic.document.Document; 044import org.modeshape.connector.filesystem.FileSystemConnector; 045import org.modeshape.jcr.api.value.DateTime; 046import org.modeshape.jcr.api.nodetype.NodeTypeManager; 047import org.modeshape.jcr.spi.federation.DocumentChanges; 048import org.modeshape.jcr.spi.federation.DocumentReader; 049import org.modeshape.jcr.spi.federation.DocumentWriter; 050import org.modeshape.jcr.value.BinaryValue; 051import org.modeshape.jcr.value.Name; 052import org.modeshape.jcr.value.Property; 053import org.modeshape.jcr.value.basic.BasicSingleValueProperty; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057import javax.jcr.NamespaceRegistry; 058import javax.jcr.RepositoryException; 059 060/** 061 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and 062 * content properties. 063 * 064 * @author Andrew Woods 065 * Date: 1/30/14 066 */ 067public class FedoraFileSystemConnector extends FileSystemConnector { 068 069 private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class); 070 071 private static final String DELIMITER = "/"; 072 private static final String JCR_CONTENT = "jcr:content"; 073 private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT; 074 075 /** 076 * The string path for a {@link File} object that represents the top-level directory in which properties are 077 * stored. This is optional for this connector, but if set allows properties to be cached (greatly 078 * improving performance) for even read-only connectors. When this property is specified the extraPropertiesStore 079 * should be null (not specified) as it would be overridden by this. 080 */ 081 private String propertiesDirectoryPath; 082 private File propertiesDirectory; 083 084 @Override 085 public void initialize(final NamespaceRegistry registry, 086 final NodeTypeManager nodeTypeManager) throws IOException { 087 try { 088 super.initialize(registry, nodeTypeManager); 089 } catch (final RepositoryException e) { 090 throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e); 091 } 092 093 if (propertiesDirectoryPath != null) { 094 propertiesDirectory = new File(propertiesDirectoryPath); 095 if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) { 096 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 097 + ", does not exist or is not a directory."); 098 } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) { 099 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 100 + ", should be readable and writable."); 101 } 102 if (extraPropertiesStore() != null) { 103 LOGGER.warn("Extra properties store was specified but won't be used!"); 104 } 105 setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory)); 106 } 107 } 108 109 /** 110 * This method returns the object/document for the node with the federated arg 'id'. 111 * 112 * Additionally, this method adds Fedora datastream and content properties to the result of the parent class 113 * implementation. 114 */ 115 @Override 116 public Document getDocumentById(final String id) { 117 LOGGER.debug("Getting Federated document: {}", id); 118 if (null == id || id.isEmpty()) { 119 LOGGER.warn("Can not get document with null id"); 120 return null; 121 } 122 123 final Document doc = super.getDocumentById(id); 124 if ( doc == null ) { 125 LOGGER.debug("Non-existent node, document is null: {}", id); 126 return doc; 127 } 128 129 final DocumentReader docReader = readDocument(doc); 130 final DocumentWriter docWriter = writeDocument(doc); 131 final long lastmod = fileFor(id).lastModified(); 132 LOGGER.debug("Adding lastModified={}", lastmod); 133 docWriter.addProperty(JCR_LASTMODIFIED, lastmod); 134 135 final String primaryType = docReader.getPrimaryTypeName(); 136 137 if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) { 138 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id); 139 docWriter.addMixinType(FEDORA_RESOURCE); 140 } 141 142 // Is Fedora Datastream? 143 if (primaryType.equals(NT_FILE)) { 144 decorateDatastreamNode(docReader, docWriter); 145 146 // Is Fedora Content? 147 } else if (primaryType.equals(NT_RESOURCE)) { 148 decorateContentNode(docReader, docWriter, fileFor(id)); 149 150 // Is Fedora Object? 151 } else if (primaryType.equals(NT_FOLDER)) { 152 decorateObjectNode(docReader, docWriter); 153 } 154 155 return docWriter.document(); 156 } 157 158 /** 159 * Checks whether internally managed properties can and should be stored to 160 * an ExtraPropertiesStore. 161 * @return whether internally managed properties can and should be stored to 162 */ 163 protected boolean shouldCacheProperties() { 164 return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null); 165 } 166 167 168 /** 169 * Pass-thru to the parent class in order to make this function public 170 * 171 * @param id the node ID to test 172 * @return whether the id corresponds to the root location 173 */ 174 @Override 175 public boolean isRoot(final String id) { 176 return super.isRoot(id); 177 } 178 179 /** 180 * Pass-thru to the parent class in order to make this function public 181 * 182 * @param file the file used to compute a sha1 hash 183 * @return the sha1 hash of the file contents 184 */ 185 @Override 186 public String sha1(final File file) { 187 final String cachedSha1 = getCachedSha1(file); 188 if (cachedSha1 == null) { 189 return computeAndCacheSha1(file); 190 } 191 return cachedSha1; 192 } 193 194 private String getCachedSha1(final File file) { 195 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 196 if (extraPropertiesStore() != null) { 197 final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id); 198 final Name digestName = nameFrom(CONTENT_DIGEST); 199 if (extraProperties.containsKey(digestName)) { 200 if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) { 201 LOGGER.trace("Found sha1 for {} in extra properties store.", id); 202 final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString(); 203 return uriStr.substring(uriStr.indexOf("sha1:") + 5); 204 } 205 } 206 } else { 207 LOGGER.trace("No cache configured to contain object hashes."); 208 } 209 return null; 210 } 211 212 private String computeAndCacheSha1(final File file) { 213 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 214 LOGGER.trace("Computing sha1 for {}.", id); 215 final String sha1 = super.sha1(file); 216 if (shouldCacheProperties()) { 217 final Map<Name, Property> updateMap = new HashMap<>(); 218 final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST), 219 asURI("SHA-1", sha1)); 220 final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED), 221 factories().getDateFactory().create(file.lastModified())); 222 updateMap.put(digestProperty.getName(), digestProperty); 223 updateMap.put(digestDateProperty.getName(), digestDateProperty); 224 extraPropertiesStore().updateProperties(id, updateMap); 225 } 226 return sha1; 227 } 228 229 private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) { 230 if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) { 231 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId()); 232 docWriter.addMixinType(FEDORA_CONTAINER); 233 } 234 } 235 236 private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) { 237 if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) { 238 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId()); 239 docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION); 240 } 241 } 242 243 private static void decorateContentNode(final DocumentReader docReader, 244 final DocumentWriter docWriter, 245 final File file) { 246 if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) { 247 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId()); 248 docWriter.addMixinType(FEDORA_BINARY); 249 } 250 251 if (null == docReader.getProperty(CONTENT_DIGEST) 252 || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) { 253 final BinaryValue binaryValue = getBinaryValue(docReader); 254 final String dsChecksum = binaryValue.getHexHash(); 255 final String dsURI = asURI("SHA-1", dsChecksum).toString(); 256 257 LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId()); 258 docWriter.addProperty(CONTENT_DIGEST, dsURI); 259 } 260 261 if (null == docReader.getProperty(CONTENT_SIZE)) { 262 final long binarySize = file.length(); 263 LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId()); 264 docWriter.addProperty(CONTENT_SIZE, binarySize); 265 } 266 267 LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId()); 268 } 269 270 private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) { 271 if (lastModified == null) { 272 LOGGER.trace("Hash for {} has not been computed yet.", file.getName()); 273 return true; 274 } 275 final DateTime datetime = (DateTime) lastModified.getFirstValue(); 276 if (datetime.toDate().equals(new Date(file.lastModified()))) { 277 return false; 278 } 279 LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(), 280 new Date(file.lastModified()), datetime.toDate()); 281 return true; 282 } 283 284 private static BinaryValue getBinaryValue(final DocumentReader docReader) { 285 final Property binaryProperty = docReader.getProperty(JCR_DATA); 286 return (BinaryValue) binaryProperty.getFirstValue(); 287 } 288 289 /* Override write operations to also update the parent file's timestamp, so 290 its Last-Modified header correctly reflects changes to children. */ 291 @Override 292 public boolean removeDocument( final String id ) { 293 if ( super.removeDocument(id) ) { 294 touchParent(id); 295 return true; 296 } 297 return false; 298 } 299 300 @Override 301 public void storeDocument( final Document document ) { 302 super.storeDocument( document ); 303 touchParent(readDocument(document).getDocumentId()); 304 } 305 306 @Override 307 public void updateDocument( final DocumentChanges changes ) { 308 super.updateDocument( changes ); 309 touchParent( changes.getDocumentId() ); 310 } 311 312 /** 313 * Find the parent file, and set its timestamp to the current time. This 314 * timestamp will be used for populating the Last-Modified header. 315 * @param id the id 316 **/ 317 protected void touchParent( final String id ) { 318 if (!isRoot(id)) { 319 final File file = fileFor(id); 320 final File parent = file.getParentFile(); 321 parent.setLastModified(currentTimeMillis()); 322 } 323 } 324 325 /* Overriding so unit test can mock. */ 326 @Override 327 @VisibleForTesting 328 protected File fileFor( final String id ) { 329 return super.fileFor(id); 330 } 331 @Override 332 @VisibleForTesting 333 protected DocumentReader readDocument( final Document document ) { 334 return super.readDocument(document); 335 } 336 337 /* Overriding to make the FedoraFileSystemConnector is always read-only. */ 338 @Override 339 public boolean isReadonly() { 340 return true; 341 } 342 343 @Override 344 public boolean isContentNode(final String id) { 345 return super.isContentNode(id); 346 } 347 348}