ATLAS-342: Import Glossary Terms from csv/excel file into Glossary
Signed-off-by: Sarath Subramanian <sarath@apache.org>
This commit is contained in:
parent
a30c6d4e9c
commit
784b606ddf
|
|
@ -201,6 +201,11 @@ public final class Constants {
|
|||
public static final String ATTR_NAME_REPLICATED_FROM = "replicatedFrom";
|
||||
public static final Integer INCOMPLETE_ENTITY_VALUE = Integer.valueOf(1);
|
||||
|
||||
/**
 * File-format extensions accepted when uploading a file of AtlasGlossaryTerms.
 */
public enum GlossaryImportSupportedFileExtensions {
    XLSX,
    XLS,
    CSV
}
|
||||
|
||||
private Constants() {
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -167,6 +167,7 @@ public enum AtlasErrorCode {
|
|||
INVALID_BUSINESS_METADATA_NAME_FOR_ENTITY_TYPE(400, "ATLAS-400-00-095", "Invalid business-metadata: {0} specified for entity, applicable business-metadata: {1}"),
|
||||
BUSINESS_METADATA_ATTRIBUTE_DOES_NOT_EXIST(400, "ATLAS-400-00-096", "Business-metadata attribute does not exist in entity: {0}"),
|
||||
BUSINESS_METADATA_ATTRIBUTE_ALREADY_EXISTS(400, "ATLAS-400-00-097", "Business-metadata attribute already exists in entity: {0}"),
|
||||
// Sequence number is zero-padded to three digits like the neighbouring 400-series codes (…-095, -096, -097).
INVALID_FILE_TYPE(400, "ATLAS-400-00-098", "The provided file type {0} is not supported."),
|
||||
|
||||
UNAUTHORIZED_ACCESS(403, "ATLAS-403-00-001", "{0} is not authorized to perform {1}"),
|
||||
|
||||
|
|
@ -190,6 +191,7 @@ public enum AtlasErrorCode {
|
|||
INVALID_LINEAGE_ENTITY_TYPE(404, "ATLAS-404-00-011", "Given instance guid {0} with type {1} is not a valid lineage entity type."),
|
||||
INSTANCE_GUID_DELETED(404, "ATLAS-404-00-012", "Given instance guid {0} has been deleted"),
|
||||
NO_PROPAGATED_CLASSIFICATIONS_FOUND_FOR_ENTITY(404, "ATLAS-404-00-013", "No propagated classifications associated with entity: {0}"),
|
||||
NO_DATA_FOUND(404, "ATLAS-404-00-014", "No data found in the uploaded file"),
|
||||
|
||||
// All data conflict errors go here
|
||||
TYPE_ALREADY_EXISTS(409, "ATLAS-409-00-001", "Given type {0} already exists"),
|
||||
|
|
@ -221,7 +223,9 @@ public enum AtlasErrorCode {
|
|||
HIVE_HOOK(500, "ATLAS-500-00-010", "HiveHook: {0}"),
|
||||
HIVE_HOOK_METASTORE_BRIDGE(500, "ATLAS-500-00-011", "HiveHookMetaStoreBridge: {0}"),
|
||||
DATA_ACCESS_LOAD_FAILED(500, "ATLAS-500-00-013", "Load failed: {0}"),
|
||||
ENTITY_NOTIFICATION_FAILED(500, "ATLAS-500-00-014", "Notification failed for operation: {0} : {1}");
|
||||
ENTITY_NOTIFICATION_FAILED(500, "ATLAS-500-00-014", "Notification failed for operation: {0} : {1}"),
|
||||
FAILED_TO_UPLOAD(500, "ATLAS-500-00-015", "Error occurred while uploading the file: {0}"),
|
||||
FAILED_TO_CREATE_GLOSSARY_TERM(500, "ATLAS-500-00-016", "Error occurred while creating glossary term: {0}");
|
||||
|
||||
private String errorCode;
|
||||
private String errorMessage;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,13 @@ public class AtlasGlossaryHeader {
|
|||
private String relationGuid;
|
||||
private String displayText;
|
||||
|
||||
/**
 * Creates a header that refers to the glossary with the given guid.
 *
 * @param glossaryGuid guid stored in this header's {@code glossaryGuid} field
 */
public AtlasGlossaryHeader(String glossaryGuid) {
    super();
    this.glossaryGuid = glossaryGuid;
}
|
||||
|
||||
/**
 * Creates an empty header; all fields keep their default values.
 */
public AtlasGlossaryHeader() {
    super();
}
|
||||
|
||||
public String getDisplayText() {
|
||||
return displayText;
|
||||
}
|
||||
|
|
|
|||
5
pom.xml
5
pom.xml
|
|
@ -706,6 +706,11 @@
|
|||
<commons-io.version>2.6</commons-io.version>
|
||||
<!-- Apache commons -->
|
||||
|
||||
<!--Apache poi and Open csv-->
|
||||
<opencsv.version>4.6</opencsv.version>
|
||||
<poi.version>3.17</poi.version>
|
||||
<poi-ooxml.version>3.17</poi-ooxml.version>
|
||||
|
||||
<javax-inject.version>1</javax-inject.version>
|
||||
<jettison.version>1.3.7</jettison.version>
|
||||
<paranamer.version>2.7</paranamer.version>
|
||||
|
|
|
|||
|
|
@ -224,6 +224,24 @@
|
|||
<version>${hppc.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.opencsv</groupId>
|
||||
<artifactId>opencsv</artifactId>
|
||||
<version>${opencsv.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>${poi.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>${poi-ooxml.version}</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<profiles>
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
|
|||
import org.apache.atlas.repository.store.graph.v2.AtlasEntityChangeNotifier;
|
||||
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
|
||||
import org.apache.atlas.type.AtlasTypeRegistry;
|
||||
import org.apache.atlas.util.FileUtils;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
|
|
@ -41,6 +42,8 @@ import org.slf4j.LoggerFactory;
|
|||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
|
@ -51,9 +54,7 @@ import java.util.Objects;
|
|||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.atlas.glossary.GlossaryUtils.getAtlasGlossaryCategorySkeleton;
|
||||
import static org.apache.atlas.glossary.GlossaryUtils.getAtlasGlossaryTermSkeleton;
|
||||
import static org.apache.atlas.glossary.GlossaryUtils.getGlossarySkeleton;
|
||||
import static org.apache.atlas.glossary.GlossaryUtils.*;
|
||||
|
||||
@Service
|
||||
public class GlossaryService {
|
||||
|
|
@ -67,7 +68,7 @@ public class GlossaryService {
|
|||
private final AtlasTypeRegistry atlasTypeRegistry;
|
||||
private final AtlasEntityChangeNotifier entityChangeNotifier;
|
||||
|
||||
private final char[] invalidNameChars = {'@', '.'};
|
||||
private static final char[] invalidNameChars = { '@', '.' };
|
||||
|
||||
@Inject
|
||||
public GlossaryService(DataAccess dataAccess, final AtlasRelationshipStore relationshipStore,
|
||||
|
|
@ -1029,7 +1030,7 @@ public class GlossaryService {
|
|||
termHeaders.forEach(t -> t.setDisplayText(getDisplayText(termMap.get(t.getTermGuid()))));
|
||||
}
|
||||
|
||||
private boolean isNameInvalid(String name) {
|
||||
public static boolean isNameInvalid(String name) {
|
||||
return StringUtils.containsAny(name, invalidNameChars);
|
||||
}
|
||||
|
||||
|
|
@ -1080,4 +1081,37 @@ public class GlossaryService {
|
|||
}
|
||||
}
|
||||
|
||||
public List<AtlasGlossaryTerm> importGlossaryData(InputStream inputStream, String fileName) throws AtlasBaseException {
|
||||
List<AtlasGlossaryTerm> ret;
|
||||
|
||||
try {
|
||||
if (StringUtils.isBlank(fileName)) {
|
||||
throw new AtlasBaseException(AtlasErrorCode.INVALID_FILE_TYPE, fileName);
|
||||
}
|
||||
|
||||
List<String[]> fileData = FileUtils.readFileData(fileName, inputStream);
|
||||
List<String> failedTermMsgs = new ArrayList<>();
|
||||
|
||||
ret = glossaryTermUtils.getGlossaryTermDataList(fileData, failedTermMsgs);
|
||||
ret = createGlossaryTerms(ret);
|
||||
} catch (IOException e) {
|
||||
throw new AtlasBaseException(AtlasErrorCode.FAILED_TO_UPLOAD, e);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
private List<AtlasGlossaryTerm> createGlossaryTerms(List<AtlasGlossaryTerm> glossaryTerms) throws AtlasBaseException {
|
||||
List<AtlasGlossaryTerm> ret = new ArrayList<>();
|
||||
|
||||
for (AtlasGlossaryTerm glossaryTerm : glossaryTerms) {
|
||||
try {
|
||||
ret.add(createTerm(glossaryTerm));
|
||||
} catch (AtlasBaseException e) {
|
||||
throw new AtlasBaseException(AtlasErrorCode.FAILED_TO_CREATE_GLOSSARY_TERM, e);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,19 +28,26 @@ import org.apache.atlas.model.instance.AtlasObjectId;
|
|||
import org.apache.atlas.model.instance.AtlasRelatedObjectId;
|
||||
import org.apache.atlas.model.instance.AtlasRelationship;
|
||||
import org.apache.atlas.model.instance.AtlasStruct;
|
||||
import org.apache.atlas.repository.graphdb.AtlasVertex;
|
||||
import org.apache.atlas.repository.ogm.DataAccess;
|
||||
import org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
|
||||
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
|
||||
import org.apache.atlas.type.AtlasRelationshipType;
|
||||
import org.apache.atlas.type.AtlasTypeRegistry;
|
||||
import org.apache.atlas.util.FileUtils;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.collections.MapUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
|
@ -522,4 +529,205 @@ public class GlossaryTermUtils extends GlossaryUtils {
|
|||
}
|
||||
}
|
||||
|
||||
protected List<AtlasGlossaryTerm> getGlossaryTermDataList(List<String[]> fileData, List<String> failedTermMsgs) throws AtlasBaseException {
|
||||
List<AtlasGlossaryTerm> glossaryTerms = new ArrayList<>();
|
||||
Map<String, String> glossaryNameCache = new HashMap<>();
|
||||
|
||||
for (String[] record : fileData) {
|
||||
AtlasGlossaryTerm glossaryTerm = new AtlasGlossaryTerm();
|
||||
|
||||
if ((record.length < 1) || StringUtils.isBlank(record[0])) {
|
||||
LOG.error("The GlossaryName is blank for the record : ", Arrays.toString(record));
|
||||
failedTermMsgs.add("The GlossaryName is blank for the record : " + Arrays.toString(record));
|
||||
}
|
||||
|
||||
String glossaryName = record[0];
|
||||
String glossaryGuid;
|
||||
|
||||
if (glossaryNameCache.get(glossaryName) != null) {
|
||||
glossaryGuid = glossaryNameCache.get(glossaryName);
|
||||
|
||||
} else {
|
||||
AtlasVertex vertex = AtlasGraphUtilsV2.findByTypeAndUniquePropertyName(GlossaryUtils.ATLAS_GLOSSARY_TYPENAME, GlossaryUtils.ATLAS_GLOSSARY_TYPENAME + "." + QUALIFIED_NAME_ATTR, glossaryName);
|
||||
|
||||
glossaryGuid = (vertex != null) ? AtlasGraphUtilsV2.getIdFromVertex(vertex) : null;
|
||||
}
|
||||
|
||||
if (glossaryGuid == null) {
|
||||
if (GlossaryService.isNameInvalid(glossaryName)) {
|
||||
LOG.error("The provided Glossary Name is invalid : " + glossaryName);
|
||||
failedTermMsgs.add("The provided Glossary Name is invalid : " + glossaryName);
|
||||
} else {
|
||||
AtlasGlossary glossary = new AtlasGlossary();
|
||||
glossary.setQualifiedName(glossaryName);
|
||||
glossary.setName(glossaryName);
|
||||
|
||||
glossary = dataAccess.save(glossary);
|
||||
glossaryGuid = glossary.getGuid();
|
||||
}
|
||||
}
|
||||
|
||||
if (glossaryGuid != null) {
|
||||
glossaryNameCache.put(glossaryName, glossaryGuid);
|
||||
glossaryTerm = populateGlossaryTermObject(failedTermMsgs, record, glossaryGuid);
|
||||
glossaryTerms.add(glossaryTerm);
|
||||
}
|
||||
}
|
||||
|
||||
if (failedTermMsgs.size() == 0) {
|
||||
return glossaryTerms;
|
||||
} else {
|
||||
throw new AtlasBaseException("The uploaded file has not been processed due to the following errors : " + "\n" + failedTermMsgs.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public static String getGlossaryTermHeaders() {
|
||||
List<String> ret = new ArrayList<>();
|
||||
|
||||
ret.add("GlossaryName");
|
||||
ret.add("TermName");
|
||||
ret.add("ShortDescription");
|
||||
ret.add("LongDescription");
|
||||
ret.add("Examples");
|
||||
ret.add("Abbreviation");
|
||||
ret.add("Usage");
|
||||
ret.add("AdditionalAttributes");
|
||||
ret.add("TranslationTerms");
|
||||
ret.add("ValidValuesFor");
|
||||
ret.add("Synonyms");
|
||||
ret.add("ReplacedBy");
|
||||
ret.add("ValidValues");
|
||||
ret.add("ReplacementTerms");
|
||||
ret.add("SeeAlso");
|
||||
ret.add("TranslatedTerms");
|
||||
ret.add("IsA");
|
||||
ret.add("Antonyms");
|
||||
ret.add("Classifies");
|
||||
ret.add("PreferredToTerms");
|
||||
ret.add("PreferredTerms");
|
||||
|
||||
return String.join(", ", ret);
|
||||
}
|
||||
|
||||
protected Map getMapValue(String csvRecord, List<String> failedTermMsgs) {
|
||||
Map ret = null;
|
||||
|
||||
if (StringUtils.isNotBlank(csvRecord)) {
|
||||
ret = new HashMap<>();
|
||||
String csvRecordArray[] = csvRecord.split(FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER);
|
||||
String recordArray[];
|
||||
|
||||
for (String record : csvRecordArray) {
|
||||
recordArray = record.split(FileUtils.COLON_CHARACTER);
|
||||
|
||||
if ((recordArray.length % 2) == 0) {
|
||||
ret.put(recordArray[0], recordArray[1]);
|
||||
} else {
|
||||
failedTermMsgs.add("\n" + "The Data in the uploaded file is incorrectly specified : " + csvRecord);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected List getListValue(String csvRecord) {
|
||||
List ret = null;
|
||||
|
||||
if (StringUtils.isNotBlank(csvRecord)) {
|
||||
ret = Arrays.asList(csvRecord.split(FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected Set getAtlasRelatedTermHeaderSet(String csvRecord, String termName, String glossaryName, List<String> failedTermMsgs) {
|
||||
Set ret = null;
|
||||
|
||||
if (StringUtils.isNotBlank(csvRecord)) {
|
||||
ret = new HashSet();
|
||||
String csvRecordArray[] = csvRecord.split(FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER);
|
||||
AtlasRelatedTermHeader relatedTermHeader;
|
||||
|
||||
for (String data : csvRecordArray) {
|
||||
AtlasVertex vertex = null;
|
||||
String dataArray[] = data.split(FileUtils.ESCAPE_CHARACTER + FileUtils.COLON_CHARACTER);
|
||||
|
||||
if ((dataArray.length % 2) == 0) {
|
||||
vertex = AtlasGraphUtilsV2.findByTypeAndUniquePropertyName(GlossaryUtils.ATLAS_GLOSSARY_TERM_TYPENAME,
|
||||
GlossaryUtils.ATLAS_GLOSSARY_TERM_TYPENAME + invalidNameChars[1] + QUALIFIED_NAME_ATTR, dataArray[1] + invalidNameChars[0] + dataArray[0]);
|
||||
} else {
|
||||
failedTermMsgs.add("\n" + "Either incorrect data specified for Term or Term does not exist : " +termName);
|
||||
}
|
||||
|
||||
if (vertex != null) {
|
||||
String glossaryTermGuid = AtlasGraphUtilsV2.getIdFromVertex(vertex);
|
||||
relatedTermHeader = new AtlasRelatedTermHeader();
|
||||
relatedTermHeader.setTermGuid(glossaryTermGuid);
|
||||
ret.add(relatedTermHeader);
|
||||
} else {
|
||||
failedTermMsgs.add("\n" + "The provided Reference Glossary and TermName does not exist in the system " +
|
||||
dataArray[1] + FileUtils.COLON_CHARACTER + dataArray[0] + " for record with TermName : " + termName + " and GlossaryName : " + glossaryName);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected AtlasGlossaryTerm populateGlossaryTermObject(List<String> failedTermMsgList, String[] record, String glossaryGuid) {
|
||||
AtlasGlossaryTerm ret = new AtlasGlossaryTerm();
|
||||
int i = 0;
|
||||
int length = record.length;
|
||||
|
||||
ret.setName((length > ++i) ? record[i] : null);
|
||||
|
||||
if (!StringUtils.isNotBlank(ret.getName())) {
|
||||
failedTermMsgList.add("\n" + "The TermName is blank for provided record: " + Arrays.toString(record));
|
||||
} else {
|
||||
ret.setShortDescription((length > ++i) ? record[i] : null);
|
||||
|
||||
ret.setLongDescription((length > ++i) ? record[i] : null);
|
||||
|
||||
ret.setExamples((length > ++i) ? (List<String>) getListValue(record[i]) : null);
|
||||
|
||||
ret.setAbbreviation((length > ++i) ? record[i] : null);
|
||||
|
||||
ret.setUsage((length > ++i) ? record[i] : null);
|
||||
|
||||
ret.setAdditionalAttributes(((length > ++i) ? (Map<String, Object>) getMapValue(record[i], failedTermMsgList) : null));
|
||||
|
||||
ret.setTranslationTerms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setValidValuesFor((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setSynonyms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setReplacedBy((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setValidValues((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setReplacementTerms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setSeeAlso((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setTranslatedTerms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setIsA((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setAnchor(new AtlasGlossaryHeader(glossaryGuid));
|
||||
|
||||
ret.setAntonyms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setClassifies((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setPreferredToTerms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
|
||||
ret.setPreferredTerms((length > ++i) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[i], ret.getName(), record[0], failedTermMsgList) : null);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@
|
|||
*/
|
||||
package org.apache.atlas.glossary;
|
||||
|
||||
import org.apache.atlas.AtlasErrorCode;
|
||||
import org.apache.atlas.exception.AtlasBaseException;
|
||||
import org.apache.atlas.model.glossary.AtlasGlossary;
|
||||
import org.apache.atlas.model.glossary.AtlasGlossaryCategory;
|
||||
|
|
@ -44,6 +43,10 @@ public abstract class GlossaryUtils {
|
|||
static final String ATLAS_GLOSSARY_TERM_TYPENAME = "AtlasGlossaryTerm";
|
||||
static final String ATLAS_GLOSSARY_CATEGORY_TYPENAME = "AtlasGlossaryCategory";
|
||||
|
||||
public static final String NAME = "name";
|
||||
public static final String QUALIFIED_NAME_ATTR = "qualifiedName";
|
||||
public static final char[] invalidNameChars = {'@', '.'};
|
||||
|
||||
// Relation name constants
|
||||
protected static final String ATLAS_GLOSSARY_PREFIX = ATLAS_GLOSSARY_TYPENAME;
|
||||
protected static final String TERM_ANCHOR = ATLAS_GLOSSARY_PREFIX + "TermAnchor";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.atlas.util;
|
||||
|
||||
import com.opencsv.CSVReader;
|
||||
import org.apache.atlas.AtlasErrorCode;
|
||||
import org.apache.atlas.exception.AtlasBaseException;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import static org.apache.atlas.repository.Constants.GlossaryImportSupportedFileExtensions.*;
|
||||
|
||||
public class FileUtils {
|
||||
public static final String PIPE_CHARACTER = "|";
|
||||
public static final String COLON_CHARACTER = ":";
|
||||
public static final String ESCAPE_CHARACTER = "\\";
|
||||
|
||||
public static List<String[]> readFileData(String fileName, InputStream inputStream) throws IOException, AtlasBaseException {
|
||||
List<String[]> ret;
|
||||
String extension = FilenameUtils.getExtension(fileName);
|
||||
|
||||
if (extension.equalsIgnoreCase(CSV.name())) {
|
||||
ret = readCSV(inputStream);
|
||||
} else if (extension.equalsIgnoreCase(XLS.name()) || extension.equalsIgnoreCase(XLSX.name())) {
|
||||
ret = readExcel(inputStream, extension);
|
||||
} else {
|
||||
throw new AtlasBaseException(AtlasErrorCode.INVALID_FILE_TYPE);
|
||||
}
|
||||
|
||||
if (CollectionUtils.isEmpty(ret)) {
|
||||
throw new AtlasBaseException(AtlasErrorCode.NO_DATA_FOUND);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static List<String[]> readCSV(InputStream inputStream) throws IOException {
|
||||
List<String[]> ret = new ArrayList<>();
|
||||
|
||||
try (CSVReader csvReader = new CSVReader(new InputStreamReader(inputStream))) {
|
||||
String[] header = csvReader.readNext();
|
||||
|
||||
if (header == null || header.length == 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
String[] data;
|
||||
|
||||
while ((data = csvReader.readNext()) != null) {
|
||||
if (data.length > 1) {
|
||||
ret.add(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static List<String[]> readExcel(InputStream inputStream, String extension) throws IOException {
|
||||
List<String[]> ret = new ArrayList<>();
|
||||
Workbook excelBook = extension.equalsIgnoreCase(XLS.name()) ? new HSSFWorkbook(inputStream) : new XSSFWorkbook(inputStream);
|
||||
Sheet excelSheet = excelBook.getSheetAt(0);
|
||||
Iterator itr = excelSheet.rowIterator();
|
||||
Row headerRow = (Row) itr.next();
|
||||
|
||||
if (isRowEmpty(headerRow)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
while (itr.hasNext()) {
|
||||
Row row = (Row) itr.next();
|
||||
|
||||
if (!isRowEmpty(row)) {
|
||||
String[] data = new String[row.getLastCellNum()];
|
||||
|
||||
for (int i = 0; i < row.getLastCellNum(); i++) {
|
||||
data[i] = (row.getCell(i) != null) ? row.getCell(i).getStringCellValue().trim() : null;
|
||||
}
|
||||
|
||||
ret.add(data);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
private static boolean isRowEmpty(Row row) {
|
||||
for (int c = row.getFirstCellNum(); c < row.getLastCellNum(); c++) {
|
||||
Cell cell = row.getCell(c);
|
||||
|
||||
if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -36,13 +36,14 @@ import org.apache.atlas.model.instance.AtlasRelatedObjectId;
|
|||
import org.apache.atlas.model.instance.EntityMutationResponse;
|
||||
import org.apache.atlas.model.typedef.AtlasClassificationDef;
|
||||
import org.apache.atlas.model.typedef.AtlasTypesDef;
|
||||
import org.apache.atlas.utils.TestLoadModelUtils;
|
||||
import org.apache.atlas.repository.store.graph.AtlasEntityStore;
|
||||
import org.apache.atlas.repository.store.graph.v2.AtlasEntityStream;
|
||||
import org.apache.atlas.store.AtlasTypeDefStore;
|
||||
import org.apache.atlas.type.AtlasTypeRegistry;
|
||||
import org.apache.atlas.utils.AtlasJson;
|
||||
import org.apache.atlas.utils.TestLoadModelUtils;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.SkipException;
|
||||
|
|
@ -52,7 +53,11 @@ import org.testng.annotations.Guice;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
|
@ -80,13 +85,16 @@ public class GlossaryServiceTest {
|
|||
|
||||
private AtlasRelatedObjectId relatedObjectId;
|
||||
|
||||
public static final String CSV_FILES = "/csvFiles/";
|
||||
public static final String EXCEL_FILES = "/excelFiles/";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] getGlossaryTermsProvider() {
|
||||
return new Object[][]{
|
||||
// offset, limit, expected
|
||||
{0, -1, 4},
|
||||
{0, -1, 6},
|
||||
{0, 2, 2},
|
||||
{2, 5, 2},
|
||||
{2, 5, 4},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -845,7 +853,6 @@ public class GlossaryServiceTest {
|
|||
} catch (AtlasBaseException e) {
|
||||
fail("RelatedTerm association should've succeeded", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(dataProvider = "getGlossaryTermsProvider" , groups = "Glossary.GET.postUpdate", dependsOnGroups = "Glossary.UPDATE")
|
||||
|
|
@ -897,7 +904,6 @@ public class GlossaryServiceTest {
|
|||
};
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "getCategoryTermsProvider", dependsOnGroups = "Glossary.CREATE")
|
||||
public void testGetCategoryTerms(int offset, int limit, int expected) {
|
||||
for (AtlasGlossaryCategory c : Arrays.asList(accountCategory, mortgageCategory)) {
|
||||
|
|
@ -910,4 +916,84 @@ public class GlossaryServiceTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTemplate(){
|
||||
try {
|
||||
String glossaryTermHeaderListAsString = GlossaryTermUtils.getGlossaryTermHeaders();
|
||||
|
||||
assertNotNull(glossaryTermHeaderListAsString);
|
||||
assertEquals(glossaryTermHeaderListAsString,"GlossaryName, TermName, ShortDescription, LongDescription, Examples, Abbreviation, Usage, AdditionalAttributes, TranslationTerms, ValidValuesFor, Synonyms, ReplacedBy, ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms, IsA, Antonyms, Classifies, PreferredToTerms, PreferredTerms");
|
||||
} catch (Exception e) {
|
||||
fail("The Template for Glossary Term should've been a success",e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test( dependsOnGroups = "Glossary.CREATE" )
|
||||
public void testImportGlossaryData(){
|
||||
try {
|
||||
InputStream inputStream = getFile(CSV_FILES,"template_1.csv");
|
||||
List<AtlasGlossaryTerm> atlasGlossaryTermList = glossaryService.importGlossaryData(inputStream,"template_1.csv");
|
||||
|
||||
assertNotNull(atlasGlossaryTermList);
|
||||
assertEquals(atlasGlossaryTermList.size(), 1);
|
||||
|
||||
InputStream inputStream1 = getFile(EXCEL_FILES,"template_1.xlsx");
|
||||
List<AtlasGlossaryTerm> atlasGlossaryTermList1 = glossaryService.importGlossaryData(inputStream1,"template_1.xlsx");
|
||||
|
||||
assertNotNull(atlasGlossaryTermList1);
|
||||
assertEquals(atlasGlossaryTermList1.size(), 1);
|
||||
} catch (AtlasBaseException e){
|
||||
fail("The GlossaryTerm should have been created "+e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyFileException() {
|
||||
InputStream inputStream = getFile(CSV_FILES, "empty.csv");
|
||||
|
||||
try {
|
||||
glossaryService.importGlossaryData(inputStream, "empty.csv");
|
||||
fail("Error occurred : Failed to recognize the empty file.");
|
||||
} catch (AtlasBaseException e) {
|
||||
assertEquals(e.getMessage(),"No Data found in the uploaded file !");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncorrectFileException() {
|
||||
InputStream inputStream = getFile(CSV_FILES, "incorrectFile.csv");
|
||||
|
||||
try {
|
||||
glossaryService.importGlossaryData(inputStream, "incorrectFile.csv");
|
||||
fail("Error occurred : Failed to recognize the incorrect file.");
|
||||
} catch (AtlasBaseException e) {
|
||||
assertEquals(e.getMessage(),"The uploaded file has not been processed due to the following errors : \n" +
|
||||
"[\n" +
|
||||
"The provided Reference Glossary and TermName does not exist in the system GentsFootwear: for record with TermName : BankBranch1 and GlossaryName : testBankingGlossary]");
|
||||
}
|
||||
}
|
||||
|
||||
private static InputStream getFile(String subDir, String fileName){
|
||||
final String userDir = System.getProperty("user.dir");
|
||||
String filePath = getTestFilePath(userDir, subDir, fileName);
|
||||
File f = new File(filePath);
|
||||
InputStream fs = null;
|
||||
|
||||
try {
|
||||
fs = new FileInputStream(f);
|
||||
} catch (FileNotFoundException e) {
|
||||
LOG.error("File could not be found at: {}", filePath, e);
|
||||
}
|
||||
|
||||
return fs;
|
||||
}
|
||||
|
||||
private static String getTestFilePath(String startPath, String subDir, String fileName) {
|
||||
if (StringUtils.isNotEmpty(subDir)) {
|
||||
return startPath + "/src/test/resources/" + subDir + "/" + fileName;
|
||||
} else {
|
||||
return startPath + "/src/test/resources/" + fileName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|
@ -0,0 +1,2 @@
|
|||
GlossaryName, TermName, ShortDescription, LongDescription, Examples, Abbreviation, Usage, AdditionalAttributes, TranslationTerms, ValidValuesFor, Synonyms, ReplacedBy, ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms, IsA, Antonyms, Classifies, PreferredToTerms, PreferredTerms
|
||||
testBankingGlossary,BankBranch1,SD4,LD4,"EXAMPLE","ABBREVIATION","USAGE",,,,,,,,,,,,,":GentsFootwear",
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
GlossaryName, TermName, ShortDescription, LongDescription, Examples, Abbreviation, Usage, AdditionalAttributes, TranslationTerms, ValidValuesFor, Synonyms, ReplacedBy, ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms, IsA, Antonyms, Classifies, PreferredToTerms, PreferredTerms
|
||||
testBankingGlossary,BankBranch,SD4,LD4,"EXAMPLE","ABBREVIATION","USAGE",,,,,,,,,,,,,,
|
||||
|
Binary file not shown.
|
|
@ -17,10 +17,13 @@
|
|||
*/
|
||||
package org.apache.atlas.web.rest;
|
||||
|
||||
import com.sun.jersey.core.header.FormDataContentDisposition;
|
||||
import com.sun.jersey.multipart.FormDataParam;
|
||||
import org.apache.atlas.AtlasErrorCode;
|
||||
import org.apache.atlas.SortOrder;
|
||||
import org.apache.atlas.exception.AtlasBaseException;
|
||||
import org.apache.atlas.glossary.GlossaryService;
|
||||
import org.apache.atlas.glossary.GlossaryTermUtils;
|
||||
import org.apache.atlas.model.glossary.AtlasGlossary;
|
||||
import org.apache.atlas.model.glossary.AtlasGlossaryCategory;
|
||||
import org.apache.atlas.model.glossary.AtlasGlossaryTerm;
|
||||
|
|
@ -35,8 +38,22 @@ import org.slf4j.LoggerFactory;
|
|||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.ws.rs.*;
|
||||
import javax.ws.rs.Consumes;
|
||||
import javax.ws.rs.DELETE;
|
||||
import javax.ws.rs.DefaultValue;
|
||||
import javax.ws.rs.GET;
|
||||
import javax.ws.rs.POST;
|
||||
import javax.ws.rs.PUT;
|
||||
import javax.ws.rs.Path;
|
||||
import javax.ws.rs.PathParam;
|
||||
import javax.ws.rs.Produces;
|
||||
import javax.ws.rs.QueryParam;
|
||||
import javax.ws.rs.WebApplicationException;
|
||||
import javax.ws.rs.core.MediaType;
|
||||
import javax.ws.rs.core.StreamingOutput;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
|
@ -934,4 +951,40 @@ public class GlossaryREST {
|
|||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* Get sample template for uploading/creating bulk AtlasGlossaryTerm
|
||||
*
|
||||
* @return Template File
|
||||
* @HTTP 400 If the provided fileType is not supported
|
||||
*/
|
||||
@GET
|
||||
@Path("/import/template")
|
||||
@Produces(MediaType.APPLICATION_OCTET_STREAM)
|
||||
public StreamingOutput produceTemplate() {
|
||||
return new StreamingOutput() {
|
||||
@Override
|
||||
public void write(OutputStream outputStream) throws IOException, WebApplicationException {
|
||||
outputStream.write(GlossaryTermUtils.getGlossaryTermHeaders().getBytes());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload glossary file for creating AtlasGlossaryTerms in bulk
|
||||
*
|
||||
* @param inputStream InputStream of file
|
||||
* @param fileDetail FormDataContentDisposition metadata of file
|
||||
* @return
|
||||
* @throws AtlasBaseException
|
||||
* @HTTP 200 If glossary term creation was successful
|
||||
* @HTTP 400 If Glossary term definition has invalid or missing information
|
||||
* @HTTP 409 If Glossary term already exists (duplicate qualifiedName)
|
||||
*/
|
||||
@POST
|
||||
@Path("/import")
|
||||
@Consumes(MediaType.MULTIPART_FORM_DATA)
|
||||
public List<AtlasGlossaryTerm> importGlossaryData(@FormDataParam("file") InputStream inputStream,
|
||||
@FormDataParam("file") FormDataContentDisposition fileDetail) throws AtlasBaseException {
|
||||
return glossaryService.importGlossaryData(inputStream, fileDetail.getFileName());
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue