ATLAS-4225 : Support for Chinese character in Atlas entities

Signed-off-by: Pinal Shah <pinal.shah@freestoneinfotech.com>
This commit is contained in:
chaitali.borole 2024-05-02 11:10:03 +05:30 committed by Pinal Shah
parent 94083a3802
commit f46d1c47ec
10 changed files with 867 additions and 5 deletions

View File

@ -1075,17 +1075,30 @@ public class AtlasStructType extends AtlasType {
}
public static boolean hastokenizeChar(String value) {
if (value != null) {
if (StringUtils.isNotEmpty(value)) {
for (int i = 0; i < value.length(); i++) {
if (hastokenizeChar(value, i)) {
return true;
} else if (hasCJKChar(value,i)) {
return true;
}
}
}
return false;
}
/**
 * Tells whether the UTF-16 char at index {@code i} of {@code value} lies in one of these
 * Unicode blocks: CJK Unified Ideographs, CJK Compatibility Ideographs,
 * CJK Unified Ideographs Extension A, or Hiragana.
 *
 * @param value string to inspect; must not be {@code null}
 * @param i     index of the char to classify; must be a valid index into {@code value}
 * @return {@code true} if the char belongs to one of the blocks listed above
 */
private static boolean hasCJKChar(String value,int i){
    final Character.UnicodeBlock charBlock = Character.UnicodeBlock.of(value.charAt(i));

    // UnicodeBlock constants are singletons whose equals() is identity-based, so == is equivalent.
    return charBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || charBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || charBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || charBlock == Character.UnicodeBlock.HIRAGANA;
}
private static boolean hastokenizeChar(String value, int i) {
char c = value.charAt(i);

View File

@ -49,6 +49,8 @@ public class TestAtlasStructType {
private final AtlasStructType structType;
private final List<Object> validValues;
private final List<Object> invalidValues;
private final List<String> tokenizedValue;
private final List<String> nonTokenizedValue;
{
AtlasAttributeDef multiValuedAttribMinMax = new AtlasAttributeDef();
@ -126,6 +128,31 @@ public class TestAtlasStructType {
invalidValues.add(new HashSet()); // incorrect datatype
invalidValues.add(new ArrayList()); // incorrect datatype
invalidValues.add(new String[] {}); // incorrect datatype
// Inputs for which hastokenizeChar(...) is expected to return true.
tokenizedValue = new ArrayList<>();
tokenizedValue.add("test[data"); // special char [
tokenizedValue.add("test]data"); // special char ]
// An empty literal here can never satisfy assertTrue(hastokenizeChar(...)); use a real single CJK ideograph.
tokenizedValue.add("中"); // single-character Chinese data
tokenizedValue.add("数据"); // multiple-character Chinese data
tokenizedValue.add("test data"); // English words separated by a space
tokenizedValue.add("testdata "); // trailing space after testdata
tokenizedValue.add("私は日本語を話します"); // Japanese sentence (kanji + hiragana)
tokenizedValue.add("元帳"); // Japanese "ledger" (kanji)
tokenizedValue.add("mydata&"); // special char &
tokenizedValue.add("test.1data");
tokenizedValue.add("test:1data");
// Inputs for which hastokenizeChar(...) is expected to return false.
nonTokenizedValue = new ArrayList<>();
nonTokenizedValue.add("test.data");
nonTokenizedValue.add("test:data");
nonTokenizedValue.add("test_data");
nonTokenizedValue.add("test:");
nonTokenizedValue.add("test.");
nonTokenizedValue.add("test_");
nonTokenizedValue.add("レシート"); // katakana only
nonTokenizedValue.add("test");
}
@Test
@ -199,6 +226,16 @@ public class TestAtlasStructType {
}
}
/**
 * Checks hastokenizeChar against both fixture lists: every entry of tokenizedValue must be
 * flagged, and no entry of nonTokenizedValue may be.
 */
@Test
public void testTokenizeChar() {
    tokenizedValue.forEach(expectedHit ->
            assertTrue(AtlasStructType.AtlasAttribute.hastokenizeChar(expectedHit)));

    nonTokenizedValue.forEach(expectedMiss ->
            assertFalse(AtlasStructType.AtlasAttribute.hastokenizeChar(expectedMiss)));
}
private static AtlasStructType getStructType(AtlasStructDef structDef) {
try {
return new AtlasStructType(structDef, ModelTestUtil.getTypesRegistry());

View File

@ -466,6 +466,16 @@ public class EntityDiscoveryService implements AtlasDiscoveryService {
/**
 * Runs a basic search for the given parameters.
 *
 * Before searching, leading and trailing '*' characters are stripped from the query; the
 * stripped text replaces the query on {@code searchParameters} only when
 * {@code hastokenizeChar} flags it. Note that this mutates the caller's object.
 *
 * @param searchParameters search request; its query may be rewritten in place
 * @return result of searching with a context built from the (possibly updated) parameters
 * @throws AtlasBaseException propagated from the underlying search
 */
@Override
@GraphTransaction
public AtlasSearchResult searchWithParameters(SearchParameters searchParameters) throws AtlasBaseException {
    final String originalQuery = searchParameters.getQuery();

    if (StringUtils.isNotEmpty(originalQuery)) {
        final String withoutWildcards = StringUtils.strip(originalQuery, "*");
        final boolean needsRewrite = AtlasStructType.AtlasAttribute.hastokenizeChar(withoutWildcards);

        if (needsRewrite) {
            searchParameters.setQuery(withoutWildcards);
        }
    }

    final SearchContext searchContext = new SearchContext(searchParameters, typeRegistry, graph, indexer.getVertexIndexKeys());
    return searchWithSearchContext(searchContext);
}

View File

@ -142,8 +142,8 @@ public class EntityGraphMapper {
private static final String CUSTOM_ATTRIBUTE_KEY_SPECIAL_PREFIX = AtlasConfiguration.CUSTOM_ATTRIBUTE_KEY_SPECIAL_PREFIX.getString();
private static final String CLASSIFICATION_NAME_DELIMITER = "|";
// Custom-attribute keys and labels may consist of ASCII letters, digits, '_' and '-', plus
// characters in these ranges: U+2E80 to U+2FD5 (CJK/Kangxi radicals), U+3190 to U+319F (Kanbun),
// U+3400 to U+4DBF (CJK Extension A), U+4E00 to U+9FCC (CJK Unified Ideographs) and
// U+F900 to U+FAAD (CJK Compatibility Ideographs). Each constant is declared exactly once;
// the superseded ASCII-only declarations are dropped because duplicate fields do not compile.
private static final Pattern CUSTOM_ATTRIBUTE_KEY_REGEX = Pattern.compile("^[\u2E80-\u2FD5\u3190-\u319f\u3400-\u4DBF\u4E00-\u9FCC\uF900-\uFAADa-zA-Z0-9_-]*$");
private static final Pattern LABEL_REGEX = Pattern.compile("^[\u2E80-\u2FD5\u3190-\u319f\u3400-\u4DBF\u4E00-\u9FCC\uF900-\uFAADa-zA-Z0-9_-]*$");
private static final int CUSTOM_ATTRIBUTE_KEY_MAX_LENGTH = AtlasConfiguration.CUSTOM_ATTRIBUTE_KEY_MAX_LENGTH.getInt();
private static final int CUSTOM_ATTRIBUTE_VALUE_MAX_LENGTH = AtlasConfiguration.CUSTOM_ATTRIBUTE_VALUE_MAX_LENGTH.getInt();

View File

@ -56,6 +56,7 @@ public abstract class BasicTestSetup extends AtlasTestBase {
private static final String STORAGE_DESC_TYPE = "hive_storagedesc";
private static final String VIEW_TYPE = "hive_process";
protected static final String DATASET_SUBTYPE = "Asset";
protected static final String HDFS_PATH = "hdfs_path";
//Classification type //
public static final String DIMENSION_CLASSIFICATION = "Dimension";
@ -94,6 +95,7 @@ public abstract class BasicTestSetup extends AtlasTestBase {
// Prepares the shared test fixtures by invoking the loaders one after another:
// base models, Hive dataset, FS dataset, employee dataset, then glossary assignment.
// NOTE(review): the call order presumably matters (later steps may rely on earlier ones) — confirm before reordering.
protected void setupTestData() {
loadBaseModels();
loadHiveDataset();
loadFsDataset();
loadEmployeeDataset();
assignGlossary();
}
@ -148,6 +150,18 @@ public abstract class BasicTestSetup extends AtlasTestBase {
}
}
/**
 * Loads the file-system model definition ("1000-Hadoop/1020-fs_model.json") into the type
 * store, loading the base models first if they have not been loaded yet. Any failure while
 * loading the model fails the running test.
 */
protected void loadFsDataset() {
    if (!baseLoaded) {
        loadBaseModels();
    }

    final String fsModelFile = "1000-Hadoop/1020-fs_model.json";

    try {
        loadModelFromJson(fsModelFile, typeDefStore, typeRegistry);
    } catch (IOException | AtlasBaseException loadFailure) {
        fail("Fs model setup is required for test to run!");
    }
}
protected void loadEmployeeDataset() {
if (!baseLoaded) {
loadBaseModels();
@ -342,7 +356,8 @@ public abstract class BasicTestSetup extends AtlasTestBase {
new AtlasClassificationDef(PII_CLASSIFICATION, "PII Classification", "1.0"),
new AtlasClassificationDef(METRIC_CLASSIFICATION, "Metric Classification", "1.0"),
new AtlasClassificationDef(ETL_CLASSIFICATION, "ETL Classification", "1.0"),
new AtlasClassificationDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", "1.0"),
new AtlasClassificationDef(JDBC_CLASSIFICATION, "JdbcAccess Classification", "1.0",
Arrays.asList(new AtlasStructDef.AtlasAttributeDef("attr1","string"))),
new AtlasClassificationDef(LOGDATA_CLASSIFICATION, "LogData Classification", "1.0"),
new AtlasClassificationDef(DIMENSIONAL_CLASSIFICATION,"Dimensional Classification", "1.0" ,
Arrays.asList(new AtlasStructDef.AtlasAttributeDef("attr1","string"))));
@ -759,4 +774,24 @@ public abstract class BasicTestSetup extends AtlasTestBase {
}
return entities;
}
/**
 * Attaches the label "你好" to the first created Hive entity whose type name equals
 * HIVE_TABLE_TYPE. Only that one entity is labelled; a failure to set the label fails the test.
 */
public void addLabels(){
    for (AtlasEntityHeader created : hiveEntities.getCreatedEntities()) {
        if (!created.getTypeName().equals(HIVE_TABLE_TYPE)) {
            continue;
        }

        Set<String> cjkLabels = new HashSet<>();
        cjkLabels.add("你好");

        try {
            entityStore.setLabels(created.getGuid(), cjkLabels);
        } catch (AtlasBaseException e) {
            fail("Failed to add Labels for Chinese characters to entity");
        }

        // Stop after the first matching table.
        return;
    }
}
}

View File

@ -42,6 +42,7 @@ import org.testng.Assert;
import org.testng.annotations.*;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
@ -69,6 +70,7 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
createDimensionalTaggedEntity("sales");
createSpecialCharTestEntities();
setupRelationshipTestData();
createJapaneseEntityWithDescription();
}
/* TermSearchProcessor(TSP),
@ -1047,6 +1049,78 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
assertEquals(sr.getRelations().size(), 4);
}
/**
 * Labels one Hive table (see addLabels) and then expects a quick search to return exactly
 * one result.
 */
@Test
public void cjkCharQuickSearch() throws AtlasBaseException {
    addLabels();

    // NOTE(review): the query literal is empty although the test name refers to a CJK character —
    // it may have been lost in an encoding step; confirm the intended search text.
    final String queryText = "";

    QuickSearchParameters quickParams = new QuickSearchParameters();
    quickParams.setQuery(queryText);
    quickParams.setLimit(5);
    quickParams.setOffset(0);

    AtlasQuickSearchResult quickResult = discoveryService.quickSearch(quickParams);

    assertSearchResult(quickResult.getSearchResults(), 1, queryText);
}
/**
 * Basic search for "你好"; runs after cjkCharQuickSearch and expects exactly one result.
 */
@Test(dependsOnMethods = "cjkCharQuickSearch")
public void cjkCharBasicSearch() throws AtlasBaseException {
    final String queryText = "你好";

    SearchParameters basicParams = new SearchParameters();
    basicParams.setQuery(queryText);
    basicParams.setLimit(5);
    basicParams.setOffset(0);

    assertSearchResult(discoveryService.searchWithParameters(basicParams), 1, queryText);
}
/**
 * Basic search for the katakana text "レシート" restricted to the HDFS_PATH type; expects
 * exactly one result.
 */
@Test
public void japaneseReceiptStarSearch() throws AtlasBaseException {
    final String queryText = "レシート";

    SearchParameters basicParams = new SearchParameters();
    basicParams.setQuery(queryText);
    basicParams.setTypeName(HDFS_PATH);
    basicParams.setLimit(5);
    basicParams.setOffset(0);

    assertSearchResult(discoveryService.searchWithParameters(basicParams), 1, queryText);
}
/**
 * Basic search for the kanji text "元帳" without a type restriction; expects two results.
 */
@Test
public void japaneseLedgerSearch() throws AtlasBaseException {
    final String queryText = "元帳";

    SearchParameters basicParams = new SearchParameters();
    basicParams.setQuery(queryText);
    basicParams.setLimit(5);
    basicParams.setOffset(0);

    assertSearchResult(discoveryService.searchWithParameters(basicParams), 2, queryText);
}
/**
 * Basic search for "の台*" (trailing wildcard); expects exactly one result.
 */
@Test
public void japaneseLedgerStarSearch() throws AtlasBaseException {
    final String queryText = "の台*";

    SearchParameters basicParams = new SearchParameters();
    basicParams.setQuery(queryText);
    basicParams.setLimit(5);
    basicParams.setOffset(0);

    assertSearchResult(discoveryService.searchWithParameters(basicParams), 1, queryText);
}
private String gethiveTableSalesFactGuid() throws AtlasBaseException {
if (salesFactGuid == null) {
SearchParameters params = new SearchParameters();
@ -1105,6 +1179,21 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
entityStore.addClassification(Arrays.asList(guid), new AtlasClassification(DIMENSIONAL_CLASSIFICATION, attr));
}
/**
 * Creates two HDFS_PATH entities carrying Japanese text: one named "元帳" whose referenceable
 * attribute and path are "レシート", and one named "cjk" whose referenceable attribute is "の台帳".
 */
private void createJapaneseEntityWithDescription() throws AtlasBaseException {
    AtlasEntity ledgerEntity = new AtlasEntity(HDFS_PATH);
    ledgerEntity.setAttribute("name", "元帳");
    ledgerEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "レシート");
    ledgerEntity.setAttribute("path", "レシート");
    AtlasEntity.AtlasEntitiesWithExtInfo ledgerInfo = new AtlasEntity.AtlasEntitiesWithExtInfo(ledgerEntity);
    entityStore.createOrUpdate(new AtlasEntityStream(ledgerInfo), false);

    AtlasEntity cjkEntity = new AtlasEntity(HDFS_PATH);
    cjkEntity.setAttribute("name", "cjk");
    cjkEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "の台帳");
    cjkEntity.setAttribute("path", "cjk");
    AtlasEntity.AtlasEntitiesWithExtInfo cjkInfo = new AtlasEntity.AtlasEntitiesWithExtInfo(cjkEntity);
    entityStore.createOrUpdate(new AtlasEntityStream(cjkInfo), false);
}
private void assertSearchProcessorWithoutMarker(SearchParameters params, int expected) throws AtlasBaseException {
assertSearchProcessor(params, expected, false);

View File

@ -64,6 +64,8 @@ public class ClassificationSearchProcessorTest extends BasicTestSetup {
private int dimensionTagEntities = 10;
private String dimensionTagDeleteGuid;
private String dimensionalTagGuid;
private String CJKGUID1;
private String CJKGUID2;
@BeforeClass
public void setup() throws Exception {
@ -72,6 +74,8 @@ public class ClassificationSearchProcessorTest extends BasicTestSetup {
setupTestData();
createDimensionTaggedEntityAndDelete();
createDimensionalTaggedEntityWithAttr();
createChineseEntityWithClassificationSingleChar();
createChineseEntityWithClassificationMultipleChar();
}
@Test(priority = -1)
@ -356,6 +360,360 @@ public class ClassificationSearchProcessorTest extends BasicTestSetup {
}
/**
 * Creates an HDFS_PATH entity ("h1") tagged with JDBC_CLASSIFICATION and remembers the guid
 * of the first created entity in CJKGUID1.
 */
private void createChineseEntityWithClassificationSingleChar() throws AtlasBaseException {
    AtlasEntity hdfsEntity = new AtlasEntity(HDFS_PATH);
    hdfsEntity.setAttribute("name", "h1");
    hdfsEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "h1qualified");
    hdfsEntity.setAttribute("path", "h1");

    // NOTE(review): attr1 is an empty literal although this fixture is the "single char" case —
    // the character may have been lost in an encoding step; confirm together with the tests that query it.
    Map<String, Object> tagAttributes = new HashMap<>();
    tagAttributes.put("attr1", "");

    List<AtlasClassification> tags = new ArrayList<>();
    tags.add(new AtlasClassification(JDBC_CLASSIFICATION, tagAttributes));
    hdfsEntity.setClassifications(tags);

    final EntityMutationResponse created = entityStore.createOrUpdate(
            new AtlasEntityStream(new AtlasEntity.AtlasEntitiesWithExtInfo(hdfsEntity)), false);

    CJKGUID1 = created.getCreatedEntities().get(0).getGuid();
}
/**
 * Creates an HDFS_PATH entity ("h2") tagged with JDBC_CLASSIFICATION whose attr1 is "数据" and
 * remembers the guid of the first created entity in CJKGUID2.
 */
private void createChineseEntityWithClassificationMultipleChar() throws AtlasBaseException {
    AtlasEntity hdfsEntity = new AtlasEntity(HDFS_PATH);
    hdfsEntity.setAttribute("name", "h2");
    hdfsEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "h2qualified");
    hdfsEntity.setAttribute("path", "h2");

    Map<String, Object> tagAttributes = new HashMap<>();
    tagAttributes.put("attr1", "数据");

    List<AtlasClassification> tags = new ArrayList<>();
    tags.add(new AtlasClassification(JDBC_CLASSIFICATION, tagAttributes));
    hdfsEntity.setClassifications(tags);

    final EntityMutationResponse created = entityStore.createOrUpdate(
            new AtlasEntityStream(new AtlasEntity.AtlasEntitiesWithExtInfo(hdfsEntity)), false);

    CJKGUID2 = created.getCreatedEntities().get(0).getGuid();
}
// EQ filter on attr1, single-character case: expects exactly one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagEquals() throws AtlasBaseException {
    // NOTE(review): the filter value is an empty literal although this is the "single char" case —
    // the character may have been lost in an encoding step; confirm against the fixture.
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.EQ, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// NEQ filter on attr1, single-character case: expects exactly one vertex, the CJKGUID2 entity.
@Test
public void searchJapaneseChineseByTagNotEquals() throws AtlasBaseException {
    // NOTE(review): the filter value is an empty literal although this is the "single char" case —
    // the character may have been lost in an encoding step; confirm against the fixture.
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.NEQ, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// CONTAINS filter on attr1, single-character case: expects exactly one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagContains() throws AtlasBaseException {
    // NOTE(review): the filter value is an empty literal although this is the "single char" case —
    // the character may have been lost in an encoding step; confirm against the fixture.
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.CONTAINS, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// STARTS_WITH filter on attr1, single-character case: expects exactly one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagBeginswith() throws AtlasBaseException {
    // NOTE(review): the filter value is an empty literal although this is the "single char" case —
    // the character may have been lost in an encoding step; confirm against the fixture.
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.STARTS_WITH, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// ENDS_WITH filter on attr1, single-character case: expects exactly one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagEndsWith() throws AtlasBaseException {
    // NOTE(review): the filter value is an empty literal although this is the "single char" case —
    // the character may have been lost in an encoding step; confirm against the fixture.
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.ENDS_WITH, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// IS_NULL filter on attr1: expects exactly two vertices.
@Test
public void searchJapaneseChineseByTagISNULL() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.IS_NULL, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    // customer_dim_view, product_dim_view entities
    assertEquals(matches.size(), 2);
}
// NOT_NULL filter on attr1: expects two vertices, one of which is the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagISNOTNULL() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.NOT_NULL, ""));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 2);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// EQ filter on attr1 with the multi-character value "数据": expects exactly one vertex, the CJKGUID2 entity.
@Test
public void searchJapaneseChineseByTagEqualsMultiple() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.EQ, "数据"));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// NEQ filter on attr1 with the multi-character value "数据": expects exactly one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseChineseByTagNotEqualsMultiple() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.NEQ, "数据"));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// CONTAINS filter on attr1 with the multi-character value "数据": expects exactly one vertex, the CJKGUID2 entity.
@Test
public void searchJapaneseChineseByTagContainsMultiple() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.CONTAINS, "数据"));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// STARTS_WITH filter on attr1 with the multi-character value "数据": expects exactly one vertex, the CJKGUID2 entity.
@Test
public void searchJapaneseChineseByTagBeginsWithMultiple() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.STARTS_WITH, "数据"));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// ENDS_WITH filter on attr1 with the multi-character value "数据": expects exactly one vertex, the CJKGUID2 entity.
@Test
public void searchJapaneseChineseByTagEndsWithMultiple() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setClassification(JDBC_CLASSIFICATION);
    searchParams.setTagFilters(getSingleFilterCondition("attr1", Operator.ENDS_WITH, "数据"));
    searchParams.setLimit(20);

    ClassificationSearchProcessor searchProcessor = new ClassificationSearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
@AfterClass
public void teardown() throws Exception {
AtlasGraphProvider.cleanup();

View File

@ -18,18 +18,25 @@
package org.apache.atlas.discovery;
import com.google.common.collect.Sets;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.BasicTestSetup;
import org.apache.atlas.SortOrder;
import org.apache.atlas.TestModules;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.discovery.SearchParameters;
import org.apache.atlas.model.instance.AtlasClassification;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.repository.graph.AtlasGraphProvider;
import org.apache.atlas.repository.graph.GraphBackedSearchIndexer;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.repository.store.graph.v2.AtlasEntityStream;
import org.apache.atlas.repository.store.graph.v2.EntityGraphRetriever;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Guice;
@ -40,6 +47,7 @@ import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Calendar;
import java.util.GregorianCalendar;
@ -50,6 +58,7 @@ import org.slf4j.LoggerFactory;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
@Guice(modules = TestModules.TestOnlyModule.class)
public class EntitySearchProcessorTest extends BasicTestSetup {
@ -65,11 +74,17 @@ public class EntitySearchProcessorTest extends BasicTestSetup {
@Inject
private EntityGraphRetriever entityRetriever;
private String CJKGUID1;
private String CJKGUID2;
// Class-level fixture setup (TestNG @BeforeClass): initializes the base test class, loads the
// shared test data, then creates the Japanese and Chinese HDFS path entities whose guids are
// stored in CJKGUID1 and CJKGUID2.
// NOTE(review): the two create calls presumably need the models loaded by setupTestData() — confirm before reordering.
@BeforeClass
public void setup() throws Exception {
super.initialize();
setupTestData();
createJapaneseEntityWithDescription();
createChineseEntityWithDescription();
}
@Inject
@ -630,8 +645,172 @@ public class EntitySearchProcessorTest extends BasicTestSetup {
List b =v2.stream().map(v3 -> v3.getProperty(Constants.GUID_PROPERTY_KEY, String.class)).collect(Collectors.toList());
assertTrue(!a.stream().anyMatch(element -> b.contains(element)));
}
// EQ filter on the "path" attribute with the Chinese value "我说中文": expects one vertex, the CJKGUID2 entity.
@Test
public void searchChineseDescription() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setTypeName(HDFS_PATH);

    SearchParameters.FilterCriteria pathFilter = getSingleFilterCondition("path", SearchParameters.Operator.EQ, "我说中文");
    // Result is discarded. NOTE(review): this call looks like a leftover — confirm it can be dropped.
    pathFilter.getCriterion();
    searchParams.setEntityFilters(pathFilter);
    searchParams.setLimit(20);

    EntitySearchProcessor searchProcessor = new EntitySearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// CONTAINS filter on the "path" attribute with the Chinese value "我说中文": expects one vertex, the CJKGUID2 entity.
@Test
public void searchChineseDescriptionCONTAINS() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setTypeName(HDFS_PATH);

    SearchParameters.FilterCriteria pathFilter = getSingleFilterCondition("path", SearchParameters.Operator.CONTAINS, "我说中文");
    // Result is discarded. NOTE(review): this call looks like a leftover — confirm it can be dropped.
    pathFilter.getCriterion();
    searchParams.setEntityFilters(pathFilter);
    searchParams.setLimit(20);

    EntitySearchProcessor searchProcessor = new EntitySearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
// EQ filter on the "path" attribute with the Japanese value "私は日本語を話します": expects one vertex, the CJKGUID1 entity.
@Test
public void searchJapaneseDescription() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setTypeName(HDFS_PATH);

    SearchParameters.FilterCriteria pathFilter = getSingleFilterCondition("path", SearchParameters.Operator.EQ, "私は日本語を話します");
    // Result is discarded. NOTE(review): this call looks like a leftover — confirm it can be dropped.
    pathFilter.getCriterion();
    searchParams.setEntityFilters(pathFilter);
    searchParams.setLimit(20);

    EntitySearchProcessor searchProcessor = new EntitySearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// EQ filter on "qualifiedName" with the value "h3qualified": expects one vertex, the CJKGUID1 entity.
@Test
public void searchWithQualifiedNameEQ() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setTypeName(HDFS_PATH);

    SearchParameters.FilterCriteria nameFilter = getSingleFilterCondition("qualifiedName", SearchParameters.Operator.EQ, "h3qualified");
    // Result is discarded. NOTE(review): this call looks like a leftover — confirm it can be dropped.
    nameFilter.getCriterion();
    searchParams.setEntityFilters(nameFilter);
    searchParams.setLimit(20);

    EntitySearchProcessor searchProcessor = new EntitySearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID1));
}
// STARTS_WITH filter on "name" with the prefix "hdfs": expects one vertex, the CJKGUID2 entity.
@Test
public void searchWithNameBeginswith() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setTypeName(HDFS_PATH);

    SearchParameters.FilterCriteria nameFilter = getSingleFilterCondition("name", SearchParameters.Operator.STARTS_WITH, "hdfs");
    // Result is discarded. NOTE(review): this call looks like a leftover — confirm it can be dropped.
    nameFilter.getCriterion();
    searchParams.setEntityFilters(nameFilter);
    searchParams.setLimit(20);

    EntitySearchProcessor searchProcessor = new EntitySearchProcessor(
            new SearchContext(searchParams, typeRegistry, graph, indexer.getVertexIndexKeys()));
    List<AtlasVertex> matches = searchProcessor.execute();

    assertEquals(matches.size(), 1);

    List<String> matchedGuids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            matchedGuids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(matchedGuids.contains(CJKGUID2));
}
/**
 * Creates an {@code HDFS_PATH} entity whose {@code path} attribute holds Japanese text, attaches one
 * {@code JDBC_CLASSIFICATION} classification to it, and stores the GUID of the created entity in
 * {@code CJKGUID1}.
 *
 * @throws AtlasBaseException propagated from {@code entityStore.createOrUpdate}
 */
private void createJapaneseEntityWithDescription() throws AtlasBaseException {
    AtlasEntity entity = new AtlasEntity(HDFS_PATH);
    entity.setAttribute("name", "h3");
    entity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "h3qualified");
    entity.setAttribute("path", "私は日本語を話します");

    // A plain HashMap rather than double-brace initialization: the latter creates an anonymous HashMap
    // subclass that keeps a hidden reference to this test instance.
    HashMap<String, Object> classificationAttrs = new HashMap<>();
    classificationAttrs.put("attr1", "attr1");

    List<AtlasClassification> cls = new ArrayList<>();
    cls.add(new AtlasClassification(JDBC_CLASSIFICATION, classificationAttrs));
    entity.setClassifications(cls);

    // Create the entity and remember its GUID for the search assertions.
    final EntityMutationResponse response = entityStore.createOrUpdate(new AtlasEntityStream(new AtlasEntity.AtlasEntitiesWithExtInfo(entity)), false);
    AtlasEntityHeader entityHeader = response.getCreatedEntities().get(0);
    CJKGUID1 = entityHeader.getGuid();
}
/**
 * Creates an {@code HDFS_PATH} entity whose {@code path} attribute holds Chinese text and stores the GUID of
 * the created entity in {@code CJKGUID2}.
 */
private void createChineseEntityWithDescription() throws AtlasBaseException {
    AtlasEntity chinesePath = new AtlasEntity(HDFS_PATH);
    chinesePath.setAttribute("name", "hdfs_chinese_test");
    chinesePath.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "hdfs_chinese_test_qualified");
    chinesePath.setAttribute("path", "我说中文");

    // Persist the entity; the first created header carries the GUID the search test looks for.
    AtlasEntity.AtlasEntitiesWithExtInfo toCreate = new AtlasEntity.AtlasEntitiesWithExtInfo(chinesePath);
    EntityMutationResponse created = entityStore.createOrUpdate(new AtlasEntityStream(toCreate), false);
    CJKGUID2 = created.getCreatedEntities().get(0).getGuid();
}
}

View File

@ -18,31 +18,43 @@
package org.apache.atlas.discovery;
import com.google.common.collect.Sets;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.BasicTestSetup;
import org.apache.atlas.SortOrder;
import org.apache.atlas.TestModules;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.discovery.SearchParameters;
import org.apache.atlas.model.instance.AtlasClassification;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.repository.graph.AtlasGraphProvider;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.repository.store.graph.v2.AtlasEntityStream;
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.repository.store.graph.v2.EntityGraphRetriever;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.collections.CollectionUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.stream.Collectors;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.fail;
@Guice(modules = TestModules.TestOnlyModule.class)
public class FreeTextSearchProcessorTest extends BasicTestSetup {
@ -56,11 +68,14 @@ public class FreeTextSearchProcessorTest extends BasicTestSetup {
// Injected retriever; the tests in this class use it to turn result vertices into entity headers.
@Inject
private EntityGraphRetriever entityRetriever;
// GUID of the entity created by createEntityWithQualifiedName(); the search tests assert it is found.
private String entityGUID;
/**
 * One-time class setup: runs the inherited {@code initialize()}, then {@code setupTestData()}, and finally
 * creates the entity whose GUID the search tests in this class look for.
 */
@BeforeClass
public void setup() throws Exception {
super.initialize();
setupTestData();
// Stores the GUID of the created entity in entityGUID.
createEntityWithQualifiedName();
}
@Test
@ -162,6 +177,103 @@ public class FreeTextSearchProcessorTest extends BasicTestSetup {
}
}
/**
 * Runs the free-text query {@code "h1qualified*"} and expects exactly one vertex, whose GUID is
 * {@code entityGUID}.
 */
@Test
public void searchQualifiedName() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setQuery("h1qualified*");
    searchParams.setExcludeDeletedEntities(true);
    searchParams.setLimit(500);
    searchParams.setOffset(0);

    SearchContext searchContext = new SearchContext(searchParams, typeRegistry, graph, Collections.<String>emptySet());
    List<AtlasVertex> matches = new FreeTextSearchProcessor(searchContext).execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    // Resolve every matched vertex to its GUID; a retrieval error fails the test immediately.
    List<String> guids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            guids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(guids.contains(entityGUID));
}
/**
 * Runs the free-text query {@code "h1Name*"} and expects exactly one vertex, whose GUID is
 * {@code entityGUID}.
 */
@Test
public void searchName() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setQuery("h1Name*");
    searchParams.setExcludeDeletedEntities(true);
    searchParams.setLimit(500);
    searchParams.setOffset(0);

    SearchContext searchContext = new SearchContext(searchParams, typeRegistry, graph, Collections.<String>emptySet());
    List<AtlasVertex> matches = new FreeTextSearchProcessor(searchContext).execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    // Resolve every matched vertex to its GUID; a retrieval error fails the test immediately.
    List<String> guids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            guids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(guids.contains(entityGUID));
}
/**
 * Runs the free-text query {@code "*h1*"} (wildcard on both sides) and expects exactly one vertex, whose GUID
 * is {@code entityGUID}.
 */
@Test
public void searchNameWithStar() throws AtlasBaseException {
    SearchParameters searchParams = new SearchParameters();
    searchParams.setQuery("*h1*");
    searchParams.setExcludeDeletedEntities(true);
    searchParams.setLimit(500);
    searchParams.setOffset(0);

    SearchContext searchContext = new SearchContext(searchParams, typeRegistry, graph, Collections.<String>emptySet());
    List<AtlasVertex> matches = new FreeTextSearchProcessor(searchContext).execute();

    Assert.assertTrue(CollectionUtils.isNotEmpty(matches));
    assertEquals(matches.size(), 1);

    // Resolve every matched vertex to its GUID; a retrieval error fails the test immediately.
    List<String> guids = new ArrayList<>();
    for (AtlasVertex match : matches) {
        try {
            guids.add(entityRetriever.toAtlasEntityHeader(match).getGuid());
        } catch (AtlasBaseException e) {
            fail("Failure in mapping vertex to AtlasEntityHeader");
        }
    }
    Assert.assertTrue(guids.contains(entityGUID));
}
/**
 * Creates an {@code HDFS_PATH} entity whose name, qualified name and path all start with {@code "h1"} and
 * stores the GUID of the created entity in {@code entityGUID}.
 */
private void createEntityWithQualifiedName() throws AtlasBaseException {
    AtlasEntity hdfsPathEntity = new AtlasEntity(HDFS_PATH);
    hdfsPathEntity.setAttribute("name", "h1NameHDFS");
    hdfsPathEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "h1qualifiedNameHDFS");
    hdfsPathEntity.setAttribute("path", "h1PathHDFS");

    // Persist the entity; the first created header carries the GUID the search tests look for.
    AtlasEntity.AtlasEntitiesWithExtInfo toCreate = new AtlasEntity.AtlasEntitiesWithExtInfo(hdfsPathEntity);
    EntityMutationResponse created = entityStore.createOrUpdate(new AtlasEntityStream(toCreate), false);
    entityGUID = created.getCreatedEntities().get(0).getGuid();
}
@AfterClass
public void teardown() throws Exception {
AtlasGraphProvider.cleanup();

View File

@ -1434,4 +1434,33 @@ public class AtlasEntityStoreV2Test extends AtlasEntityTestBase {
fail("The BusinessMetadata Attribute should have been assigned " +e);
}
}
/**
 * Sets a label made of CJK characters on the table entity and verifies that the entity's label count is one
 * higher than before the call.
 *
 * <p>Any exception raised while reading or updating the entity fails the test; it is not merely logged.
 */
@Test(dependsOnMethods = "deleteLabelsToEntity")
public void testCJKaddLabel() {
    Set<String> labels = new HashSet<>();
    labels.add("国家");

    try {
        AtlasEntity tblEntity = getEntityFromStore(tblEntityGuid);
        int count = tblEntity.getLabels().size();

        entityStore.setLabels(tblEntityGuid, labels);

        tblEntity = getEntityFromStore(tblEntityGuid);
        assertEquals(tblEntity.getLabels().size(), count + 1);
    } catch (Exception e) {
        LOG.error("An error occurred : " + e);
        // Logging alone would let the test pass without ever reaching the assertion above.
        fail("Setting a CJK label should not have thrown " + e);
    }
}
/**
 * Replaces the custom attributes of the table entity with a single CJK key/value pair, saves the entity, and
 * verifies that the entity read back from the store carries the same custom attributes.
 *
 * @throws Exception propagated from the entity store calls
 */
@Test()
public void addCJKCustomAttributes() throws Exception {
    AtlasEntity tblEntity = getEntityFromStore(tblEntityGuid);

    Map<String, String> customAttributes = new HashMap<>();
    customAttributes.put("国家", "国家");
    tblEntity.setCustomAttributes(customAttributes);

    entityStore.createOrUpdate(new AtlasEntityStream(tblEntity), false);

    tblEntity = getEntityFromStore(tblEntityGuid);
    // (actual, expected) order, matching the other assertions in this class, so failure messages read correctly.
    assertEquals(tblEntity.getCustomAttributes(), customAttributes);
}
}