ATLAS-4267 : Quick Search : AggregationMetrics is incorrect with some special characters

Signed-off-by: Pinal <pinal-shah>
This commit is contained in:
Pinal 2021-04-29 11:20:53 +05:30
parent 7e73fc2591
commit 1366a648d0
4 changed files with 156 additions and 21 deletions

View File

@ -21,6 +21,8 @@ import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria;
import org.apache.atlas.model.discovery.SearchParameters.Operator;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.typedef.AtlasBaseTypeDef;
import org.apache.atlas.model.typedef.AtlasStructDef;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasStructType.AtlasAttribute;
@ -32,6 +34,9 @@ import org.slf4j.LoggerFactory;
import java.util.*;
import static org.apache.atlas.repository.Constants.CUSTOM_ATTRIBUTES_PROPERTY_KEY;
import static org.apache.atlas.repository.Constants.CLASSIFICATION_NAMES_KEY;
import static org.apache.atlas.repository.Constants.PROPAGATED_CLASSIFICATION_NAMES_KEY;
import static org.apache.atlas.repository.Constants.LABELS_PROPERTY_KEY;
public class AtlasSolrQueryBuilder {
private static final Logger LOG = LoggerFactory.getLogger(AtlasSolrQueryBuilder.class);
@ -207,7 +212,32 @@ public class AtlasSolrQueryBuilder {
attributeValue = getIndexQueryAttributeValue(attributeValue);
}
withPropertyCondition(sb, indexAttributeName, operator, attributeValue);
if (attributeValue != null) {
attributeValue = attributeValue.trim();
}
//when wildcard search -> escape special Char, don't quote
// when tokenized characters + index field Type TEXT -> remove wildcard '*' from query
//'CONTAINS and NOT_CONTAINS' operator with tokenize char in attributeValue doesn't guarantee correct results
// for aggregationMetrics
boolean replaceWildcardChar = false;
AtlasStructDef.AtlasAttributeDef def = type.getAttributeDef(attributeName);
if (!isPipeSeparatedSystemAttribute(attributeName)
&& isWildCardOperator(operator)
&& def.getTypeName().equalsIgnoreCase(AtlasBaseTypeDef.ATLAS_TYPE_STRING)) {
if (def.getIndexType() == null && AtlasAttribute.hastokenizeChar(attributeValue)) {
replaceWildcardChar = true;
}
attributeValue = AtlasAttribute.escapeIndexQueryValue(attributeValue, false, false);
} else {
attributeValue = AtlasAttribute.escapeIndexQueryValue(attributeValue);
}
withPropertyCondition(sb, indexAttributeName, operator, attributeValue, replaceWildcardChar);
indexAttributes.add(indexAttributeName);
orExpQuery.add(sb);
}
@ -241,12 +271,8 @@ public class AtlasSolrQueryBuilder {
return this;
}
private void withPropertyCondition(StringBuilder queryBuilder, String indexFieldName, Operator operator, String attributeValue) throws AtlasBaseException {
private void withPropertyCondition(StringBuilder queryBuilder, String indexFieldName, Operator operator, String attributeValue, boolean replaceWildCard) throws AtlasBaseException {
if (StringUtils.isNotEmpty(indexFieldName) && operator != null) {
if (attributeValue != null) {
attributeValue = attributeValue.trim();
}
beginCriteria(queryBuilder);
switch (operator) {
@ -257,16 +283,16 @@ public class AtlasSolrQueryBuilder {
withNotEqual(queryBuilder, indexFieldName, attributeValue);
break;
case STARTS_WITH:
withStartsWith(queryBuilder, indexFieldName, attributeValue);
withStartsWith(queryBuilder, indexFieldName, attributeValue, replaceWildCard);
break;
case ENDS_WITH:
withEndsWith(queryBuilder, indexFieldName, attributeValue);
withEndsWith(queryBuilder, indexFieldName, attributeValue, replaceWildCard);
break;
case CONTAINS:
withContains(queryBuilder, indexFieldName, attributeValue);
withContains(queryBuilder, indexFieldName, attributeValue, replaceWildCard);
break;
case NOT_CONTAINS:
withNotContains(queryBuilder, indexFieldName, attributeValue);
withNotContains(queryBuilder, indexFieldName, attributeValue, replaceWildCard);
break;
case IS_NULL:
withIsNull(queryBuilder, indexFieldName);
@ -300,6 +326,20 @@ public class AtlasSolrQueryBuilder {
}
}
private boolean isPipeSeparatedSystemAttribute(String attrName) {
return StringUtils.equals(attrName, CLASSIFICATION_NAMES_KEY) ||
StringUtils.equals(attrName, PROPAGATED_CLASSIFICATION_NAMES_KEY) ||
StringUtils.equals(attrName, LABELS_PROPERTY_KEY) ||
StringUtils.equals(attrName, CUSTOM_ATTRIBUTES_PROPERTY_KEY);
}
private boolean isWildCardOperator(Operator operator) {
return (operator == Operator.CONTAINS ||
operator == Operator.STARTS_WITH ||
operator == Operator.ENDS_WITH ||
operator == Operator.NOT_CONTAINS);
}
private String getIndexQueryAttributeValue(String attributeValue) {
if (StringUtils.isNotEmpty(attributeValue)) {
@ -347,12 +387,14 @@ public class AtlasSolrQueryBuilder {
queryBuilder.append(" )");
}
private void withEndsWith(StringBuilder queryBuilder, String indexFieldName, String attributeValue) {
queryBuilder.append("+").append(indexFieldName).append(":*").append(attributeValue).append(" ");
private void withEndsWith(StringBuilder queryBuilder, String indexFieldName, String attributeValue, boolean replaceWildCard) {
String attrValuePrefix = replaceWildCard ? ":" : ":*";
queryBuilder.append("+").append(indexFieldName).append(attrValuePrefix).append(attributeValue).append(" ");
}
private void withStartsWith(StringBuilder queryBuilder, String indexFieldName, String attributeValue) {
queryBuilder.append("+").append(indexFieldName).append(":").append(attributeValue).append("* ");
private void withStartsWith(StringBuilder queryBuilder, String indexFieldName, String attributeValue, boolean replaceWildCard) {
String attrValuePostfix = replaceWildCard ? " " : "* ";
queryBuilder.append("+").append(indexFieldName).append(":").append(attributeValue).append(attrValuePostfix);
}
private void withNotEqual(StringBuilder queryBuilder, String indexFieldName, String attributeValue) {
@ -391,12 +433,19 @@ public class AtlasSolrQueryBuilder {
queryBuilder.append("+").append(indexFieldName).append(":[ * TO ").append(attributeValue).append(" ] ");
}
private void withContains(StringBuilder queryBuilder, String indexFieldName, String attributeValue) {
queryBuilder.append("+").append(indexFieldName).append(":*").append(attributeValue).append("* ");
private void withContains(StringBuilder queryBuilder, String indexFieldName, String attributeValue, boolean replaceWildCard) {
String attrValuePrefix = replaceWildCard ? ":" : ":*";
String attrValuePostfix = replaceWildCard ? " " : "* ";
queryBuilder.append("+").append(indexFieldName).append(attrValuePrefix).append(attributeValue).append(attrValuePostfix);
}
private void withNotContains(StringBuilder queryBuilder, String indexFieldName, String attributeValue) {
queryBuilder.append("*:* -").append(indexFieldName).append(":*").append(attributeValue).append("* ");
private void withNotContains(StringBuilder queryBuilder, String indexFieldName, String attributeValue, boolean replaceWildCard) {
String attrValuePrefix = replaceWildCard ? ":" : ":*";
String attrValuePostfix = replaceWildCard ? " " : "* ";
queryBuilder.append("*:* -").append(indexFieldName).append(attrValuePrefix).append(attributeValue).append(attrValuePostfix);
}
private void withIsNull(StringBuilder queryBuilder, String indexFieldName) {

View File

@ -20,6 +20,8 @@ package org.apache.atlas.repository.graphdb.janus;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.discovery.SearchParameters;
import org.apache.atlas.model.typedef.AtlasBaseTypeDef;
import org.apache.atlas.model.typedef.AtlasStructDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.type.AtlasEntityType;
@ -73,6 +75,15 @@ public class AtlasSolrQueryBuilderTest {
@Mock
private AtlasStructType.AtlasAttribute qualifiedNameAttributeMock;
@Mock
private AtlasStructDef.AtlasAttributeDef stringAttributeDef;
@Mock
private AtlasStructDef.AtlasAttributeDef textAttributeDef;
@Mock
private AtlasStructDef.AtlasAttributeDef nonStringAttributeDef;
private Map<String, String> indexFieldNamesMap = new HashMap<>();
@ -89,6 +100,16 @@ public class AtlasSolrQueryBuilderTest {
when(hiveTableEntityTypeMock.getAttribute("Constants.ENTITY_TYPE_PROPERTY_KEY")).thenReturn(entitypeAttributeMock);
when(hiveTableEntityTypeMock.getAttribute("qualifiedName")).thenReturn(qualifiedNameAttributeMock);
when(hiveTableEntityTypeMock.getAttributeDef("name")).thenReturn(stringAttributeDef);
when(hiveTableEntityTypeMock.getAttributeDef("comment")).thenReturn(stringAttributeDef);
when(hiveTableEntityTypeMock.getAttributeDef("description")).thenReturn(stringAttributeDef);
when(hiveTableEntityTypeMock.getAttributeDef("qualifiedName")).thenReturn(textAttributeDef);
when(nonStringAttributeDef.getTypeName()).thenReturn(AtlasBaseTypeDef.ATLAS_TYPE_INT);
when(stringAttributeDef.getTypeName()).thenReturn(AtlasBaseTypeDef.ATLAS_TYPE_STRING);
when(textAttributeDef.getTypeName()).thenReturn(AtlasBaseTypeDef.ATLAS_TYPE_STRING);
when(stringAttributeDef.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.STRING);
indexFieldNamesMap.put("name", "name_index");
indexFieldNamesMap.put("comment", "comment_index");
@ -170,7 +191,7 @@ public class AtlasSolrQueryBuilderTest {
processSearchParameters(fileName, underTest);
Assert.assertEquals(underTest.build(), "+t10 AND -__state_index:DELETED AND +__typeName__index:(hive_table ) AND ( ( +comment_index:*United States* ) AND ( +descrption__index:*nothing* ) AND ( +name_index:*t100* ) )");
Assert.assertEquals(underTest.build(), "+t10 AND -__state_index:DELETED AND +__typeName__index:(hive_table ) AND ( ( +comment_index:*United\\ States* ) AND ( +descrption__index:*nothing* ) AND ( +name_index:*t100* ) )");
}
@Test

View File

@ -487,7 +487,7 @@ public abstract class SearchProcessor {
ret = false;
} else if (operator == SearchParameters.Operator.CONTAINS && AtlasAttribute.hastokenizeChar(attributeValue) && indexType == null) { // indexType = TEXT
if (LOG.isDebugEnabled()) {
LOG.debug("{} operator found for string (TEXT) attribute {} and special characters found in filter value {}, deferring to in-memory or graph query (might cause poor performance)", attributeValue);
LOG.debug("{} operator found for string (TEXT) attribute {} and special characters found in filter value {}, deferring to in-memory or graph query (might cause poor performance)", operator, qualifiedName, attributeValue);
}
ret = false;
@ -808,10 +808,11 @@ public abstract class SearchProcessor {
&& (op == SearchParameters.Operator.CONTAINS || op == SearchParameters.Operator.STARTS_WITH || op == SearchParameters.Operator.ENDS_WITH)
&& def.getTypeName().equalsIgnoreCase(AtlasBaseTypeDef.ATLAS_TYPE_STRING)) {
escapeIndexQueryValue = AtlasAttribute.escapeIndexQueryValue(attrVal, false, false);
if (def.getIndexType() == null && AtlasAttribute.hastokenizeChar(attrVal)) {
replaceWildcardChar = true;
}
escapeIndexQueryValue = AtlasAttribute.escapeIndexQueryValue(attrVal, false, false);
} else {
escapeIndexQueryValue = AtlasAttribute.escapeIndexQueryValue(attrVal);
}

View File

@ -26,6 +26,7 @@ import org.apache.atlas.model.discovery.AtlasQuickSearchResult;
import org.apache.atlas.model.discovery.AtlasSearchResult;
import org.apache.atlas.model.discovery.QuickSearchParameters;
import org.apache.atlas.model.discovery.SearchParameters;
import org.apache.atlas.model.discovery.AtlasAggregationEntry;
import org.apache.atlas.model.instance.AtlasClassification;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
@ -40,6 +41,7 @@ import javax.inject.Inject;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.atlas.model.discovery.SearchParameters.*;
import static org.testng.Assert.*;
@ -672,6 +674,27 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
assertSearchResult(searchResult,expected, attrValue);
}
@Test(dataProvider = "specialCharSearchContains")
public void specialCharQuickSearchAssertContains(String attrName, SearchParameters.Operator operator, String attrValue, int expected) throws AtlasBaseException {
QuickSearchParameters params = new QuickSearchParameters();
params.setTypeName(HIVE_TABLE_TYPE);
SearchParameters.FilterCriteria filterCriteria = getSingleFilterCondition(attrName,operator, attrValue);
params.setEntityFilters(filterCriteria);
params.setLimit(20);
AtlasQuickSearchResult searchResult = discoveryService.quickSearch(params);
assertSearchResult(searchResult.getSearchResults(), expected, attrValue);
List<String> failedCases = Arrays.asList("def:default:qf-","star*in","ith spac","778-87");
if (attrName.equals("qualifiedName") && failedCases.contains(attrValue)) {
expected = 0;
}
if (expected > 0) {
assertAggregationMetrics(searchResult);
}
}
@Test(dataProvider = "specialCharSearchName")
public void specialCharSearchAssertName(String attrName, SearchParameters.Operator operator, String attrValue, int expected) throws AtlasBaseException {
SearchParameters params = new SearchParameters();
@ -684,6 +707,21 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
assertSearchResult(searchResult,expected, attrValue);
}
@Test(dataProvider = "specialCharSearchName")
public void specialCharQuickSearchAssertName(String attrName, SearchParameters.Operator operator, String attrValue, int expected) throws AtlasBaseException {
QuickSearchParameters params = new QuickSearchParameters();
params.setTypeName(HIVE_TABLE_TYPE);
SearchParameters.FilterCriteria filterCriteria = getSingleFilterCondition(attrName,operator, attrValue);
params.setEntityFilters(filterCriteria);
params.setLimit(20);
AtlasQuickSearchResult searchResult = discoveryService.quickSearch(params);
assertSearchResult(searchResult.getSearchResults(), expected, attrValue);
if (expected > 0) {
assertAggregationMetrics(searchResult);
}
}
@Test(dataProvider = "specialCharSearchQFName")
public void specialCharSearchAssertQFName(String attrName, SearchParameters.Operator operator, String attrValue, int expected) throws AtlasBaseException {
SearchParameters params = new SearchParameters();
@ -696,6 +734,21 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
assertSearchResult(searchResult, expected, attrValue);
}
@Test(dataProvider = "specialCharSearchQFName")
public void specialCharQuickSearchAssertQFName(String attrName, SearchParameters.Operator operator, String attrValue, int expected) throws AtlasBaseException {
QuickSearchParameters params = new QuickSearchParameters();
params.setTypeName(HIVE_TABLE_TYPE);
SearchParameters.FilterCriteria filterCriteria = getSingleFilterCondition(attrName,operator, attrValue);
params.setEntityFilters(filterCriteria);
params.setLimit(20);
AtlasQuickSearchResult searchResult = discoveryService.quickSearch(params);
assertSearchResult(searchResult.getSearchResults(), expected, attrValue);
if (expected > 0) {
assertAggregationMetrics(searchResult);
}
}
@Test(dataProvider = "specialCharQuickSearch")
public void specialCharQuickSearch(String searchValue, int expected) throws AtlasBaseException {
QuickSearchParameters params = new QuickSearchParameters();
@ -705,6 +758,9 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
AtlasQuickSearchResult searchResult = discoveryService.quickSearch(params);
assertSearchResult(searchResult.getSearchResults(), expected, searchValue);
if (expected > 0) {
assertAggregationMetrics(searchResult);
}
}
private void assertSearchResult(AtlasSearchResult searchResult, int expected, String query) {
@ -721,6 +777,14 @@ public class AtlasDiscoveryServiceTest extends BasicTestSetup {
}
}
private void assertAggregationMetrics(AtlasQuickSearchResult searchResult) {
Map<String, List<AtlasAggregationEntry>> agg = searchResult.getAggregationMetrics();
Assert.assertTrue(CollectionUtils.isNotEmpty(agg.get("__typeName")));
AtlasAggregationEntry entry = agg.get("__typeName").get(0);
Assert.assertTrue(entry!=null && entry.getCount() > 0);
}
private void createDimensionalTaggedEntity(String name) throws AtlasBaseException {
EntityMutationResponse resp = createDummyEntity(name, HIVE_TABLE_TYPE);
AtlasEntityHeader entityHeader = resp.getCreatedEntities().get(0);