SELECT
CASE
WHEN CHARINDEX(',', Name) > 0
THEN LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, LEN(Name)))
ELSE NULL
END AS FirstName,
COUNT(*) AS NameCount
FROM
Customer
GROUP BY
CASE
WHEN CHARINDEX(',', Name) > 0
THEN LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, LEN(Name)))
ELSE NULL
END
HAVING COUNT(*) = 1;
EVALUATE
VAR NameTable =
ADDCOLUMNS (
Customer,
"CommaPosition", SEARCH ( ",", Customer[Name], 1, 0 ),
"LastName",
IF (
SEARCH ( ",", Customer[Name], 1, 0 ) > 0,
LEFT ( Customer[Name], SEARCH ( ",", Customer[Name], 1, 0 ) - 1 ),
Customer[Name]
),
"FirstName",
IF (
SEARCH ( ",", Customer[Name], 1, 0 ) > 0,
MID (
Customer[Name],
SEARCH ( ",", Customer[Name], 1, 0 ) + 2,
LEN ( Customer[Name] )
),
BLANK ()
)
)
RETURN
FILTER (
SUMMARIZE (
NameTable,
[FirstName],
"NameCount",
COUNTX (
FILTER ( NameTable, [FirstName] = EARLIER ( [FirstName] ) ),
[FirstName]
)
),
[NameCount] = 1
)
import pandas as pd
# Sample dataframe
df_customers = pd.DataFrame({
'Name': ['Yang, Jon', 'Doe, Jane', 'Smith John', 'Yang, Jon', 'Adams, Mary']
})
# Split the Name column into LastName and FirstName
df_customers['CommaPosition'] = df_customers['Name'].str.find(',')
df_customers['FirstName'] = df_customers.apply(lambda row: row['Name'].split(',')[1].strip() if row['CommaPosition'] != -1 else None, axis=1)
# Group by FirstName and filter for NameCount = 1
result = df_customers.groupby('FirstName').size().reset_index(name='NameCount')
result = result[result['NameCount'] == 1]
# Convert result to list of lists format
result_list = result.values.tolist()
| Jane | 1 |
| Mary | 1 |
SEARCH ( ",", Customer[Name], 1, 0 ): The SEARCH function now includes the fourth parameter, which specifies the "Not Found" behavior. If a comma is not found, it returns 0. CommaPosition: A new column is created to store the position of the comma in the Customer[Name]. If no comma is found, it will be 0. Handling Names without a Comma: If a comma is found (CommaPosition > 0), the query splits the name into LastName and FirstName as before. If no comma is found, the entire name is placed in the LastName field, and FirstName is left blank. COUNTX: This function is used to iterate over rows of the NameTable, ensuring that the context is filtered down to the current group (in this case, FirstName). EARLIER([FirstName]): This keeps track of the outer row context (group by FirstName), so that COUNTX only counts rows where the FirstName matches the current value.
Leave a Comment