There are 819 non-pangrams and 49 perfect pangrams. There are 132 pangrams excluding the perfect pangrams. The number of pangrams would be 181 (132 + 49) if you include the perfect pangrams as well, considering they're also "pangrams".
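For clarity, these are the classification rules the counts are based on, as a minimal plain-Python sketch (the sample strings below are placeholders, not rows from the actual file):

import string

def classify(s: str) -> str:
    letters = [ch for ch in s.lower() if ch in string.ascii_lowercase]
    if len(set(letters)) < 26:
        return 'non-pangram'        # at least one letter of a-z is missing
    if len(letters) == 26:
        return 'perfect pangram'    # all 26 letters, each used exactly once
    return 'pangram'                # all 26 letters, some repeated

print(classify('the quick brown fox jumps over the lazy dog'))  # pangram
print(classify('hello world'))                                  # non-pangram

The same logic is applied below with spark, just expressed with array columns.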
import pandas as pd
import pyspark.sql.functions as func
# read the JSON list of strings through pandas and convert it to a spark dataframe
file_path = './drive/MyDrive/Copy of List of 1000 strings.json'
data_sdf = spark.createDataFrame(pd.read_json(file_path), ['strings'])
# string_letter_split keeps only a-z letters as an array (split('') also emits
# empty strings, which the filter drops); actual_size / distinct_size are the
# total and distinct letter counts used to classify each string
pre_process_sdf = data_sdf. \
    withColumn('strings_lower', func.lower('strings')). \
    withColumn('string_letter_split',
               func.filter(func.split(func.regexp_replace('strings_lower', '[^a-z]', ''), ''),
                           lambda x: ~x.isin('', ' '))
               ). \
    withColumn('distinct_letters', func.array_distinct('string_letter_split')). \
    withColumn('actual_size', func.size('string_letter_split')). \
    withColumn('distinct_size', func.size('distinct_letters'))
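
Note that func.filter with a Python lambda needs Spark 3.1+. On older versions the same filtering can be pushed into a SQL expression (the filter higher-order function has been available since Spark 2.4); a sketch of just that column, assuming the same chain as above:

# same string_letter_split column without the Python lambda (Spark >= 2.4)
data_sdf. \
    withColumn('strings_lower', func.lower('strings')). \
    withColumn('string_letter_split',
               func.expr("filter(split(regexp_replace(strings_lower, '[^a-z]', ''), ''), x -> x != '')")
               )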

# approach 1: label each string and count per label
pre_process_sdf. \
    withColumn('result',
               func.when(func.col('distinct_size') < 26, func.lit('non-pangram')).
               when((func.col('distinct_size') == 26) & (func.col('actual_size') == 26), func.lit('perfect pangram')).
               otherwise(func.lit('pangram'))
               ). \
    groupBy('result'). \
    agg(func.count('strings').alias('string_count')). \
    show()

# +---------------+------------+
# |         result|string_count|
# +---------------+------------+
# |        pangram|         132|
# |    non-pangram|         819|
# |perfect pangram|          49|
# +---------------+------------+

# approach 2: flag columns summed into a single row of counts
pre_process_sdf. \
    withColumn('non_pangram', (func.col('distinct_size') < 26).cast('int')). \
    withColumn('pangram', (func.col('distinct_size') == 26).cast('int')). \
    withColumn('perfect_pangram', ((func.col('pangram') == 1) & (func.col('actual_size') == 26)).cast('int')). \
    select(func.sum('non_pangram').alias('non_pangrams'),
           func.sum('pangram').alias('pangrams'),
           func.sum('perfect_pangram').alias('perfect_pangrams'),
           func.sum(func.when(func.col('perfect_pangram') == 0, func.col('pangram'))).alias('pangrams_excl_perfects')
           ). \
    show()

# +------------+--------+----------------+----------------------+
# |non_pangrams|pangrams|perfect_pangrams|pangrams_excl_perfects|
# +------------+--------+----------------+----------------------+
# |         819|     181|              49|                   132|
# +------------+--------+----------------+----------------------+
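
If you need the matching strings and not just the counts, the same size columns can be used as a filter; a minimal sketch reusing pre_process_sdf from above:

# the 49 perfect pangrams themselves: exactly 26 letters, all distinct
pre_process_sdf. \
    filter((func.col('distinct_size') == 26) & (func.col('actual_size') == 26)). \
    select('strings'). \
    show(truncate=False)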