Basically, there are a couple of issues with your formulation.
First, you need to convert the date column from a string to its proper date type.
Next, a Window in PySpark lets you partition the rows (here by id) and specify the ordering columns one after the other (here version descending, then dt descending). The rank() function then ranks the rows within each partition of that Window. Finally, all that remains is to keep the rows whose rank is 1.
from pyspark.sql.types import *
from pyspark import SparkContext, SQLContext
import pyspark.sql.functions as F
from pyspark.sql import Window
# Local Spark entry points for this demo.
# NOTE: SQLContext is deprecated since Spark 3.0 in favour of SparkSession;
# it is kept here so the `sc` / `sqlContext` names stay unchanged.
sc = SparkContext('local')
sqlContext = SQLContext(sc)

# Sample rows in (id, version, dt, Name) order; dt starts out as a string.
data1 = [
    (100, 1, "2020-03-19", "Nil1"),
    (100, 2, "2020-04-19", "Nil2"),
    (100, 2, "2020-04-19", "Nil2"),
    (100, 2, "2020-05-19", "Ni13"),
    (200, 1, "2020-09-19", "Jay1"),
    (200, 2, "2020-07-19", "Jay2"),
    (200, 2, "2020-08-19", "Jay3"),
]
df1Columns = ["id", "version", "dt", "Name"]
df1 = sqlContext.createDataFrame(data=data1, schema=df1Columns)

# Parse the string column straight into a DateType column. to_date accepts
# the format directly, so no intermediate to_timestamp is needed, and
# withColumn already names the result "dt", so no alias is needed either.
df1 = df1.withColumn("dt", F.to_date("dt", "yyyy-MM-dd"))

print("Schema.")
df1.printSchema()

print("Actual initial data")
df1.show(truncate=False)

# Within each id, order rows by highest version first and, among equal
# versions, by latest dt first.
wind = Window.partitionBy("id").orderBy(F.desc("version"), F.desc("dt"))

# rank() gives tied rows the same rank (the two identical 2020-04-19 rows
# both get rank 2). If the top rows of an id tie on (version, dt), the
# rank == 1 filter below returns all of them; use F.row_number() instead
# when exactly one row per id is required.
df1 = df1.withColumn("rank", F.rank().over(wind))

print("Ranking over the window spec specified")
df1.show(truncate=False)

# Keep only the top-ranked row(s) per id and drop the helper column.
final_df = df1.filter(F.col("rank") == 1).drop("rank")

print("Filtering the final result by applying the rank == 1 condition")
final_df.show(truncate=False)
Output:
Schema.
root
|-- id: long (nullable = true)
|-- version: long (nullable = true)
|-- dt: date (nullable = true)
|-- Name: string (nullable = true)
Actual initial data
+---+-------+----------+----+
|id |version|dt |Name|
+---+-------+----------+----+
|100|1 |2020-03-19|Nil1|
|100|2 |2020-04-19|Nil2|
|100|2 |2020-04-19|Nil2|
|100|2 |2020-05-19|Ni13|
|200|1 |2020-09-19|Jay1|
|200|2 |2020-07-19|Jay2|
|200|2 |2020-08-19|Jay3|
+---+-------+----------+----+
Ranking over the window spec specified
+---+-------+----------+----+----+
|id |version|dt |Name|rank|
+---+-------+----------+----+----+
|100|2 |2020-05-19|Ni13|1 |
|100|2 |2020-04-19|Nil2|2 |
|100|2 |2020-04-19|Nil2|2 |
|100|1 |2020-03-19|Nil1|4 |
|200|2 |2020-08-19|Jay3|1 |
|200|2 |2020-07-19|Jay2|2 |
|200|1 |2020-09-19|Jay1|3 |
+---+-------+----------+----+----+
Filtering the final result by applying the rank == 1 condition
+---+-------+----------+----+
|id |version|dt |Name|
+---+-------+----------+----+
|100|2 |2020-05-19|Ni13|
|200|2 |2020-08-19|Jay3|
+---+-------+----------+----+