The following will extract the data from the HTML table on your target page
and load it into a ClientDataSet.
It's fairly long-winded, perhaps demonstrating that as David said, Delphi
is maybe not the best tool for the job.
On my Form1, I have a TEdit, edValue, for me to key in the value in the first
data row in the HTML table data. I use this as a way to find the table in the
HTML document. I dare say there are better methods, but at least my method should be more robust than hard-coding assumptions about the layout of the document in which the table is embedded that maybe won't survive a change by the page's author.
Broadly, the code works by first finding the HTML table cell using the contents of
my edValue.Text, then finding the table to which the cell belongs, and then
populating the CDS's Fields and data from the table.
The CDS fields are set to 255 characters by default; maybe there's a specification for
the data published on the web page that would allow you to use a smaller value for some, if not all, fields. They're all assumed to be of type ftString, to avoid the code choking on unexpected cell contents.
Btw, at the bottom is a utility function for saving the HTML page locally, to
save having to keep clicking the button for selecting a year + month. To reload
the WebBrowser from the saved file, just use the file's name as the URL to load.
TForm1 = class(TForm)
[ ... ]
public
{ Public declarations }
Doc : IHtmlDocument2;
procedure TForm1.btnFindValueClick(Sender: TObject);
var
Table : IHTMLTable;
begin
Doc := WebBrowser1.Document as IHTMLDocument2;
Table := FindTableByCellValue(edValue.Text);
Assert(Table <> Nil);
LoadCDSFromHTMLTable(CDS, Table);
end;
procedure TForm1.LoadCDSFromHTMLTable(DestCDS : TClientDataSet; Table : IHTMLTable);
var
I,
J : Integer;
vTable : OleVariant;
iRow : IHTMLTableRow;
FieldName,
FieldValue : String;
Field : TField;
const
cMaxFieldSize = 255;
scIDFieldName = 'ID';
begin
// Use OleVariant instead of IHTMLTable becuse it's less fiddly for doing what follows
vTable := Table;
Assert(not DestCDS.Active and (DestCDS.FieldCount = 0));
// First create an AutoInc field
Field := TAutoIncField.Create(Self);
Field.FieldName := scIDFieldName;
Field.DataSet := DestCDS;
// Next create CDS fields from the names in the cells in the first row of the table
for I := 0 to (vTable.Rows.Item(0).Cells.Length - 1) do begin
FieldName := vTable.Rows.Item(0).Cells.Item(I).InnerText;
Field := TStringField.Create(Self);
// At this point, we might want to clean up the FieldName by removing embedded spaces, etc
Field.FieldName := FieldName;
Field.Size := cMaxFieldSize;
Field.DataSet := DestCDS;
end;
DestCDS.DisableControls;
try
DestCDS.IndexFieldNames := scIDFieldName;
DestCDS.CreateDataSet;
// Next load the HTML table data into the CDS
for I := 1 to (vTable.Rows.Length - 1) do begin
DestCDS.Insert;
for J := 0 to vTable.Rows.Item(0).Cells.Length - 1 do begin
FieldValue := vTable.Rows.Item(I).Cells.Item(J).InnerText;
// the J + 1 is because Fields[0] is the autoinc one
DestCDS.Fields[J + 1].AsString := FieldValue;
end;
DestCDS.Post;
end;
DestCDS.First;
finally
DestCDS.EnableControls;
end;
end;
function TForm1.FindTableCellByTagValue(Doc : IHtmlDocument2; const AValue : String) : IHTMLTableCell;
var
All: IHTMLElementCollection;
Value: String;
I,
Len: Integer;
E: OleVariant;
iE : IHTMLElement;
iT : IHTMLTextElement;
iC : IHTMLTableCell;
begin
Result := Nil;
All := Doc.All;
if All = Nil then Exit;
Len := All.Length;
for I := 0 to Len - 1 do begin
E := All.Item(I, varEmpty);
iE := IDispatch(E) as IHTMLElement;
if Supports(iE, IHTMLTableCell, iC) then begin
Value := Trim(iE.Get_InnerText);
if Pos(Trim(AValue), Value) = 1 then begin
Result := iC;
Break;
end
end
else
Continue;
end;
end;
function TForm1.FindTableByCellValue(Value : String): IHTMLTable;
var
Node : IHtmlElement;
iTable : IHTMLTable;
iCell : IHTMLTableCell;
begin
Result := Nil;
iCell := FindTableCellByTagValue(Doc, edValue.Text);
if iCell = Nil then
Exit;
Node := IDispatch(iCell) as IHtmlElement;
// if we found a Node with the cell text we were looking for,
// we can now find the HTML table to which it belongs
while Node <> Nil do begin
Node := Node.parentElement;
if Supports(Node, IHTMLTable, iTable) then begin
Result := iTable;
Break;
end;
end;
end;
procedure TForm1.SaveFileLocally(const FileName : String);
var
PFile: IPersistFile; // declared in ActiveX unit
begin
PFile := Doc as IPersistFile;
PFile.Save(StringToOleStr(FileName), False);
end;