C# webbrowser 坐标定位进行网页元素的模拟操作
in C# 爬虫 with 0 comment

C# webbrowser 坐标定位进行网页元素的模拟操作

in C# 爬虫 with 0 comment

前段时间,需要在某个网站上模拟一些简单的操作,但由于该网站的整个页面都是通过 JavaScript 函数动态加载的,网页元素根本无法定位(包括节点位置、节点名称)。网页完全加载完成后,页面的 HTML 中只有几个 DIV,因此使用 Python Selenium 和 C# 的 WebBrowser 控件都不能直接对元素进行操作。最后没办法,只能使用 C# WebBrowser 按坐标定位的方式进行模拟操作。

设置webbrowser浏览器版本

/// <summary>
/// Builds the main form. The browser-emulation registry values are written first,
/// before the designer-generated controls are created.
/// </summary>
public MainForm()
    {
        // Request emulation of Internet Explorer 9 for this executable (version is hard-coded).
        // NOTE(review): presumably this has to run before InitializeComponent() so the setting
        // is in place when the WebBrowser control is instantiated - confirm.
        SetWebBrowserFeatures(9);
        InitializeComponent();
        // Defined elsewhere in the project; not shown in this article.
        SetPathAndTime();
    }
    /// <summary>
    /// Writes per-application FeatureControl values under HKEY_CURRENT_USER so that the
    /// embedded WebBrowser control runs in the requested Internet Explorer emulation mode.
    /// Does nothing when the code is not running in runtime mode (e.g. inside the designer).
    /// </summary>
    /// <param name="ieVersion">Major IE version to emulate; translated to a mode value by GeoEmulationModee.</param>
    static void SetWebBrowserFeatures(int ieVersion)
    {
        // Don't change the registry if running in-proc inside Visual Studio.
        if (LicenseManager.UsageMode != LicenseUsageMode.Runtime)
        {
            return;
        }

        // File name of the running executable; the registry values are keyed by it.
        // Process is IDisposable, so release its handle as soon as the name has been read.
        string appName;
        using (var currentProcess = System.Diagnostics.Process.GetCurrentProcess())
        {
            appName = System.IO.Path.GetFileName(currentProcess.MainModule.FileName);
        }

        // Emulation mode value that corresponds to the requested IE version.
        UInt32 ieMode = GeoEmulationModee(ieVersion);
        var featureControlRegKey = @"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\";

        // Tell the browser control which mode (ieMode) to use for this application (appName).
        Registry.SetValue(featureControlRegKey + "FEATURE_BROWSER_EMULATION",
            appName, ieMode, RegistryValueKind.DWord);

        // Enable a feature which is "On" for the full Internet Explorer browser.
        // (The original author noted that the purpose of this setting was unclear to them.)
        Registry.SetValue(featureControlRegKey + "FEATURE_ENABLE_CLIPCHILDREN_OPTIMIZATION",
            appName, 1, RegistryValueKind.DWord);
    }
    /// <summary>
    /// Reads the installed Internet Explorer major version from
    /// HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Internet Explorer.
    /// The "svcVersion" value is preferred; "Version" is used when it is absent.
    /// </summary>
    /// <returns>The major version number (the part before the first '.'), always 7 or greater.</returns>
    /// <exception cref="ApplicationException">
    /// Thrown when the registry key or both version values are missing, or when the
    /// major version is below 7 (including the case where it cannot be parsed).
    /// </exception>
    static int GetBrowserVersion()
    {
        int browserVersion = 0;
        using (var ieKey = Registry.LocalMachine.OpenSubKey(@"SOFTWARE\Microsoft\Internet Explorer",
            RegistryKeyPermissionCheck.ReadSubTree,
            System.Security.AccessControl.RegistryRights.QueryValues))
        {
            // OpenSubKey returns null when the key does not exist; report that the same way
            // as a missing version value instead of failing with a NullReferenceException.
            if (ieKey == null)
            {
                throw new ApplicationException("Microsoft Internet Explorer is required!");
            }

            var version = ieKey.GetValue("svcVersion") ?? ieKey.GetValue("Version");
            if (version == null)
            {
                throw new ApplicationException("Microsoft Internet Explorer is required!");
            }

            // On a parse failure browserVersion stays 0 and is rejected by the check below.
            int.TryParse(version.ToString().Split('.')[0], out browserVersion);
        }

        // Versions below 7 are not supported.
        if (browserVersion < 7)
        {
            throw new ApplicationException("不支持的浏览器版本!");
        }
        return browserVersion;
    }
    /// <summary>
    /// Maps an Internet Explorer major version to the value written to
    /// FEATURE_BROWSER_EMULATION.
    /// </summary>
    /// <param name="browserVersion">IE major version.</param>
    /// <returns>
    /// version * 1000 for versions 7 through 11 (7000, 8000, 9000, 10000, 11000);
    /// 11000 (IE11) for any other input.
    /// </returns>
    static UInt32 GeoEmulationModee(int browserVersion)
    {
        bool isListedVersion = browserVersion >= 7 && browserVersion <= 11;
        if (!isListedVersion)
        {
            // Anything outside 7..11 falls back to the IE11 mode value.
            return 11000;
        }

        // Each listed version maps to "<major>000".
        return (UInt32)(browserVersion * 1000);
    }

设置鼠标位置

/// <summary>
/// P/Invoke declaration for SetCursorPos in User32, used to place the mouse cursor.
/// </summary>
/// <param name="x">Target X coordinate.</param>
/// <param name="y">Target Y coordinate.</param>
[DllImport("User32")]
public static extern void SetCursorPos(int x, int y);

鼠标事件

/// <summary>
/// P/Invoke declaration for mouse_event in user32, used to simulate mouse button
/// presses and releases.
/// </summary>
/// <param name="dwFlags">One or more of the MOUSEEVENTF_* flags declared below.</param>
/// <param name="dx">X value passed to the native function.</param>
/// <param name="dy">Y value passed to the native function.</param>
/// <param name="cButtons">Passed as 0 by the callers in this file.</param>
/// <param name="dwExtraInfo">Passed as 0 by the callers in this file.</param>
// The DllImport attribute is required: an extern method without it cannot be bound to
// the native function at run time.
// NOTE(review): the Win32 documentation declares mouse_event as returning void; the int
// return type is kept only so existing callers keep compiling - confirm before using it.
[DllImport("user32")]
private static extern int mouse_event(int dwFlags, int dx, int dy, int cButtons, int dwExtraInfo);

// Each flag is declared exactly once; duplicate const declarations do not compile.
// Move the mouse.
const int MOUSEEVENTF_MOVE = 0x0001;
// Simulate left button down.
const int MOUSEEVENTF_LEFTDOWN = 0x0002;
// Simulate left button up.
const int MOUSEEVENTF_LEFTUP = 0x0004;
// Simulate right button down.
const int MOUSEEVENTF_RIGHTDOWN = 0x0008;
// Simulate right button up.
const int MOUSEEVENTF_RIGHTUP = 0x0010;
// Simulate middle button down.
const int MOUSEEVENTF_MIDDLEDOWN = 0x0020;
// Simulate middle button up.
const int MOUSEEVENTF_MIDDLEUP = 0x0040;
// Indicates that absolute coordinates are used.
const int MOUSEEVENTF_ABSOLUTE = 0x8000;

模拟按键、粘贴板

/// <summary>
/// P/Invoke declaration for keybd_event in user32, used to simulate keyboard input.
/// </summary>
/// <param name="bVk">Virtual-key code of the key.</param>
/// <param name="bScan">Scan code; usually not needed, pass 0.</param>
/// <param name="dwFlags">Option flags: 0 means key down, 2 means key up.</param>
/// <param name="dwInfo">Extra information; normally 0.</param>
// A single DllImport attribute: applying it twice to one method does not compile.
[DllImport("user32")]
public static extern void keybd_event(byte bVk, byte bScan, byte dwFlags, byte dwInfo);

// Virtual-key codes used by the select-all / paste / confirm key sequence.
public const byte keyA = 65;
public const byte keyCtrl = 0x11;
public const byte keyV = 86;
public const byte keyEnter = 0xD;

// Put the text on the clipboard. The intended follow-up (per the original note) is to move
// the cursor to the target input, left-click it, and then paste with Ctrl+V.
Clipboard.SetDataObject(searchtext); 

控件坐标相对位置

// Open the search box: accumulate the Location of openSearchbtn and of each of its
// ancestors, walking up the Parent chain until the form itself is reached.
Point openSearchbtnshowPoint = new Point();
Control openSearchbtncontrol;
for (openSearchbtncontrol = openSearchbtn;
     openSearchbtncontrol != this;
     openSearchbtncontrol = openSearchbtncontrol.Parent)
{
    Point relativeToParent = openSearchbtncontrol.Location;
    openSearchbtnshowPoint.X += relativeToParent.X;
    openSearchbtnshowPoint.Y += relativeToParent.Y;
}

// The loop stopped on the form: add the form's own Location plus the fixed
// X/Y pixel offsets (13, 37).
Point formLocation = openSearchbtncontrol.Location;
openSearchbtnshowPoint.X += formLocation.X + 13;
openSearchbtnshowPoint.Y += formLocation.Y + 37;

// Move the cursor to the computed point, then pause for the configured click interval.
SetCursorPos(openSearchbtnshowPoint.X, openSearchbtnshowPoint.Y);
System.Threading.Thread.Sleep(clickSleepTime);

总体思路

  1. 判断页面加载完成

    /// <summary>
    /// DocumentCompleted handler skeleton: the place to decide, from the URL of the
    /// completed document, whether the page is one that needs processing.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data; e.Url is the URL the event was raised for.</param>
    private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) {

     // Use the current URL to decide whether this is a page that needs handling; per the
     // original note, frames ("JFrame" in the original, presumably iframes) also end up
     // in this handler.
     int len = e.Url.LocalPath.Length;
     // Not used in the code shown here.
     int flag = 1;
     //if (len == 14){}

    }

  2. 页面元素递归处理和页面跳转

    // Full text of the currently loaded page.
    String s = mainWebBrowser.DocumentText;
    // Ticket pattern: "TGT-", then the shortest run of non-whitespace characters that is
    // followed by a literal ".cn". The backslashes are required: without them "[S]" only
    // matches the letter S and "." matches any character.
    Regex restr = new Regex(@"TGT-[\S]+?\.cn");
    Match mtc = restr.Match(s);
    // Groups[0] is the whole match; it is appended as the ticket query parameter.
    String url = "http://IP/webgis/?ticket=" + mtc.Groups[0].Value;
    // Look up an element node by its ID.
    HtmlElement enter4A_btn = mainWebBrowser.Document.GetElementById("enter4A_btn");
    // Walk from that node through Parent/Children to reach the target node.
    HtmlElement user = enter4A_btn.Parent.Children[1].Children[0].Children[0].Children[0].Children[1];
    // "\r\n" ends the log line in the TextBox.
    LogTextBox.AppendText(DateTime.Now.ToString("HH:mm:ss ") + user.InnerText + "登录成功\r\n");
    // Navigate to the ticketed URL.
    mainWebBrowser.Navigate(url);

  3. C# 程序无卡顿等待

    /// <summary>
    /// Waits until at least <paramref name="mm"/> milliseconds have elapsed while repeatedly
    /// calling Application.DoEvents(), so pending window messages keep being processed.
    /// </summary>
    /// <param name="mm">Number of milliseconds to wait; zero or a negative value returns immediately.</param>
    public static void Delay(int mm) {
        // Stopwatch measures elapsed time independently of the system clock, so a clock
        // adjustment during the wait cannot shorten or prolong the delay the way a
        // DateTime.Now comparison could.
        System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (elapsed.ElapsedMilliseconds < mm) {
            Application.DoEvents();
        }
    }

  4. 鼠标点击无效的处理

    // Simulated left click at (search_x, search_y): position the cursor, then send
    // button-down and button-up with pauses in between.
    SetCursorPos(search_x, search_y);
    Delay(100);
    mouse_event(MOUSEEVENTF_LEFTDOWN, search_x, search_y, 0, 0);
    // A pause is needed between pressing and releasing the button; otherwise the click
    // may not register.
    Delay(200);
    mouse_event(MOUSEEVENTF_LEFTUP, search_x, search_y, 0, 0);
    Delay(100);

  5. 粘贴板

    // Put the text on the clipboard, then replace the input's content via simulated keys.
    Clipboard.SetDataObject(searchtext);
    keybd_event(keyCtrl, 0, 0, 0);
    // To clear whatever was previously in the input box, Ctrl+A is sent first and then
    // Ctrl+V, so the pasted text replaces the selection.
    // Third argument: 0 means key down, 2 means key up.
    keybd_event(keyA, 0, 0, 0);
    keybd_event(keyCtrl, 0, 2, 0);
    keybd_event(keyA, 0, 2, 0);
    // Ctrl+V: paste the clipboard text.
    keybd_event(keyCtrl, 0, 0, 0);
    keybd_event(keyV, 0, 0, 0);
    keybd_event(keyCtrl, 0, 2, 0);
    keybd_event(keyV, 0, 2, 0);
    // Press and release Enter.
    keybd_event(keyEnter, 0, 0, 0);
    keybd_event(keyEnter, 0, 2, 0);

  6. 其他知识点

    // Time formatting: prefix the log line with the current time; "\r\n" ends the line.
    LogTextBox.AppendText(DateTime.Now.ToString("HH:mm:ss ") + " 配置文件中的路径可能出错,请查看配置文件\r\n");
    // Split: take the part after '=' and parse it as an integer.
    clickSleepTime = int.Parse(tmptext[2].Split(new Char[] { '=' })[1]);
    // Scroll the TextBox to the end.
    LogTextBox.SelectionStart = LogTextBox.Text.Length;
    this.LogTextBox.ScrollToCaret();
    // File writing: save the log next to the current directory under a timestamped name.
    // "MM" is the month specifier ("mm" would insert minutes), and the path separator
    // must be written as an escaped backslash.
    string name = System.Environment.CurrentDirectory + "\\" + DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss") + "-log.txt";
    LogTextBox.AppendText("保存日志:" + name);
    MessageBox.Show(name);
    // using blocks flush and close the writer and the stream even if Write throws.
    using (FileStream fs = new FileStream(name, FileMode.CreateNew))
    using (StreamWriter sw = new StreamWriter(fs))
    {
        sw.Write(LogTextBox.Text);
    }
    // TextBox line break: append "\r\n" after each line.
    tmptext = tmptext + line.ToString().Substring(11, line.ToString().Length - 11) + "\r\n";
    /// <summary>
    /// FormClosed handler for the main form: shuts the program down.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data.</param>
    private void MainForm_FormClosed(object sender, FormClosedEventArgs e)
    {
        // Request application exit first, then end the process with exit code 0.
        Application.Exit();
        Environment.Exit(0);
    }

Responses