Visual Studioのwebtestファイルをもとにして、他のサイトの情報を取得する
ログインやセッションの必要ないページの情報は、HttpWebRequest/HttpWebResponseを使えば簡単に取得できるが、
ログインやセッション管理が必要なページとなると、そこそこ複雑なプログラムが必要になる。
また、アクセスするサイトごとに異なった処理が必要となるので、毎回コーディングが必要となってしまい、共通化することが難しい。
そこで、Visual Studio 2008のwebtestファイルを利用して、他のサイトにアクセスするユーティリティを作ってみた。
Webtestファイルであれば多くのアクセスパターンに汎用的に対応できるので、毎回個別にプログラミングする必要がなくなる。(必要そうなところのみ実装しているので、まだ全てのパターンには対応していない)
ダウンロードはここから
http://www.geofront.dev-asp.net/Download/Chiaki.zip
HTMLパーサには、こちらのものを使用しています。
http://japan.internet.com/developer/20051115/25.html
using System;
using System.Collections.Generic;
using System.Net;
using System.Text;
using System.Xml;
using System.IO;
using System.Web;
using Chiaki.Parser;

namespace Chiaki
{
    // TODO: support Credentials
    /// <summary>
    /// Replays the requests described in a Visual Studio .webtest file against the
    /// target site, sharing one cookie container across all requests and carrying
    /// extracted hidden-field values from one response into later requests.
    /// </summary>
    public class Scraper
    {
        private const string WebTestNamespace = "http://microsoft.com/schemas/VisualStudio/TeamTest/2006";
        private const string ExtractHiddenFieldsRule = "Microsoft.VisualStudio.TestTools.WebTesting.Rules.ExtractHiddenFields";
        private const string DefaultMethod = "GET";
        private const string DefaultEncodingName = "utf-8";

        #region properties
        private List<RequestData> requests;

        /// <summary>Requests read by <see cref="Load"/>; null until Load has been called.</summary>
        public List<RequestData> Requests
        {
            get { return this.requests; }
        }

        private Dictionary<string, string> context = new Dictionary<string, string>();

        /// <summary>
        /// Values extracted from responses, keyed by their placeholder text
        /// (for example "{{$HIDDEN1.__VIEWSTATE}}"). A parameter whose value equals
        /// a key is replaced by the stored value when a request is built.
        /// </summary>
        public Dictionary<string, string> Context
        {
            get { return this.context; }
        }

        private CookieContainer container = new CookieContainer();
        #endregion

        public Scraper()
        {
        }

        /// <summary>
        /// Reads every Request element of the given .webtest file into <see cref="Requests"/>,
        /// replacing any previously loaded requests.
        /// </summary>
        /// <param name="filename">Path of the .webtest XML file.</param>
        /// <exception cref="XmlException">A Request element has no Url attribute, or the file is not well-formed XML.</exception>
        public void Load(string filename)
        {
            requests = new List<RequestData>();

            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.Load(filename);
            XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
            nsmgr.AddNamespace("a", WebTestNamespace);

            foreach (XmlNode node in xmlDoc.SelectNodes("//a:Request", nsmgr))
            {
                string url = GetAttribute(node, "Url", null);
                if (url == null)
                {
                    throw new XmlException("Request element has no Url attribute.");
                }

                RequestData reqData = new RequestData();
                reqData.Method = GetAttribute(node, "Method", DefaultMethod);
                reqData.Url = url;
                reqData.Encoding = GetAttribute(node, "Encoding", DefaultEncodingName);

                ReadParameters(node, ".//a:FormPostParameter", nsmgr, reqData.FormPostParameters);
                ReadParameters(node, ".//a:QueryStringParameter", nsmgr, reqData.QueryStringParameters);

                foreach (XmlNode extractionRuleNode in node.SelectNodes("./a:ExtractionRules/a:ExtractionRule", nsmgr))
                {
                    ExtractionRuleData ruleData = new ExtractionRuleData();
                    ruleData.ClassName = GetAttribute(extractionRuleNode, "Classname", string.Empty);
                    ruleData.VariableName = GetAttribute(extractionRuleNode, "VariableName", string.Empty);
                    reqData.ExtractionRules.Add(ruleData);
                }

                requests.Add(reqData);
            }
        }

        /// <summary>
        /// Executes the loaded requests in order.
        /// </summary>
        /// <returns>
        /// The body of the last response. If a request fails with an HTTP error response,
        /// execution stops and the body of that error response is returned instead.
        /// Null when no request was loaded.
        /// </returns>
        /// <exception cref="InvalidOperationException"><see cref="Load"/> has not been called.</exception>
        /// <exception cref="WebException">The request failed without any response from the server.</exception>
        public string Execute()
        {
            if (requests == null)
            {
                throw new InvalidOperationException("Load must be called before Execute.");
            }

            string result = null;
            try
            {
                foreach (RequestData reqData in requests)
                {
                    result = ExecuteRequest(reqData);
                }
            }
            catch (WebException ex)
            {
                // Transport-level failures (DNS, timeout, refused connection) carry no
                // response; rethrow so the caller sees the real error rather than a
                // NullReferenceException.
                if (ex.Response == null)
                {
                    throw;
                }

                using (WebResponse errorResponse = ex.Response)
                {
                    result = ReadBody(errorResponse, null);
                }
            }
            return result;
        }

        #region private
        /// <summary>
        /// Sends one request, applies its extraction rules to the response body and returns that body.
        /// </summary>
        private string ExecuteRequest(RequestData reqData)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(BuildUrl(reqData));
            request.Method = reqData.Method;
            request.AllowAutoRedirect = true;
            request.Accept = "*/*";
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";
            // The shared container carries login/session cookies from one request to the next.
            request.CookieContainer = container;
            // TODO: request.Credentials = CredentialCache.DefaultCredentials;

            if ("POST" == reqData.Method)
            {
                Encoding requestEncoding = GetRequestEncoding(reqData);
                byte[] postData = requestEncoding.GetBytes(BuildFormData(reqData, requestEncoding));
                request.ContentType = "application/x-www-form-urlencoded";
                request.ContentLength = postData.Length;
                using (Stream stream = request.GetRequestStream())
                {
                    stream.Write(postData, 0, postData.Length);
                }
            }

            string responseFromServer;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                responseFromServer = ReadBody(response, reqData.Encoding);
            }

            foreach (ExtractionRuleData ruleData in reqData.ExtractionRules)
            {
                // Only the hidden-field extraction rule is implemented; other rules are ignored.
                if (ruleData.ClassName != null
                    && ruleData.ClassName.StartsWith(ExtractHiddenFieldsRule, StringComparison.Ordinal))
                {
                    ExtractHiddenField(responseFromServer, ruleData.VariableName);
                }
            }

            return responseFromServer;
        }

        /// <summary>
        /// Returns the request URL with the query-string parameters appended.
        /// Names and values are appended exactly as recorded; no URL-encoding is applied here.
        /// </summary>
        private string BuildUrl(RequestData reqData)
        {
            if (reqData.QueryStringParameters.Count == 0)
            {
                return reqData.Url;
            }

            string url = reqData.Url ?? string.Empty;
            StringBuilder builder = new StringBuilder(url);
            // The recorded Url may already contain a query string; in that case continue it with '&'.
            bool hasQuery = url.IndexOf('?') >= 0;
            foreach (KeyValuePair<string, string> parameter in reqData.QueryStringParameters)
            {
                builder.Append(hasQuery ? '&' : '?');
                hasQuery = true;
                builder.Append(parameter.Key);
                builder.Append('=');
                builder.Append(ResolveContextValue(parameter.Value));
            }
            return builder.ToString();
        }

        /// <summary>
        /// Builds the application/x-www-form-urlencoded body of a request.
        /// </summary>
        /// <param name="reqData">Request whose form parameters are serialized.</param>
        /// <param name="encoding">
        /// Encoding used for percent-encoding, so that non-ASCII values reach the server
        /// in the encoding the request declares rather than always in UTF-8.
        /// </param>
        private string BuildFormData(RequestData reqData, Encoding encoding)
        {
            if (reqData.FormPostParameters.Count == 0)
            {
                return string.Empty;
            }

            StringBuilder builder = new StringBuilder();
            foreach (KeyValuePair<string, string> parameter in reqData.FormPostParameters)
            {
                if (builder.Length > 0)
                {
                    builder.Append('&');
                }
                builder.Append(HttpUtility.UrlEncode(parameter.Key, encoding));
                builder.Append('=');
                builder.Append(HttpUtility.UrlEncode(ResolveContextValue(parameter.Value), encoding));
            }
            return builder.ToString();
        }

        /// <summary>
        /// Stores the value of every hidden input field found in <paramref name="source"/>
        /// in <see cref="Context"/> under the key "{{$HIDDEN" + variableName + "." + fieldName + "}}".
        /// </summary>
        private void ExtractHiddenField(string source, string variableName)
        {
            string prefix = "{{$HIDDEN" + variableName + ".";
            string suffix = "}}";

            ParseHTML parse = new ParseHTML();
            parse.Source = source;
            while (!parse.Eof())
            {
                char ch = parse.Parse();
                // As in the original code, a zero character means a tag is available via GetTag.
                if (ch != 0)
                {
                    continue;
                }

                AttributeList tag = parse.GetTag();
                // Ordinal comparison: ToUpper() is culture-sensitive and fails for
                // "input"/"hidden" under e.g. the Turkish culture.
                if (!string.Equals(tag.Name, "INPUT", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                if (!string.Equals(GetTagAttribute(tag, "type"), "HIDDEN", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                string name = GetTagAttribute(tag, "name");
                if (name == null)
                {
                    // Without a name there is no placeholder this field could be bound to.
                    continue;
                }

                string value = GetTagAttribute(tag, "value") ?? string.Empty;
                // Indexer instead of Add: the same field (e.g. on a later page or a second
                // Execute call) must overwrite the earlier value instead of throwing.
                Context[prefix + name + suffix] = value;
            }
        }

        /// <summary>Returns the value of the named tag attribute, or null when it is absent.</summary>
        private static string GetTagAttribute(AttributeList tag, string name)
        {
            return tag[name] != null ? tag[name].Value : null;
        }

        /// <summary>Returns the context value bound to <paramref name="value"/>, or the value itself when none is bound.</summary>
        private string ResolveContextValue(string value)
        {
            string bound;
            if (value != null && Context.TryGetValue(value, out bound))
            {
                return bound;
            }
            return value;
        }

        /// <summary>Returns the value of an XML attribute, or <paramref name="defaultValue"/> when it is absent.</summary>
        private static string GetAttribute(XmlNode node, string name, string defaultValue)
        {
            XmlAttribute attribute = node.Attributes[name];
            return attribute != null ? attribute.Value : defaultValue;
        }

        /// <summary>
        /// Copies the Name/Value attributes of all nodes matched by <paramref name="xpath"/> into
        /// <paramref name="target"/>. Nodes without a Name are skipped; a repeated name keeps the last value.
        /// </summary>
        private static void ReadParameters(XmlNode requestNode, string xpath, XmlNamespaceManager nsmgr, Dictionary<string, string> target)
        {
            foreach (XmlNode parameterNode in requestNode.SelectNodes(xpath, nsmgr))
            {
                string paramName = GetAttribute(parameterNode, "Name", null);
                if (paramName == null)
                {
                    continue;
                }
                target[paramName] = GetAttribute(parameterNode, "Value", string.Empty);
            }
        }

        /// <summary>
        /// Returns the encoding declared for the request, or UTF-8 when none is declared.
        /// An unknown encoding name still throws, so a misconfigured request is not silently sent as UTF-8.
        /// </summary>
        private static Encoding GetRequestEncoding(RequestData reqData)
        {
            if (string.IsNullOrEmpty(reqData.Encoding))
            {
                return Encoding.UTF8;
            }
            return Encoding.GetEncoding(reqData.Encoding);
        }

        /// <summary>
        /// Reads the whole response body as text. The character set reported by the response is
        /// used when it is recognized, then <paramref name="fallbackEncodingName"/>, then UTF-8.
        /// </summary>
        private static string ReadBody(WebResponse response, string fallbackEncodingName)
        {
            HttpWebResponse httpResponse = response as HttpWebResponse;
            Encoding encoding = TryGetEncoding(httpResponse != null ? httpResponse.CharacterSet : null);
            if (encoding == null)
            {
                encoding = TryGetEncoding(fallbackEncodingName) ?? Encoding.UTF8;
            }

            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream, encoding))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>Returns the named encoding, or null when the name is empty or not recognized.</summary>
        private static Encoding TryGetEncoding(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return null;
            }
            try
            {
                return Encoding.GetEncoding(name.Trim().Trim('"', '\''));
            }
            catch (ArgumentException)
            {
                return null;
            }
        }
        #endregion
    }
}
using System;
using System.Collections.Generic;
using System.Text;

namespace Chiaki
{
    /// <summary>
    /// Describes a single HTTP request: method, URL, encoding name, the parameters
    /// to send and the extraction rules attached to it.
    /// </summary>
    public class RequestData
    {
        /// <summary>Creates a request with empty parameter and rule collections.</summary>
        public RequestData()
        {
            FormPostParameters = new Dictionary<string, string>();
            QueryStringParameters = new Dictionary<string, string>();
            ExtractionRules = new List<ExtractionRuleData>();
        }

        /// <summary>HTTP method of the request.</summary>
        public string Method { get; set; }

        /// <summary>URL of the request.</summary>
        public string Url { get; set; }

        /// <summary>Name of the text encoding associated with the request.</summary>
        public string Encoding { get; set; }

        /// <summary>Form post parameters, keyed by parameter name.</summary>
        public Dictionary<string, string> FormPostParameters { get; private set; }

        /// <summary>Query-string parameters, keyed by parameter name.</summary>
        public Dictionary<string, string> QueryStringParameters { get; private set; }

        /// <summary>Extraction rules attached to the request.</summary>
        public List<ExtractionRuleData> ExtractionRules { get; private set; }
    }
}
using System;
using System.Collections.Generic;
using System.Text;

namespace Chiaki
{
    /// <summary>
    /// Holds the class name of an extraction rule and the variable name it is bound to.
    /// </summary>
    public class ExtractionRuleData
    {
        /// <summary>Class name of the extraction rule.</summary>
        public string ClassName { get; set; }

        /// <summary>Variable name associated with the rule.</summary>
        public string VariableName { get; set; }
    }
}