uri.c 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313
  1. /**
  2. * uri.c: set of generic URI related routines
  3. *
  4. * Reference: RFCs 3986, 2732 and 2373
  5. *
  6. * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  22. * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. *
  25. * Except as contained in this notice, the name of Daniel Veillard shall not
  26. * be used in advertising or otherwise to promote the sale, use or other
  27. * dealings in this Software without prior written authorization from him.
  28. *
  29. * daniel@veillard.com
  30. *
  31. **
  32. *
  33. * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
  34. *
  35. * This library is free software; you can redistribute it and/or
  36. * modify it under the terms of the GNU Lesser General Public
  37. * License as published by the Free Software Foundation; either
  38. * version 2.1 of the License, or (at your option) any later version.
  39. *
  40. * This library is distributed in the hope that it will be useful,
  41. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  42. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  43. * Lesser General Public License for more details.
  44. *
  45. * You should have received a copy of the GNU Lesser General Public
  46. * License along with this library. If not, see <https://www.gnu.org/licenses/>.
  47. *
  48. * Authors:
  49. * Richard W.M. Jones <rjones@redhat.com>
  50. *
  51. */
  52. #include "qemu/osdep.h"
  53. #include "qemu/cutils.h"
  54. #include "qemu/uri.h"
  55. static void uri_clean(URI *uri);
  56. /*
  57. * Old rule from 2396 used in legacy handling code
  58. * alpha = lowalpha | upalpha
  59. */
  60. #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  61. /*
  62. * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  63. * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  64. * "u" | "v" | "w" | "x" | "y" | "z"
  65. */
  66. #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  67. /*
  68. * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  69. * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  70. * "U" | "V" | "W" | "X" | "Y" | "Z"
  71. */
  72. #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  73. #ifdef IS_DIGIT
  74. #undef IS_DIGIT
  75. #endif
  76. /*
  77. * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  78. */
  79. #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  80. /*
  81. * alphanum = alpha | digit
  82. */
  83. #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  84. /*
  85. * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  86. */
  87. #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
  88. ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
  89. ((x) == '(') || ((x) == ')'))
  90. /*
  91. * unwise = "{" | "}" | "|" | "\" | "^" | "`"
  92. */
  93. #define IS_UNWISE(p) \
  94. (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
  95. ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
  96. ((*(p) == ']')) || ((*(p) == '`')))
  97. /*
  98. * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
  99. * "[" | "]"
  100. */
  101. #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
  102. ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
  103. ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
  104. ((x) == ']'))
  105. /*
  106. * unreserved = alphanum | mark
  107. */
  108. #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
  109. /*
  110. * Skip to next pointer char, handle escaped sequences
  111. */
  112. #define NEXT(p) ((*p == '%') ? p += 3 : p++)
  113. /*
  114. * Productions from the spec.
  115. *
  116. * authority = server | reg_name
  117. * reg_name = 1*( unreserved | escaped | "$" | "," |
  118. * ";" | ":" | "@" | "&" | "=" | "+" )
  119. *
  120. * path = [ abs_path | opaque_part ]
  121. */
  122. /************************************************************************
  123. * *
  124. * RFC 3986 parser *
  125. * *
  126. ************************************************************************/
  127. #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
  128. #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
  129. ((*(p) >= 'A') && (*(p) <= 'Z')))
  130. #define ISA_HEXDIG(p) \
  131. (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
  132. ((*(p) >= 'A') && (*(p) <= 'F')))
  133. /*
  134. * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
  135. * / "*" / "+" / "," / ";" / "="
  136. */
  137. #define ISA_SUB_DELIM(p) \
  138. (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
  139. ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
  140. ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
  141. ((*(p) == '=')) || ((*(p) == '\'')))
  142. /*
  143. * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
  144. */
  145. #define ISA_GEN_DELIM(p) \
  146. (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
  147. ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
  148. ((*(p) == '@')))
  149. /*
  150. * reserved = gen-delims / sub-delims
  151. */
  152. #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
  153. /*
  154. * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  155. */
  156. #define ISA_UNRESERVED(p) \
  157. ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
  158. ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
  159. /*
  160. * pct-encoded = "%" HEXDIG HEXDIG
  161. */
  162. #define ISA_PCT_ENCODED(p) \
  163. ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
  164. /*
  165. * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  166. */
  167. #define ISA_PCHAR(p) \
  168. (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
  169. ((*(p) == ':')) || ((*(p) == '@')))
  170. /**
  171. * rfc3986_parse_scheme:
  172. * @uri: pointer to an URI structure
  173. * @str: pointer to the string to analyze
  174. *
  175. * Parse an URI scheme
  176. *
  177. * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  178. *
  179. * Returns 0 or the error code
  180. */
  181. static int rfc3986_parse_scheme(URI *uri, const char **str)
  182. {
  183. const char *cur;
  184. if (str == NULL) {
  185. return -1;
  186. }
  187. cur = *str;
  188. if (!ISA_ALPHA(cur)) {
  189. return 2;
  190. }
  191. cur++;
  192. while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || (*cur == '+') || (*cur == '-') ||
  193. (*cur == '.')) {
  194. cur++;
  195. }
  196. if (uri != NULL) {
  197. g_free(uri->scheme);
  198. uri->scheme = g_strndup(*str, cur - *str);
  199. }
  200. *str = cur;
  201. return 0;
  202. }
  203. /**
  204. * rfc3986_parse_fragment:
  205. * @uri: pointer to an URI structure
  206. * @str: pointer to the string to analyze
  207. *
  208. * Parse the query part of an URI
  209. *
  210. * fragment = *( pchar / "/" / "?" )
  211. * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
  212. * in the fragment identifier but this is used very broadly for
  213. * xpointer scheme selection, so we are allowing it here to not break
  214. * for example all the DocBook processing chains.
  215. *
  216. * Returns 0 or the error code
  217. */
  218. static int rfc3986_parse_fragment(URI *uri, const char **str)
  219. {
  220. const char *cur;
  221. if (str == NULL) {
  222. return -1;
  223. }
  224. cur = *str;
  225. while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  226. (*cur == '[') || (*cur == ']') ||
  227. ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
  228. NEXT(cur);
  229. }
  230. if (uri != NULL) {
  231. g_free(uri->fragment);
  232. if (uri->cleanup & 2) {
  233. uri->fragment = g_strndup(*str, cur - *str);
  234. } else {
  235. uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
  236. }
  237. }
  238. *str = cur;
  239. return 0;
  240. }
  241. /**
  242. * rfc3986_parse_query:
  243. * @uri: pointer to an URI structure
  244. * @str: pointer to the string to analyze
  245. *
  246. * Parse the query part of an URI
  247. *
  248. * query = *uric
  249. *
  250. * Returns 0 or the error code
  251. */
  252. static int rfc3986_parse_query(URI *uri, const char **str)
  253. {
  254. const char *cur;
  255. if (str == NULL) {
  256. return -1;
  257. }
  258. cur = *str;
  259. while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  260. ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
  261. NEXT(cur);
  262. }
  263. if (uri != NULL) {
  264. g_free(uri->query);
  265. uri->query = g_strndup(*str, cur - *str);
  266. }
  267. *str = cur;
  268. return 0;
  269. }
  270. /**
  271. * rfc3986_parse_port:
  272. * @uri: pointer to an URI structure
  273. * @str: the string to analyze
  274. *
  275. * Parse a port part and fills in the appropriate fields
  276. * of the @uri structure
  277. *
  278. * port = *DIGIT
  279. *
  280. * Returns 0 or the error code
  281. */
  282. static int rfc3986_parse_port(URI *uri, const char **str)
  283. {
  284. const char *cur = *str;
  285. int port = 0;
  286. if (ISA_DIGIT(cur)) {
  287. while (ISA_DIGIT(cur)) {
  288. port = port * 10 + (*cur - '0');
  289. if (port > 65535) {
  290. return 1;
  291. }
  292. cur++;
  293. }
  294. if (uri) {
  295. uri->port = port;
  296. }
  297. *str = cur;
  298. return 0;
  299. }
  300. return 1;
  301. }
  302. /**
  303. * rfc3986_parse_user_info:
  304. * @uri: pointer to an URI structure
  305. * @str: the string to analyze
  306. *
  307. * Parse a user information part and fill in the appropriate fields
  308. * of the @uri structure
  309. *
  310. * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
  311. *
  312. * Returns 0 or the error code
  313. */
  314. static int rfc3986_parse_user_info(URI *uri, const char **str)
  315. {
  316. const char *cur;
  317. cur = *str;
  318. while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur) ||
  319. (*cur == ':')) {
  320. NEXT(cur);
  321. }
  322. if (*cur == '@') {
  323. if (uri != NULL) {
  324. g_free(uri->user);
  325. if (uri->cleanup & 2) {
  326. uri->user = g_strndup(*str, cur - *str);
  327. } else {
  328. uri->user = uri_string_unescape(*str, cur - *str, NULL);
  329. }
  330. }
  331. *str = cur;
  332. return 0;
  333. }
  334. return 1;
  335. }
  336. /**
  337. * rfc3986_parse_dec_octet:
  338. * @str: the string to analyze
  339. *
  340. * dec-octet = DIGIT ; 0-9
  341. * / %x31-39 DIGIT ; 10-99
  342. * / "1" 2DIGIT ; 100-199
  343. * / "2" %x30-34 DIGIT ; 200-249
  344. * / "25" %x30-35 ; 250-255
  345. *
  346. * Skip a dec-octet.
  347. *
  348. * Returns 0 if found and skipped, 1 otherwise
  349. */
  350. static int rfc3986_parse_dec_octet(const char **str)
  351. {
  352. const char *cur = *str;
  353. if (!(ISA_DIGIT(cur))) {
  354. return 1;
  355. }
  356. if (!ISA_DIGIT(cur + 1)) {
  357. cur++;
  358. } else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur + 2))) {
  359. cur += 2;
  360. } else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) {
  361. cur += 3;
  362. } else if ((*cur == '2') && (*(cur + 1) >= '0') && (*(cur + 1) <= '4') &&
  363. (ISA_DIGIT(cur + 2))) {
  364. cur += 3;
  365. } else if ((*cur == '2') && (*(cur + 1) == '5') && (*(cur + 2) >= '0') &&
  366. (*(cur + 1) <= '5')) {
  367. cur += 3;
  368. } else {
  369. return 1;
  370. }
  371. *str = cur;
  372. return 0;
  373. }
  374. /**
  375. * rfc3986_parse_host:
  376. * @uri: pointer to an URI structure
  377. * @str: the string to analyze
  378. *
  379. * Parse an host part and fills in the appropriate fields
  380. * of the @uri structure
  381. *
  382. * host = IP-literal / IPv4address / reg-name
  383. * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
  384. * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
  385. * reg-name = *( unreserved / pct-encoded / sub-delims )
  386. *
  387. * Returns 0 or the error code
  388. */
  389. static int rfc3986_parse_host(URI *uri, const char **str)
  390. {
  391. const char *cur = *str;
  392. const char *host;
  393. host = cur;
  394. /*
  395. * IPv6 and future addressing scheme are enclosed between brackets
  396. */
  397. if (*cur == '[') {
  398. cur++;
  399. while ((*cur != ']') && (*cur != 0)) {
  400. cur++;
  401. }
  402. if (*cur != ']') {
  403. return 1;
  404. }
  405. cur++;
  406. goto found;
  407. }
  408. /*
  409. * try to parse an IPv4
  410. */
  411. if (ISA_DIGIT(cur)) {
  412. if (rfc3986_parse_dec_octet(&cur) != 0) {
  413. goto not_ipv4;
  414. }
  415. if (*cur != '.') {
  416. goto not_ipv4;
  417. }
  418. cur++;
  419. if (rfc3986_parse_dec_octet(&cur) != 0) {
  420. goto not_ipv4;
  421. }
  422. if (*cur != '.') {
  423. goto not_ipv4;
  424. }
  425. if (rfc3986_parse_dec_octet(&cur) != 0) {
  426. goto not_ipv4;
  427. }
  428. if (*cur != '.') {
  429. goto not_ipv4;
  430. }
  431. if (rfc3986_parse_dec_octet(&cur) != 0) {
  432. goto not_ipv4;
  433. }
  434. goto found;
  435. not_ipv4:
  436. cur = *str;
  437. }
  438. /*
  439. * then this should be a hostname which can be empty
  440. */
  441. while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) {
  442. NEXT(cur);
  443. }
  444. found:
  445. if (uri != NULL) {
  446. g_free(uri->authority);
  447. uri->authority = NULL;
  448. g_free(uri->server);
  449. if (cur != host) {
  450. if (uri->cleanup & 2) {
  451. uri->server = g_strndup(host, cur - host);
  452. } else {
  453. uri->server = uri_string_unescape(host, cur - host, NULL);
  454. }
  455. } else {
  456. uri->server = NULL;
  457. }
  458. }
  459. *str = cur;
  460. return 0;
  461. }
  462. /**
  463. * rfc3986_parse_authority:
  464. * @uri: pointer to an URI structure
  465. * @str: the string to analyze
  466. *
  467. * Parse an authority part and fills in the appropriate fields
  468. * of the @uri structure
  469. *
  470. * authority = [ userinfo "@" ] host [ ":" port ]
  471. *
  472. * Returns 0 or the error code
  473. */
  474. static int rfc3986_parse_authority(URI *uri, const char **str)
  475. {
  476. const char *cur;
  477. int ret;
  478. cur = *str;
  479. /*
  480. * try to parse a userinfo and check for the trailing @
  481. */
  482. ret = rfc3986_parse_user_info(uri, &cur);
  483. if ((ret != 0) || (*cur != '@')) {
  484. cur = *str;
  485. } else {
  486. cur++;
  487. }
  488. ret = rfc3986_parse_host(uri, &cur);
  489. if (ret != 0) {
  490. return ret;
  491. }
  492. if (*cur == ':') {
  493. cur++;
  494. ret = rfc3986_parse_port(uri, &cur);
  495. if (ret != 0) {
  496. return ret;
  497. }
  498. }
  499. *str = cur;
  500. return 0;
  501. }
  502. /**
  503. * rfc3986_parse_segment:
  504. * @str: the string to analyze
  505. * @forbid: an optional forbidden character
  506. * @empty: allow an empty segment
  507. *
  508. * Parse a segment and fills in the appropriate fields
  509. * of the @uri structure
  510. *
  511. * segment = *pchar
  512. * segment-nz = 1*pchar
  513. * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
  514. * ; non-zero-length segment without any colon ":"
  515. *
  516. * Returns 0 or the error code
  517. */
  518. static int rfc3986_parse_segment(const char **str, char forbid, int empty)
  519. {
  520. const char *cur;
  521. cur = *str;
  522. if (!ISA_PCHAR(cur)) {
  523. if (empty) {
  524. return 0;
  525. }
  526. return 1;
  527. }
  528. while (ISA_PCHAR(cur) && (*cur != forbid)) {
  529. NEXT(cur);
  530. }
  531. *str = cur;
  532. return 0;
  533. }
  534. /**
  535. * rfc3986_parse_path_ab_empty:
  536. * @uri: pointer to an URI structure
  537. * @str: the string to analyze
  538. *
  539. * Parse an path absolute or empty and fills in the appropriate fields
  540. * of the @uri structure
  541. *
  542. * path-abempty = *( "/" segment )
  543. *
  544. * Returns 0 or the error code
  545. */
  546. static int rfc3986_parse_path_ab_empty(URI *uri, const char **str)
  547. {
  548. const char *cur;
  549. int ret;
  550. cur = *str;
  551. while (*cur == '/') {
  552. cur++;
  553. ret = rfc3986_parse_segment(&cur, 0, 1);
  554. if (ret != 0) {
  555. return ret;
  556. }
  557. }
  558. if (uri != NULL) {
  559. g_free(uri->path);
  560. if (*str != cur) {
  561. if (uri->cleanup & 2) {
  562. uri->path = g_strndup(*str, cur - *str);
  563. } else {
  564. uri->path = uri_string_unescape(*str, cur - *str, NULL);
  565. }
  566. } else {
  567. uri->path = NULL;
  568. }
  569. }
  570. *str = cur;
  571. return 0;
  572. }
  573. /**
  574. * rfc3986_parse_path_absolute:
  575. * @uri: pointer to an URI structure
  576. * @str: the string to analyze
  577. *
  578. * Parse an path absolute and fills in the appropriate fields
  579. * of the @uri structure
  580. *
  581. * path-absolute = "/" [ segment-nz *( "/" segment ) ]
  582. *
  583. * Returns 0 or the error code
  584. */
  585. static int rfc3986_parse_path_absolute(URI *uri, const char **str)
  586. {
  587. const char *cur;
  588. int ret;
  589. cur = *str;
  590. if (*cur != '/') {
  591. return 1;
  592. }
  593. cur++;
  594. ret = rfc3986_parse_segment(&cur, 0, 0);
  595. if (ret == 0) {
  596. while (*cur == '/') {
  597. cur++;
  598. ret = rfc3986_parse_segment(&cur, 0, 1);
  599. if (ret != 0) {
  600. return ret;
  601. }
  602. }
  603. }
  604. if (uri != NULL) {
  605. g_free(uri->path);
  606. if (cur != *str) {
  607. if (uri->cleanup & 2) {
  608. uri->path = g_strndup(*str, cur - *str);
  609. } else {
  610. uri->path = uri_string_unescape(*str, cur - *str, NULL);
  611. }
  612. } else {
  613. uri->path = NULL;
  614. }
  615. }
  616. *str = cur;
  617. return 0;
  618. }
  619. /**
  620. * rfc3986_parse_path_rootless:
  621. * @uri: pointer to an URI structure
  622. * @str: the string to analyze
  623. *
  624. * Parse an path without root and fills in the appropriate fields
  625. * of the @uri structure
  626. *
  627. * path-rootless = segment-nz *( "/" segment )
  628. *
  629. * Returns 0 or the error code
  630. */
  631. static int rfc3986_parse_path_rootless(URI *uri, const char **str)
  632. {
  633. const char *cur;
  634. int ret;
  635. cur = *str;
  636. ret = rfc3986_parse_segment(&cur, 0, 0);
  637. if (ret != 0) {
  638. return ret;
  639. }
  640. while (*cur == '/') {
  641. cur++;
  642. ret = rfc3986_parse_segment(&cur, 0, 1);
  643. if (ret != 0) {
  644. return ret;
  645. }
  646. }
  647. if (uri != NULL) {
  648. g_free(uri->path);
  649. if (cur != *str) {
  650. if (uri->cleanup & 2) {
  651. uri->path = g_strndup(*str, cur - *str);
  652. } else {
  653. uri->path = uri_string_unescape(*str, cur - *str, NULL);
  654. }
  655. } else {
  656. uri->path = NULL;
  657. }
  658. }
  659. *str = cur;
  660. return 0;
  661. }
  662. /**
  663. * rfc3986_parse_path_no_scheme:
  664. * @uri: pointer to an URI structure
  665. * @str: the string to analyze
  666. *
  667. * Parse an path which is not a scheme and fills in the appropriate fields
  668. * of the @uri structure
  669. *
  670. * path-noscheme = segment-nz-nc *( "/" segment )
  671. *
  672. * Returns 0 or the error code
  673. */
  674. static int rfc3986_parse_path_no_scheme(URI *uri, const char **str)
  675. {
  676. const char *cur;
  677. int ret;
  678. cur = *str;
  679. ret = rfc3986_parse_segment(&cur, ':', 0);
  680. if (ret != 0) {
  681. return ret;
  682. }
  683. while (*cur == '/') {
  684. cur++;
  685. ret = rfc3986_parse_segment(&cur, 0, 1);
  686. if (ret != 0) {
  687. return ret;
  688. }
  689. }
  690. if (uri != NULL) {
  691. g_free(uri->path);
  692. if (cur != *str) {
  693. if (uri->cleanup & 2) {
  694. uri->path = g_strndup(*str, cur - *str);
  695. } else {
  696. uri->path = uri_string_unescape(*str, cur - *str, NULL);
  697. }
  698. } else {
  699. uri->path = NULL;
  700. }
  701. }
  702. *str = cur;
  703. return 0;
  704. }
  705. /**
  706. * rfc3986_parse_hier_part:
  707. * @uri: pointer to an URI structure
  708. * @str: the string to analyze
  709. *
  710. * Parse an hierarchical part and fills in the appropriate fields
  711. * of the @uri structure
  712. *
  713. * hier-part = "//" authority path-abempty
  714. * / path-absolute
  715. * / path-rootless
  716. * / path-empty
  717. *
  718. * Returns 0 or the error code
  719. */
  720. static int rfc3986_parse_hier_part(URI *uri, const char **str)
  721. {
  722. const char *cur;
  723. int ret;
  724. cur = *str;
  725. if ((*cur == '/') && (*(cur + 1) == '/')) {
  726. cur += 2;
  727. ret = rfc3986_parse_authority(uri, &cur);
  728. if (ret != 0) {
  729. return ret;
  730. }
  731. ret = rfc3986_parse_path_ab_empty(uri, &cur);
  732. if (ret != 0) {
  733. return ret;
  734. }
  735. *str = cur;
  736. return 0;
  737. } else if (*cur == '/') {
  738. ret = rfc3986_parse_path_absolute(uri, &cur);
  739. if (ret != 0) {
  740. return ret;
  741. }
  742. } else if (ISA_PCHAR(cur)) {
  743. ret = rfc3986_parse_path_rootless(uri, &cur);
  744. if (ret != 0) {
  745. return ret;
  746. }
  747. } else {
  748. /* path-empty is effectively empty */
  749. if (uri != NULL) {
  750. g_free(uri->path);
  751. uri->path = NULL;
  752. }
  753. }
  754. *str = cur;
  755. return 0;
  756. }
  757. /**
  758. * rfc3986_parse_relative_ref:
  759. * @uri: pointer to an URI structure
  760. * @str: the string to analyze
  761. *
  762. * Parse an URI string and fills in the appropriate fields
  763. * of the @uri structure
  764. *
  765. * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
  766. * relative-part = "//" authority path-abempty
  767. * / path-absolute
  768. * / path-noscheme
  769. * / path-empty
  770. *
  771. * Returns 0 or the error code
  772. */
  773. static int rfc3986_parse_relative_ref(URI *uri, const char *str)
  774. {
  775. int ret;
  776. if ((*str == '/') && (*(str + 1) == '/')) {
  777. str += 2;
  778. ret = rfc3986_parse_authority(uri, &str);
  779. if (ret != 0) {
  780. return ret;
  781. }
  782. ret = rfc3986_parse_path_ab_empty(uri, &str);
  783. if (ret != 0) {
  784. return ret;
  785. }
  786. } else if (*str == '/') {
  787. ret = rfc3986_parse_path_absolute(uri, &str);
  788. if (ret != 0) {
  789. return ret;
  790. }
  791. } else if (ISA_PCHAR(str)) {
  792. ret = rfc3986_parse_path_no_scheme(uri, &str);
  793. if (ret != 0) {
  794. return ret;
  795. }
  796. } else {
  797. /* path-empty is effectively empty */
  798. if (uri != NULL) {
  799. g_free(uri->path);
  800. uri->path = NULL;
  801. }
  802. }
  803. if (*str == '?') {
  804. str++;
  805. ret = rfc3986_parse_query(uri, &str);
  806. if (ret != 0) {
  807. return ret;
  808. }
  809. }
  810. if (*str == '#') {
  811. str++;
  812. ret = rfc3986_parse_fragment(uri, &str);
  813. if (ret != 0) {
  814. return ret;
  815. }
  816. }
  817. if (*str != 0) {
  818. uri_clean(uri);
  819. return 1;
  820. }
  821. return 0;
  822. }
  823. /**
  824. * rfc3986_parse:
  825. * @uri: pointer to an URI structure
  826. * @str: the string to analyze
  827. *
  828. * Parse an URI string and fills in the appropriate fields
  829. * of the @uri structure
  830. *
  831. * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  832. *
  833. * Returns 0 or the error code
  834. */
  835. static int rfc3986_parse(URI *uri, const char *str)
  836. {
  837. int ret;
  838. ret = rfc3986_parse_scheme(uri, &str);
  839. if (ret != 0) {
  840. return ret;
  841. }
  842. if (*str != ':') {
  843. return 1;
  844. }
  845. str++;
  846. ret = rfc3986_parse_hier_part(uri, &str);
  847. if (ret != 0) {
  848. return ret;
  849. }
  850. if (*str == '?') {
  851. str++;
  852. ret = rfc3986_parse_query(uri, &str);
  853. if (ret != 0) {
  854. return ret;
  855. }
  856. }
  857. if (*str == '#') {
  858. str++;
  859. ret = rfc3986_parse_fragment(uri, &str);
  860. if (ret != 0) {
  861. return ret;
  862. }
  863. }
  864. if (*str != 0) {
  865. uri_clean(uri);
  866. return 1;
  867. }
  868. return 0;
  869. }
  870. /**
  871. * rfc3986_parse_uri_reference:
  872. * @uri: pointer to an URI structure
  873. * @str: the string to analyze
  874. *
  875. * Parse an URI reference string and fills in the appropriate fields
  876. * of the @uri structure
  877. *
  878. * URI-reference = URI / relative-ref
  879. *
  880. * Returns 0 or the error code
  881. */
  882. static int rfc3986_parse_uri_reference(URI *uri, const char *str)
  883. {
  884. int ret;
  885. if (str == NULL) {
  886. return -1;
  887. }
  888. uri_clean(uri);
  889. /*
  890. * Try first to parse absolute refs, then fallback to relative if
  891. * it fails.
  892. */
  893. ret = rfc3986_parse(uri, str);
  894. if (ret != 0) {
  895. uri_clean(uri);
  896. ret = rfc3986_parse_relative_ref(uri, str);
  897. if (ret != 0) {
  898. uri_clean(uri);
  899. return ret;
  900. }
  901. }
  902. return 0;
  903. }
  904. /**
  905. * uri_parse:
  906. * @str: the URI string to analyze
  907. *
  908. * Parse an URI based on RFC 3986
  909. *
  910. * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  911. *
  912. * Returns a newly built URI or NULL in case of error
  913. */
  914. URI *uri_parse(const char *str)
  915. {
  916. URI *uri;
  917. int ret;
  918. if (str == NULL) {
  919. return NULL;
  920. }
  921. uri = uri_new();
  922. ret = rfc3986_parse_uri_reference(uri, str);
  923. if (ret) {
  924. uri_free(uri);
  925. return NULL;
  926. }
  927. return uri;
  928. }
  929. /**
  930. * uri_parse_into:
  931. * @uri: pointer to an URI structure
  932. * @str: the string to analyze
  933. *
  934. * Parse an URI reference string based on RFC 3986 and fills in the
  935. * appropriate fields of the @uri structure
  936. *
  937. * URI-reference = URI / relative-ref
  938. *
  939. * Returns 0 or the error code
  940. */
  941. int uri_parse_into(URI *uri, const char *str)
  942. {
  943. return rfc3986_parse_uri_reference(uri, str);
  944. }
  945. /**
  946. * uri_parse_raw:
  947. * @str: the URI string to analyze
  948. * @raw: if 1 unescaping of URI pieces are disabled
  949. *
  950. * Parse an URI but allows to keep intact the original fragments.
  951. *
  952. * URI-reference = URI / relative-ref
  953. *
  954. * Returns a newly built URI or NULL in case of error
  955. */
  956. URI *uri_parse_raw(const char *str, int raw)
  957. {
  958. URI *uri;
  959. int ret;
  960. if (str == NULL) {
  961. return NULL;
  962. }
  963. uri = uri_new();
  964. if (raw) {
  965. uri->cleanup |= 2;
  966. }
  967. ret = uri_parse_into(uri, str);
  968. if (ret) {
  969. uri_free(uri);
  970. return NULL;
  971. }
  972. return uri;
  973. }
  974. /************************************************************************
  975. * *
  976. * Generic URI structure functions *
  977. * *
  978. ************************************************************************/
  979. /**
  980. * uri_new:
  981. *
  982. * Simply creates an empty URI
  983. *
  984. * Returns the new structure or NULL in case of error
  985. */
  986. URI *uri_new(void)
  987. {
  988. return g_new0(URI, 1);
  989. }
  990. /**
  991. * realloc2n:
  992. *
  993. * Function to handle properly a reallocation when saving an URI
  994. * Also imposes some limit on the length of an URI string output
  995. */
  996. static char *realloc2n(char *ret, int *max)
  997. {
  998. char *temp;
  999. int tmp;
  1000. tmp = *max * 2;
  1001. temp = g_realloc(ret, (tmp + 1));
  1002. *max = tmp;
  1003. return temp;
  1004. }
  1005. /**
  1006. * uri_to_string:
  1007. * @uri: pointer to an URI
  1008. *
  1009. * Save the URI as an escaped string
  1010. *
  1011. * Returns a new string (to be deallocated by caller)
  1012. */
  1013. char *uri_to_string(URI *uri)
  1014. {
  1015. char *ret = NULL;
  1016. char *temp;
  1017. const char *p;
  1018. int len;
  1019. int max;
  1020. if (uri == NULL) {
  1021. return NULL;
  1022. }
  1023. max = 80;
  1024. ret = g_malloc(max + 1);
  1025. len = 0;
  1026. if (uri->scheme != NULL) {
  1027. p = uri->scheme;
  1028. while (*p != 0) {
  1029. if (len >= max) {
  1030. temp = realloc2n(ret, &max);
  1031. ret = temp;
  1032. }
  1033. ret[len++] = *p++;
  1034. }
  1035. if (len >= max) {
  1036. temp = realloc2n(ret, &max);
  1037. ret = temp;
  1038. }
  1039. ret[len++] = ':';
  1040. }
  1041. if (uri->opaque != NULL) {
  1042. p = uri->opaque;
  1043. while (*p != 0) {
  1044. if (len + 3 >= max) {
  1045. temp = realloc2n(ret, &max);
  1046. ret = temp;
  1047. }
  1048. if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) {
  1049. ret[len++] = *p++;
  1050. } else {
  1051. int val = *(unsigned char *)p++;
  1052. int hi = val / 0x10, lo = val % 0x10;
  1053. ret[len++] = '%';
  1054. ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
  1055. ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
  1056. }
  1057. }
  1058. } else {
  1059. if (uri->server != NULL) {
  1060. if (len + 3 >= max) {
  1061. temp = realloc2n(ret, &max);
  1062. ret = temp;
  1063. }
  1064. ret[len++] = '/';
  1065. ret[len++] = '/';
  1066. if (uri->user != NULL) {
  1067. p = uri->user;
  1068. while (*p != 0) {
  1069. if (len + 3 >= max) {
  1070. temp = realloc2n(ret, &max);
  1071. ret = temp;
  1072. }
  1073. if ((IS_UNRESERVED(*(p))) || ((*(p) == ';')) ||
  1074. ((*(p) == ':')) || ((*(p) == '&')) || ((*(p) == '=')) ||
  1075. ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) {
  1076. ret[len++] = *p++;
  1077. } else {
  1078. int val = *(unsigned char *)p++;
  1079. int hi = val / 0x10, lo = val % 0x10;
  1080. ret[len++] = '%';
  1081. ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
  1082. ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
  1083. }
  1084. }
  1085. if (len + 3 >= max) {
  1086. temp = realloc2n(ret, &max);
  1087. ret = temp;
  1088. }
  1089. ret[len++] = '@';
  1090. }
  1091. p = uri->server;
  1092. while (*p != 0) {
  1093. if (len >= max) {
  1094. temp = realloc2n(ret, &max);
  1095. ret = temp;
  1096. }
  1097. ret[len++] = *p++;
  1098. }
  1099. if (uri->port > 0) {
  1100. if (len + 10 >= max) {
  1101. temp = realloc2n(ret, &max);
  1102. ret = temp;
  1103. }
  1104. len += snprintf(&ret[len], max - len, ":%d", uri->port);
  1105. }
  1106. } else if (uri->authority != NULL) {
  1107. if (len + 3 >= max) {
  1108. temp = realloc2n(ret, &max);
  1109. ret = temp;
  1110. }
  1111. ret[len++] = '/';
  1112. ret[len++] = '/';
  1113. p = uri->authority;
  1114. while (*p != 0) {
  1115. if (len + 3 >= max) {
  1116. temp = realloc2n(ret, &max);
  1117. ret = temp;
  1118. }
  1119. if ((IS_UNRESERVED(*(p))) || ((*(p) == '$')) ||
  1120. ((*(p) == ',')) || ((*(p) == ';')) || ((*(p) == ':')) ||
  1121. ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
  1122. ((*(p) == '+'))) {
  1123. ret[len++] = *p++;
  1124. } else {
  1125. int val = *(unsigned char *)p++;
  1126. int hi = val / 0x10, lo = val % 0x10;
  1127. ret[len++] = '%';
  1128. ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
  1129. ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
  1130. }
  1131. }
  1132. } else if (uri->scheme != NULL) {
  1133. if (len + 3 >= max) {
  1134. temp = realloc2n(ret, &max);
  1135. ret = temp;
  1136. }
  1137. ret[len++] = '/';
  1138. ret[len++] = '/';
  1139. }
  1140. if (uri->path != NULL) {
  1141. p = uri->path;
  1142. /*
  1143. * the colon in file:///d: should not be escaped or
  1144. * Windows accesses fail later.
  1145. */
  1146. if ((uri->scheme != NULL) && (p[0] == '/') &&
  1147. (((p[1] >= 'a') && (p[1] <= 'z')) ||
  1148. ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
  1149. (p[2] == ':') && (!strcmp(uri->scheme, "file"))) {
  1150. if (len + 3 >= max) {
  1151. temp = realloc2n(ret, &max);
  1152. ret = temp;
  1153. }
  1154. ret[len++] = *p++;
  1155. ret[len++] = *p++;
  1156. ret[len++] = *p++;
  1157. }
  1158. while (*p != 0) {
  1159. if (len + 3 >= max) {
  1160. temp = realloc2n(ret, &max);
  1161. ret = temp;
  1162. }
  1163. if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
  1164. ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
  1165. ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
  1166. ((*(p) == ','))) {
  1167. ret[len++] = *p++;
  1168. } else {
  1169. int val = *(unsigned char *)p++;
  1170. int hi = val / 0x10, lo = val % 0x10;
  1171. ret[len++] = '%';
  1172. ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
  1173. ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
  1174. }
  1175. }
  1176. }
  1177. if (uri->query != NULL) {
  1178. if (len + 1 >= max) {
  1179. temp = realloc2n(ret, &max);
  1180. ret = temp;
  1181. }
  1182. ret[len++] = '?';
  1183. p = uri->query;
  1184. while (*p != 0) {
  1185. if (len + 1 >= max) {
  1186. temp = realloc2n(ret, &max);
  1187. ret = temp;
  1188. }
  1189. ret[len++] = *p++;
  1190. }
  1191. }
  1192. }
  1193. if (uri->fragment != NULL) {
  1194. if (len + 3 >= max) {
  1195. temp = realloc2n(ret, &max);
  1196. ret = temp;
  1197. }
  1198. ret[len++] = '#';
  1199. p = uri->fragment;
  1200. while (*p != 0) {
  1201. if (len + 3 >= max) {
  1202. temp = realloc2n(ret, &max);
  1203. ret = temp;
  1204. }
  1205. if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) {
  1206. ret[len++] = *p++;
  1207. } else {
  1208. int val = *(unsigned char *)p++;
  1209. int hi = val / 0x10, lo = val % 0x10;
  1210. ret[len++] = '%';
  1211. ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
  1212. ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
  1213. }
  1214. }
  1215. }
  1216. if (len >= max) {
  1217. temp = realloc2n(ret, &max);
  1218. ret = temp;
  1219. }
  1220. ret[len] = 0;
  1221. return ret;
  1222. }
  1223. /**
  1224. * uri_clean:
  1225. * @uri: pointer to an URI
  1226. *
  1227. * Make sure the URI struct is free of content
  1228. */
  1229. static void uri_clean(URI *uri)
  1230. {
  1231. if (uri == NULL) {
  1232. return;
  1233. }
  1234. g_free(uri->scheme);
  1235. uri->scheme = NULL;
  1236. g_free(uri->server);
  1237. uri->server = NULL;
  1238. g_free(uri->user);
  1239. uri->user = NULL;
  1240. g_free(uri->path);
  1241. uri->path = NULL;
  1242. g_free(uri->fragment);
  1243. uri->fragment = NULL;
  1244. g_free(uri->opaque);
  1245. uri->opaque = NULL;
  1246. g_free(uri->authority);
  1247. uri->authority = NULL;
  1248. g_free(uri->query);
  1249. uri->query = NULL;
  1250. }
  1251. /**
  1252. * uri_free:
  1253. * @uri: pointer to an URI, NULL is ignored
  1254. *
  1255. * Free up the URI struct
  1256. */
  1257. void uri_free(URI *uri)
  1258. {
  1259. uri_clean(uri);
  1260. g_free(uri);
  1261. }
  1262. /************************************************************************
  1263. * *
  1264. * Helper functions *
  1265. * *
  1266. ************************************************************************/
  /**
   * normalize_uri_path:
   * @path: pointer to the path string
   *
   * Applies the 5 normalization steps to a path string--that is, RFC 2396
   * Section 5.2, steps 6.c through 6.g.  In addition, runs of '/' after the
   * leading slashes are collapsed into a single '/'.
   *
   * Normalization occurs directly on the string, no new allocation is done;
   * the result is never longer than the input.
   *
   * Returns 0 on success, or -1 if @path is NULL
   */
  static int normalize_uri_path(char *path)
  {
      char *cur, *out;

      if (path == NULL) {
          return -1;
      }

      /*
       * Skip all initial "/" chars.  We want to get to the beginning of the
       * first non-empty segment.
       */
      cur = path;
      while (cur[0] == '/') {
          ++cur;
      }
      if (cur[0] == '\0') {
          return 0;
      }

      /* Keep everything we've seen so far. */
      out = cur;

      /*
       * Analyze each segment in sequence for cases (c) and (d).  "cur" is
       * always at the start of a segment when the loop condition is tested.
       */
      while (cur[0] != '\0') {
          /*
           * c) All occurrences of "./", where "." is a complete path
           *    segment, are removed from the buffer string.
           */
          if ((cur[0] == '.') && (cur[1] == '/')) {
              cur += 2;
              /* '//' normalization should be done at this point too */
              while (cur[0] == '/') {
                  cur++;
              }
              continue;
          }

          /*
           * d) If the buffer string ends with "." as a complete path
           *    segment, that "." is removed.
           */
          if ((cur[0] == '.') && (cur[1] == '\0')) {
              break;
          }

          /* Otherwise keep the segment. */
          while (cur[0] != '/') {
              if (cur[0] == '\0') {
                  goto done_cd;
              }
              (out++)[0] = (cur++)[0];
          }
          /* normalize "//": keep only the last slash of a run */
          while ((cur[0] == '/') && (cur[1] == '/')) {
              cur++;
          }
          (out++)[0] = (cur++)[0];
      }
  done_cd:
      out[0] = '\0';

      /* Reset to the beginning of the first segment for the next sequence. */
      cur = path;
      while (cur[0] == '/') {
          ++cur;
      }
      if (cur[0] == '\0') {
          return 0;
      }

      /*
       * Analyze each segment in sequence for cases (e) and (f).
       *
       * e) All occurrences of "<segment>/../", where <segment> is a
       *    complete path segment not equal to "..", are removed from the
       *    buffer string.  Removal of these path segments is performed
       *    iteratively, removing the leftmost matching pattern on each
       *    iteration, until no matching pattern remains.
       *
       * f) If the buffer string ends with "<segment>/..", where <segment>
       *    is a complete path segment not equal to "..", that
       *    "<segment>/.." is removed.
       *
       * To satisfy the "iterative" clause in (e), we need to collapse the
       * string every time we find something that needs to be removed.  Thus,
       * we don't need to keep two pointers into the string: we only need a
       * "current position" pointer.
       */
      while (1) {
          char *segp, *tmp;

          /*
           * At the beginning of each iteration of this loop, "cur" points to
           * the first character of the segment we want to examine.
           */

          /* Find the end of the current segment. */
          segp = cur;
          while ((segp[0] != '/') && (segp[0] != '\0')) {
              ++segp;
          }

          /*
           * If this is the last segment, we're done (we need at least two
           * segments to meet the criteria for the (e) and (f) cases).
           */
          if (segp[0] == '\0') {
              break;
          }

          /*
           * If the current segment is "..", or if the next segment _isn't_
           * "..", keep this segment and try the next one.
           */
          ++segp;
          if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3)) ||
              ((segp[0] != '.') || (segp[1] != '.') ||
               ((segp[2] != '/') && (segp[2] != '\0')))) {
              cur = segp;
              continue;
          }

          /*
           * If we get here, remove this segment and the next one and back up
           * to the previous segment (if there is one), to implement the
           * "iteratively" clause.  It's pretty much impossible to back up
           * while maintaining two pointers into the buffer, so just compact
           * the whole buffer now.
           */

          /* If this is the end of the buffer, we're done. */
          if (segp[2] == '\0') {
              cur[0] = '\0';
              break;
          }

          /* The regions overlap, so copy by hand instead of using strcpy. */
          tmp = cur;
          segp += 3;
          while ((*tmp++ = *segp++) != 0) {
              /* No further work */
          }

          /*
           * Walk back over the slashes that precede "cur".  There is no
           * previous segment when "cur" is the very start of the buffer or
           * when only slashes precede it; in that case keep going from here.
           *
           * Testing "segp == path" alone is not enough: the walk also stops
           * at "path" when the previous segment is a single character at
           * the start of a relative path (e.g. the "a" in "a/../c"), and
           * that segment must still be re-examined.
           */
          segp = cur;
          while ((segp > path) && ((--segp)[0] == '/')) {
              /* No further work */
          }
          if ((cur == path) || (segp[0] == '/')) {
              continue;
          }

          /*
           * "segp" is pointing to the end of a previous segment; find its
           * start.  We need to back up to the previous segment and start
           * over with that to handle things like "foo/bar/../..".  If we
           * don't do this, then on the first pass we'll remove the "bar/..",
           * but be pointing at the second ".." so we won't realize we can
           * also remove the "foo/..".
           */
          cur = segp;
          while ((cur > path) && (cur[-1] != '/')) {
              --cur;
          }
      }

      /*
       * g) If the resulting buffer string still begins with one or more
       *    complete path segments of "..", then the reference is
       *    considered to be in error.  Implementations may handle this
       *    error by retaining these components in the resolved path (i.e.,
       *    treating them as part of the final URI), by removing them from
       *    the resolved path (i.e., discarding relative levels above the
       *    root), or by avoiding traversal of the reference.
       *
       * We discard them from the final path (absolute paths only).
       */
      if (path[0] == '/') {
          cur = path;
          while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') &&
                 ((cur[3] == '/') || (cur[3] == '\0'))) {
              cur += 3;
          }
          if (cur != path) {
              out = path;
              while (cur[0] != '\0') {
                  (out++)[0] = (cur++)[0];
              }
              out[0] = 0;
          }
      }

      return 0;
  }
  /*
   * Tell whether @c is an ASCII hexadecimal digit (0-9, a-f or A-F).
   * Returns 1 if it is, 0 otherwise.
   */
  static int is_hex(char c)
  {
      int is_decimal = (c >= '0') && (c <= '9');
      int is_lower = (c >= 'a') && (c <= 'f');
      int is_upper = (c >= 'A') && (c <= 'F');

      return is_decimal || is_lower || is_upper;
  }
  /**
   * uri_string_unescape:
   * @str: the string to unescape
   * @len: the length in bytes to unescape (or <= 0 to indicate full string)
   * @target: optional destination buffer
   *
   * Replace every "%XX" sequence (XX being two hexadecimal digits) found in
   * the first @len bytes of @str by the byte it denotes; all other bytes
   * are copied unchanged.  No check is made that @str is a valid URI, and
   * the decoded bytes are stored as-is (no character encoding is applied).
   *
   * The result is never longer than the input.  When @target is NULL a
   * buffer of @len + 1 bytes is allocated for it; otherwise the result is
   * written to @target, which the caller must have sized accordingly.  The
   * result is always NUL-terminated.
   *
   * Returns the unescaped string (@target or a new allocation), or NULL
   * only in case of error
   */
  char *uri_string_unescape(const char *str, int len, char *target)
  {
      char *result;
      char *dst;
      const char *src;

      if (str == NULL) {
          return NULL;
      }
      if (len <= 0) {
          len = strlen(str);
      }
      if (len < 0) {
          return NULL;
      }

      result = (target != NULL) ? target : g_malloc(len + 1);

      src = str;
      dst = result;
      while (len > 0) {
          if ((len > 2) && (src[0] == '%') && (is_hex(src[1])) &&
              (is_hex(src[2]))) {
              int value = 0;
              int i;

              /* fold the two hex digits that follow the '%' into a byte */
              for (i = 1; i <= 2; i++) {
                  char digit = src[i];

                  value *= 16;
                  if ((digit >= '0') && (digit <= '9')) {
                      value += digit - '0';
                  } else if ((digit >= 'a') && (digit <= 'f')) {
                      value += (digit - 'a') + 10;
                  } else {
                      /* is_hex() leaves only 'A'..'F' here */
                      value += (digit - 'A') + 10;
                  }
              }
              *dst++ = value;
              src += 3;
              len -= 3;
          } else {
              *dst++ = *src++;
              len--;
          }
      }
      *dst = 0;

      return result;
  }
  /**
   * uri_string_escape:
   * @str: string to escape
   * @list: exception list string of chars not to escape; NULL is treated
   *        as an empty list
   *
   * This routine percent-encodes a string.  Unreserved characters, '@' and
   * the characters in the exception list are copied unchanged; every other
   * byte is written as "%XX" with uppercase hexadecimal digits.
   *
   * The byte value is taken as unsigned, so bytes >= 0x80 are escaped
   * correctly even where plain char is signed (e.g. 0xE9 becomes "%E9").
   *
   * Returns a new escaped string (to be freed by the caller) or NULL in
   * case of error.
   */
  char *uri_string_escape(const char *str, const char *list)
  {
      static const char hex[] = "0123456789ABCDEF";
      char *ret;
      const char *in;
      int len, out;

      if (str == NULL) {
          return NULL;
      }
      if (str[0] == 0) {
          return g_strdup(str);
      }
      len = strlen(str);
      if (!(len > 0)) {
          /* the length does not fit in an int */
          return NULL;
      }

      /* start with some slack; the buffer is doubled on demand below */
      len += 20;
      ret = g_malloc(len);
      out = 0;

      for (in = str; *in != 0; in++) {
          char ch = *in;
          unsigned char byte = (unsigned char)ch;

          /* keep room for one "%XX" triplet plus the final NUL */
          if (len - out <= 3) {
              ret = realloc2n(ret, &len);
          }

          if ((ch != '@') && (!IS_UNRESERVED(ch)) &&
              ((list == NULL) || (!strchr(list, ch)))) {
              ret[out++] = '%';
              ret[out++] = hex[byte >> 4];
              ret[out++] = hex[byte & 0xF];
          } else {
              ret[out++] = ch;
          }
      }
      ret[out] = 0;

      return ret;
  }
  1582. /************************************************************************
  1583. * *
  1584. * Public functions *
  1585. * *
  1586. ************************************************************************/
  1587. /**
  1588. * uri_resolve:
  1589. * @URI: the URI instance found in the document
  1590. * @base: the base value
  1591. *
  1592. * Computes he final URI of the reference done by checking that
  1593. * the given URI is valid, and building the final URI using the
  1594. * base URI. This is processed according to section 5.2 of the
  1595. * RFC 2396
  1596. *
  1597. * 5.2. Resolving Relative References to Absolute Form
  1598. *
  1599. * Returns a new URI string (to be freed by the caller) or NULL in case
  1600. * of error.
  1601. */
  1602. char *uri_resolve(const char *uri, const char *base)
  1603. {
  1604. char *val = NULL;
  1605. int ret, len, indx, cur, out;
  1606. URI *ref = NULL;
  1607. URI *bas = NULL;
  1608. URI *res = NULL;
  1609. /*
  1610. * 1) The URI reference is parsed into the potential four components and
  1611. * fragment identifier, as described in Section 4.3.
  1612. *
  1613. * NOTE that a completely empty URI is treated by modern browsers
  1614. * as a reference to "." rather than as a synonym for the current
  1615. * URI. Should we do that here?
  1616. */
  1617. if (uri == NULL) {
  1618. ret = -1;
  1619. } else {
  1620. if (*uri) {
  1621. ref = uri_new();
  1622. ret = uri_parse_into(ref, uri);
  1623. } else {
  1624. ret = 0;
  1625. }
  1626. }
  1627. if (ret != 0) {
  1628. goto done;
  1629. }
  1630. if ((ref != NULL) && (ref->scheme != NULL)) {
  1631. /*
  1632. * The URI is absolute don't modify.
  1633. */
  1634. val = g_strdup(uri);
  1635. goto done;
  1636. }
  1637. if (base == NULL) {
  1638. ret = -1;
  1639. } else {
  1640. bas = uri_new();
  1641. ret = uri_parse_into(bas, base);
  1642. }
  1643. if (ret != 0) {
  1644. if (ref) {
  1645. val = uri_to_string(ref);
  1646. }
  1647. goto done;
  1648. }
  1649. if (ref == NULL) {
  1650. /*
  1651. * the base fragment must be ignored
  1652. */
  1653. g_free(bas->fragment);
  1654. bas->fragment = NULL;
  1655. val = uri_to_string(bas);
  1656. goto done;
  1657. }
  1658. /*
  1659. * 2) If the path component is empty and the scheme, authority, and
  1660. * query components are undefined, then it is a reference to the
  1661. * current document and we are done. Otherwise, the reference URI's
  1662. * query and fragment components are defined as found (or not found)
  1663. * within the URI reference and not inherited from the base URI.
  1664. *
  1665. * NOTE that in modern browsers, the parsing differs from the above
  1666. * in the following aspect: the query component is allowed to be
  1667. * defined while still treating this as a reference to the current
  1668. * document.
  1669. */
  1670. res = uri_new();
  1671. if ((ref->scheme == NULL) && (ref->path == NULL) &&
  1672. ((ref->authority == NULL) && (ref->server == NULL))) {
  1673. res->scheme = g_strdup(bas->scheme);
  1674. if (bas->authority != NULL) {
  1675. res->authority = g_strdup(bas->authority);
  1676. } else if (bas->server != NULL) {
  1677. res->server = g_strdup(bas->server);
  1678. res->user = g_strdup(bas->user);
  1679. res->port = bas->port;
  1680. }
  1681. res->path = g_strdup(bas->path);
  1682. if (ref->query != NULL) {
  1683. res->query = g_strdup(ref->query);
  1684. } else {
  1685. res->query = g_strdup(bas->query);
  1686. }
  1687. res->fragment = g_strdup(ref->fragment);
  1688. goto step_7;
  1689. }
  1690. /*
  1691. * 3) If the scheme component is defined, indicating that the reference
  1692. * starts with a scheme name, then the reference is interpreted as an
  1693. * absolute URI and we are done. Otherwise, the reference URI's
  1694. * scheme is inherited from the base URI's scheme component.
  1695. */
  1696. if (ref->scheme != NULL) {
  1697. val = uri_to_string(ref);
  1698. goto done;
  1699. }
  1700. res->scheme = g_strdup(bas->scheme);
  1701. res->query = g_strdup(ref->query);
  1702. res->fragment = g_strdup(ref->fragment);
  1703. /*
  1704. * 4) If the authority component is defined, then the reference is a
  1705. * network-path and we skip to step 7. Otherwise, the reference
  1706. * URI's authority is inherited from the base URI's authority
  1707. * component, which will also be undefined if the URI scheme does not
  1708. * use an authority component.
  1709. */
  1710. if ((ref->authority != NULL) || (ref->server != NULL)) {
  1711. if (ref->authority != NULL) {
  1712. res->authority = g_strdup(ref->authority);
  1713. } else {
  1714. res->server = g_strdup(ref->server);
  1715. res->user = g_strdup(ref->user);
  1716. res->port = ref->port;
  1717. }
  1718. res->path = g_strdup(ref->path);
  1719. goto step_7;
  1720. }
  1721. if (bas->authority != NULL) {
  1722. res->authority = g_strdup(bas->authority);
  1723. } else if (bas->server != NULL) {
  1724. res->server = g_strdup(bas->server);
  1725. res->user = g_strdup(bas->user);
  1726. res->port = bas->port;
  1727. }
  1728. /*
  1729. * 5) If the path component begins with a slash character ("/"), then
  1730. * the reference is an absolute-path and we skip to step 7.
  1731. */
  1732. if ((ref->path != NULL) && (ref->path[0] == '/')) {
  1733. res->path = g_strdup(ref->path);
  1734. goto step_7;
  1735. }
  1736. /*
  1737. * 6) If this step is reached, then we are resolving a relative-path
  1738. * reference. The relative path needs to be merged with the base
  1739. * URI's path. Although there are many ways to do this, we will
  1740. * describe a simple method using a separate string buffer.
  1741. *
  1742. * Allocate a buffer large enough for the result string.
  1743. */
  1744. len = 2; /* extra / and 0 */
  1745. if (ref->path != NULL) {
  1746. len += strlen(ref->path);
  1747. }
  1748. if (bas->path != NULL) {
  1749. len += strlen(bas->path);
  1750. }
  1751. res->path = g_malloc(len);
  1752. res->path[0] = 0;
  1753. /*
  1754. * a) All but the last segment of the base URI's path component is
  1755. * copied to the buffer. In other words, any characters after the
  1756. * last (right-most) slash character, if any, are excluded.
  1757. */
  1758. cur = 0;
  1759. out = 0;
  1760. if (bas->path != NULL) {
  1761. while (bas->path[cur] != 0) {
  1762. while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) {
  1763. cur++;
  1764. }
  1765. if (bas->path[cur] == 0) {
  1766. break;
  1767. }
  1768. cur++;
  1769. while (out < cur) {
  1770. res->path[out] = bas->path[out];
  1771. out++;
  1772. }
  1773. }
  1774. }
  1775. res->path[out] = 0;
  1776. /*
  1777. * b) The reference's path component is appended to the buffer
  1778. * string.
  1779. */
  1780. if (ref->path != NULL && ref->path[0] != 0) {
  1781. indx = 0;
  1782. /*
  1783. * Ensure the path includes a '/'
  1784. */
  1785. if ((out == 0) && (bas->server != NULL)) {
  1786. res->path[out++] = '/';
  1787. }
  1788. while (ref->path[indx] != 0) {
  1789. res->path[out++] = ref->path[indx++];
  1790. }
  1791. }
  1792. res->path[out] = 0;
  1793. /*
  1794. * Steps c) to h) are really path normalization steps
  1795. */
  1796. normalize_uri_path(res->path);
  1797. step_7:
  1798. /*
  1799. * 7) The resulting URI components, including any inherited from the
  1800. * base URI, are recombined to give the absolute form of the URI
  1801. * reference.
  1802. */
  1803. val = uri_to_string(res);
  1804. done:
  1805. uri_free(ref);
  1806. uri_free(bas);
  1807. uri_free(res);
  1808. return val;
  1809. }
  1810. /**
  1811. * uri_resolve_relative:
  1812. * @URI: the URI reference under consideration
  1813. * @base: the base value
  1814. *
  1815. * Expresses the URI of the reference in terms relative to the
  1816. * base. Some examples of this operation include:
  1817. * base = "http://site1.com/docs/book1.html"
  1818. * URI input URI returned
  1819. * docs/pic1.gif pic1.gif
  1820. * docs/img/pic1.gif img/pic1.gif
  1821. * img/pic1.gif ../img/pic1.gif
  1822. * http://site1.com/docs/pic1.gif pic1.gif
  1823. * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
  1824. *
  1825. * base = "docs/book1.html"
  1826. * URI input URI returned
  1827. * docs/pic1.gif pic1.gif
  1828. * docs/img/pic1.gif img/pic1.gif
  1829. * img/pic1.gif ../img/pic1.gif
  1830. * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
  1831. *
  1832. *
  1833. * Note: if the URI reference is really weird or complicated, it may be
  1834. * worthwhile to first convert it into a "nice" one by calling
  1835. * uri_resolve (using 'base') before calling this routine,
  1836. * since this routine (for reasonable efficiency) assumes URI has
  1837. * already been through some validation.
  1838. *
  1839. * Returns a new URI string (to be freed by the caller) or NULL in case
  1840. * error.
  1841. */
  1842. char *uri_resolve_relative(const char *uri, const char *base)
  1843. {
  1844. char *val = NULL;
  1845. int ret;
  1846. int ix;
  1847. int pos = 0;
  1848. int nbslash = 0;
  1849. int len;
  1850. URI *ref = NULL;
  1851. URI *bas = NULL;
  1852. char *bptr, *uptr, *vptr;
  1853. int remove_path = 0;
  1854. if ((uri == NULL) || (*uri == 0)) {
  1855. return NULL;
  1856. }
  1857. /*
  1858. * First parse URI into a standard form
  1859. */
  1860. ref = uri_new();
  1861. /* If URI not already in "relative" form */
  1862. if (uri[0] != '.') {
  1863. ret = uri_parse_into(ref, uri);
  1864. if (ret != 0) {
  1865. goto done; /* Error in URI, return NULL */
  1866. }
  1867. } else {
  1868. ref->path = g_strdup(uri);
  1869. }
  1870. /*
  1871. * Next parse base into the same standard form
  1872. */
  1873. if ((base == NULL) || (*base == 0)) {
  1874. val = g_strdup(uri);
  1875. goto done;
  1876. }
  1877. bas = uri_new();
  1878. if (base[0] != '.') {
  1879. ret = uri_parse_into(bas, base);
  1880. if (ret != 0) {
  1881. goto done; /* Error in base, return NULL */
  1882. }
  1883. } else {
  1884. bas->path = g_strdup(base);
  1885. }
  1886. /*
  1887. * If the scheme / server on the URI differs from the base,
  1888. * just return the URI
  1889. */
  1890. if ((ref->scheme != NULL) &&
  1891. ((bas->scheme == NULL) || (strcmp(bas->scheme, ref->scheme)) ||
  1892. (strcmp(bas->server, ref->server)))) {
  1893. val = g_strdup(uri);
  1894. goto done;
  1895. }
  1896. if (bas->path == ref->path ||
  1897. (bas->path && ref->path && !strcmp(bas->path, ref->path))) {
  1898. val = g_strdup("");
  1899. goto done;
  1900. }
  1901. if (bas->path == NULL) {
  1902. val = g_strdup(ref->path);
  1903. goto done;
  1904. }
  1905. if (ref->path == NULL) {
  1906. ref->path = (char *)"/";
  1907. remove_path = 1;
  1908. }
  1909. /*
  1910. * At this point (at last!) we can compare the two paths
  1911. *
  1912. * First we take care of the special case where either of the
  1913. * two path components may be missing (bug 316224)
  1914. */
  1915. if (bas->path == NULL) {
  1916. if (ref->path != NULL) {
  1917. uptr = ref->path;
  1918. if (*uptr == '/') {
  1919. uptr++;
  1920. }
  1921. /* exception characters from uri_to_string */
  1922. val = uri_string_escape(uptr, "/;&=+$,");
  1923. }
  1924. goto done;
  1925. }
  1926. bptr = bas->path;
  1927. if (ref->path == NULL) {
  1928. for (ix = 0; bptr[ix] != 0; ix++) {
  1929. if (bptr[ix] == '/') {
  1930. nbslash++;
  1931. }
  1932. }
  1933. uptr = NULL;
  1934. len = 1; /* this is for a string terminator only */
  1935. } else {
  1936. /*
  1937. * Next we compare the two strings and find where they first differ
  1938. */
  1939. if ((ref->path[pos] == '.') && (ref->path[pos + 1] == '/')) {
  1940. pos += 2;
  1941. }
  1942. if ((*bptr == '.') && (bptr[1] == '/')) {
  1943. bptr += 2;
  1944. } else if ((*bptr == '/') && (ref->path[pos] != '/')) {
  1945. bptr++;
  1946. }
  1947. while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) {
  1948. pos++;
  1949. }
  1950. if (bptr[pos] == ref->path[pos]) {
  1951. val = g_strdup("");
  1952. goto done; /* (I can't imagine why anyone would do this) */
  1953. }
  1954. /*
  1955. * In URI, "back up" to the last '/' encountered. This will be the
  1956. * beginning of the "unique" suffix of URI
  1957. */
  1958. ix = pos;
  1959. if ((ref->path[ix] == '/') && (ix > 0)) {
  1960. ix--;
  1961. } else if ((ref->path[ix] == 0) && (ix > 1)
  1962. && (ref->path[ix - 1] == '/')) {
  1963. ix -= 2;
  1964. }
  1965. for (; ix > 0; ix--) {
  1966. if (ref->path[ix] == '/') {
  1967. break;
  1968. }
  1969. }
  1970. if (ix == 0) {
  1971. uptr = ref->path;
  1972. } else {
  1973. ix++;
  1974. uptr = &ref->path[ix];
  1975. }
  1976. /*
  1977. * In base, count the number of '/' from the differing point
  1978. */
  1979. if (bptr[pos] != ref->path[pos]) { /* check for trivial URI == base */
  1980. for (; bptr[ix] != 0; ix++) {
  1981. if (bptr[ix] == '/') {
  1982. nbslash++;
  1983. }
  1984. }
  1985. }
  1986. len = strlen(uptr) + 1;
  1987. }
  1988. if (nbslash == 0) {
  1989. if (uptr != NULL) {
  1990. /* exception characters from uri_to_string */
  1991. val = uri_string_escape(uptr, "/;&=+$,");
  1992. }
  1993. goto done;
  1994. }
  1995. /*
  1996. * Allocate just enough space for the returned string -
  1997. * length of the remainder of the URI, plus enough space
  1998. * for the "../" groups, plus one for the terminator
  1999. */
  2000. val = g_malloc(len + 3 * nbslash);
  2001. vptr = val;
  2002. /*
  2003. * Put in as many "../" as needed
  2004. */
  2005. for (; nbslash > 0; nbslash--) {
  2006. *vptr++ = '.';
  2007. *vptr++ = '.';
  2008. *vptr++ = '/';
  2009. }
  2010. /*
  2011. * Finish up with the end of the URI
  2012. */
  2013. if (uptr != NULL) {
  2014. if ((vptr > val) && (len > 0) && (uptr[0] == '/') &&
  2015. (vptr[-1] == '/')) {
  2016. memcpy(vptr, uptr + 1, len - 1);
  2017. vptr[len - 2] = 0;
  2018. } else {
  2019. memcpy(vptr, uptr, len);
  2020. vptr[len - 1] = 0;
  2021. }
  2022. } else {
  2023. vptr[len - 1] = 0;
  2024. }
  2025. /* escape the freshly-built path */
  2026. vptr = val;
  2027. /* exception characters from uri_to_string */
  2028. val = uri_string_escape(vptr, "/;&=+$,");
  2029. g_free(vptr);
  2030. done:
  2031. /*
  2032. * Free the working variables
  2033. */
  2034. if (remove_path != 0) {
  2035. ref->path = NULL;
  2036. }
  2037. uri_free(ref);
  2038. uri_free(bas);
  2039. return val;
  2040. }
  2041. /*
  2042. * Utility functions to help parse and assemble query strings.
  2043. */
  2044. struct QueryParams *query_params_new(int init_alloc)
  2045. {
  2046. struct QueryParams *ps;
  2047. if (init_alloc <= 0) {
  2048. init_alloc = 1;
  2049. }
  2050. ps = g_new(QueryParams, 1);
  2051. ps->n = 0;
  2052. ps->alloc = init_alloc;
  2053. ps->p = g_new(QueryParam, ps->alloc);
  2054. return ps;
  2055. }
  2056. /* Ensure there is space to store at least one more parameter
  2057. * at the end of the set.
  2058. */
  2059. static int query_params_append(struct QueryParams *ps, const char *name,
  2060. const char *value)
  2061. {
  2062. if (ps->n >= ps->alloc) {
  2063. ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
  2064. ps->alloc *= 2;
  2065. }
  2066. ps->p[ps->n].name = g_strdup(name);
  2067. ps->p[ps->n].value = g_strdup(value);
  2068. ps->p[ps->n].ignore = 0;
  2069. ps->n++;
  2070. return 0;
  2071. }
  2072. void query_params_free(struct QueryParams *ps)
  2073. {
  2074. int i;
  2075. for (i = 0; i < ps->n; ++i) {
  2076. g_free(ps->p[i].name);
  2077. g_free(ps->p[i].value);
  2078. }
  2079. g_free(ps->p);
  2080. g_free(ps);
  2081. }
  2082. struct QueryParams *query_params_parse(const char *query)
  2083. {
  2084. struct QueryParams *ps;
  2085. const char *end, *eq;
  2086. ps = query_params_new(0);
  2087. if (!query || query[0] == '\0') {
  2088. return ps;
  2089. }
  2090. while (*query) {
  2091. char *name = NULL, *value = NULL;
  2092. /* Find the next separator, or end of the string. */
  2093. end = strchr(query, '&');
  2094. if (!end) {
  2095. end = qemu_strchrnul(query, ';');
  2096. }
  2097. /* Find the first '=' character between here and end. */
  2098. eq = strchr(query, '=');
  2099. if (eq && eq >= end) {
  2100. eq = NULL;
  2101. }
  2102. /* Empty section (eg. "&&"). */
  2103. if (end == query) {
  2104. goto next;
  2105. }
  2106. /* If there is no '=' character, then we have just "name"
  2107. * and consistent with CGI.pm we assume value is "".
  2108. */
  2109. else if (!eq) {
  2110. name = uri_string_unescape(query, end - query, NULL);
  2111. value = NULL;
  2112. }
  2113. /* Or if we have "name=" here (works around annoying
  2114. * problem when calling uri_string_unescape with len = 0).
  2115. */
  2116. else if (eq + 1 == end) {
  2117. name = uri_string_unescape(query, eq - query, NULL);
  2118. value = g_new0(char, 1);
  2119. }
  2120. /* If the '=' character is at the beginning then we have
  2121. * "=value" and consistent with CGI.pm we _ignore_ this.
  2122. */
  2123. else if (query == eq) {
  2124. goto next;
  2125. }
  2126. /* Otherwise it's "name=value". */
  2127. else {
  2128. name = uri_string_unescape(query, eq - query, NULL);
  2129. value = uri_string_unescape(eq + 1, end - (eq + 1), NULL);
  2130. }
  2131. /* Append to the parameter set. */
  2132. query_params_append(ps, name, value);
  2133. g_free(name);
  2134. g_free(value);
  2135. next:
  2136. query = end;
  2137. if (*query) {
  2138. query++; /* skip '&' separator */
  2139. }
  2140. }
  2141. return ps;
  2142. }